├── .gitignore ├── lean-toolchain ├── .github ├── dependabot.yml └── workflows │ ├── build.yml │ ├── docs.yml │ └── update-toolchain.yml ├── .vscode └── settings.json ├── test ├── issue-39.lean └── issue-35.lean ├── Parser ├── RegEx.lean ├── Char.lean ├── Prelude.lean ├── Char │ ├── Numeric.lean │ ├── Basic.lean │ └── Unicode.lean ├── RegEx │ ├── Basic.lean │ └── Compile.lean ├── Stream.lean ├── Error.lean ├── Parser.lean └── Basic.lean ├── docs └── lakefile.toml ├── Parser.lean ├── lakefile.toml ├── test.lean ├── lake-manifest.json ├── README.md ├── examples ├── Roman.lean ├── JSON.lean └── BNF.lean └── LICENSE /.gitignore: -------------------------------------------------------------------------------- 1 | /.lake 2 | *~ 3 | -------------------------------------------------------------------------------- /lean-toolchain: -------------------------------------------------------------------------------- 1 | leanprover/lean4:v4.27.0-rc1 2 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "daily" 7 | commit-message: 8 | prefix: "chore" 9 | 10 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "editor.acceptSuggestionOnEnter": "off", 3 | "editor.tabSize": 2, 4 | "editor.rulers": [100], 5 | "files.insertFinalNewline": true, 6 | "files.trimTrailingWhitespace": true, 7 | } 8 | -------------------------------------------------------------------------------- /test/issue-39.lean: -------------------------------------------------------------------------------- 1 | import Parser 2 | 3 | open Parser 4 | 5 | def test := 6 | match (endOfInput : TrivialParser Substring Char Unit).run "abcd" with 7 | | .ok _ _ => false 8 | | .error _ _ => true 9 | 10 | #guard test 11 | -------------------------------------------------------------------------------- /Parser/RegEx.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2023 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.RegEx.Basic 7 | import Parser.RegEx.Compile 8 | -------------------------------------------------------------------------------- /docs/lakefile.toml: -------------------------------------------------------------------------------- 1 | name = "docs" 2 | reservoir = false 3 | packagesDir = "../.lake/packages" 4 | buildDir = "." 5 | 6 | [[require]] 7 | scope = "leanprover" 8 | name = "doc-gen4" 9 | rev = "main" 10 | 11 | [[require]] 12 | name = "Parser" 13 | path = ".." 14 | -------------------------------------------------------------------------------- /test/issue-35.lean: -------------------------------------------------------------------------------- 1 | import Parser.Char 2 | 3 | open Parser 4 | 5 | def test : Bool := 6 | match Parser.run (Char.string "abc" : SimpleParser Substring Char String) "abc" with 7 | | .ok s r => s == "" && r == "abc" 8 | | .error _ _ => false 9 | 10 | #guard test 11 | -------------------------------------------------------------------------------- /Parser/Char.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2023 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Char.Basic 7 | import Parser.Char.Numeric 8 | import Parser.Char.Unicode 9 | -------------------------------------------------------------------------------- /Parser/Prelude.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Batteries 7 | import UnicodeBasic 8 | 9 | instance : Std.Stream String.Slice Char where 10 | next? s := s.front? >>= fun c => return (c, s.drop 1) 11 | -------------------------------------------------------------------------------- /Parser.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Basic 7 | import Parser.Char 8 | import Parser.Error 9 | import Parser.Parser 10 | import Parser.Prelude 11 | import Parser.RegEx 12 | import Parser.Stream 13 | -------------------------------------------------------------------------------- /lakefile.toml: -------------------------------------------------------------------------------- 1 | name = "Parser" 2 | defaultTargets = ["Parser"] 3 | 4 | [[require]] 5 | name = "batteries" 6 | git = "https://github.com/leanprover-community/batteries" 7 | rev = "main" 8 | 9 | [[require]] 10 | name = "UnicodeBasic" 11 | git = "https://github.com/fgdorais/lean4-unicode-basic" 12 | rev = "main" 13 | 14 | [[lean_lib]] 15 | name = "Parser" 16 | 17 | [[lean_exe]] 18 | name = "test" 19 | -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: 4 | workflow_dispatch: 5 | pull_request: 6 | branches: ["main"] 7 | push: 8 | branches: ["main", "stable"] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | 14 | steps: 15 | - uses: actions/checkout@v6 16 | - uses: leanprover/lean-action@v1 17 | with: 18 | test: false 19 | use-mathlib-cache: false 20 | -------------------------------------------------------------------------------- /test.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2024 Kim Morrison. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | Authors: Kim Morrison, François G. Dorais 5 | -/ 6 | open IO.Process 7 | 8 | /-- 9 | Run tests, via the Batteries test runner. 10 | 11 | When https://github.com/leanprover/lean4/issues/4121 12 | "allow using an upstream executable as a lake `@[test_runner]`" 13 | is resolved, this file can be replaced with a line in `lakefile.lean`. 14 | -/ 15 | def main (args : List String) : IO Unit := do 16 | let exitcode ← (← spawn { cmd := "lake", args := #["exe", "batteries/test"] ++ args }).wait 17 | exit exitcode.toUInt8 18 | -------------------------------------------------------------------------------- /lake-manifest.json: -------------------------------------------------------------------------------- 1 | {"version": "1.1.0", 2 | "packagesDir": ".lake/packages", 3 | "packages": 4 | [{"url": "https://github.com/fgdorais/lean4-unicode-basic", 5 | "type": "git", 6 | "subDir": null, 7 | "scope": "", 8 | "rev": "256e9993b028465b3e183f6da5f2ab02b7236725", 9 | "name": "UnicodeBasic", 10 | "manifestFile": "lake-manifest.json", 11 | "inputRev": "main", 12 | "inherited": false, 13 | "configFile": "lakefile.lean"}, 14 | {"url": "https://github.com/leanprover-community/batteries", 15 | "type": "git", 16 | "subDir": null, 17 | "scope": "", 18 | "rev": "6254bed25866358ce4f841fa5a13b77de04ffbc8", 19 | "name": "batteries", 20 | "manifestFile": "lake-manifest.json", 21 | "inputRev": "main", 22 | "inherited": false, 23 | "configFile": "lakefile.toml"}], 24 | "name": "Parser", 25 | "lakeDir": ".lake"} 26 | -------------------------------------------------------------------------------- /.github/workflows/docs.yml: -------------------------------------------------------------------------------- 1 | name: Publish Docs 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | publish-docs: 8 | runs-on: ubuntu-latest 9 | permissions: 10 | contents: write 11 | steps: 12 | 13 | - name: Checkout 14 | uses: actions/checkout@v6 15 | 16 | - name: Install Lean 17 | uses: leanprover/lean-action@v1 18 | with: 19 | test: false 20 | lint: false 21 | use-github-cache: true 22 | 23 | - name: Build Docs 24 | working-directory: docs 25 | run: | 26 | lake build Parser:docs 27 | 28 | - name: Publish Docs 29 | run: | 30 | git config user.name 'Parser Bot' 31 | git config user.email 'parser-bot@users.noreply.github.com' 32 | git checkout -b docs 33 | git add docs/doc docs/doc-data 34 | git commit -m 'chore: generate docs' 35 | git push origin docs --force 36 | -------------------------------------------------------------------------------- /.github/workflows/update-toolchain.yml: -------------------------------------------------------------------------------- 1 | name: Update Toolchain 2 | 3 | on: 4 | workflow_dispatch: 5 | schedule: 6 | - cron: '0 3 * * *' 7 | 8 | jobs: 9 | update-toolchain: 10 | name: update toolchain 11 | runs-on: ubuntu-latest 12 | steps: 13 | 14 | - name: checkout 15 | uses: actions/checkout@v6 16 | 17 | - name: install jq 18 | uses: dcarbone/install-jq-action@v3 19 | 20 | - name: get release tag 21 | id: get-latest-release 22 | run: | 23 | LEAN_TAG=$(curl -sSf "https://api.github.com/repos/leanprover/lean4/releases" | jq -r '.[0].tag_name') 24 | echo "LEAN_TAG=$LEAN_TAG" >> $GITHUB_ENV 25 | 26 | - name: update lean-toolchain 27 | id: update-toolchain 28 | run: | 29 | if [ $(curl -sSf "https://raw.githubusercontent.com/leanprover-community/batteries/main/lean-toolchain") = "leanprover/lean4:$LEAN_TAG" ] && [ $(curl -sSf "https://raw.githubusercontent.com/fgdorais/lean4-unicode-basic/main/lean-toolchain") = "leanprover/lean4:$LEAN_TAG" ] ; 30 | then 31 | echo "leanprover/lean4:$LEAN_TAG" > lean-toolchain ; 32 | fi 33 | 34 | - name: create pull request 35 | uses: peter-evans/create-pull-request@v8 36 | with: 37 | commit-message: "chore: update toolchain ${{ env.LEAN_TAG }}" 38 | committer: GitHub 39 | author: ${{ github.actor }} <${{ github.actor }}@users.noreply.github.com> 40 | signoff: false 41 | branch: update-toolchain-${{ env.LEAN_TAG }} 42 | delete-branch: true 43 | title: 'chore: update toolchain ${{ env.LEAN_TAG }}' 44 | draft: false 45 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Lean 4 / Parser 2 | 3 | A parser combinator library for [Lean 4](https://leanprover.github.io/). 4 | 5 | Source documentation is available at [www.dorais.org/lean4-parser/doc/](https://www.dorais.org/lean4-parser/doc). 6 | 7 | ## Usage 8 | 9 | Add this dependency to your project's `lakefile.toml`: 10 | 11 | ```toml 12 | [[require]] 13 | name = "Parser" 14 | git = "https://github.com/fgdorais/lean4-parser" 15 | rev = "main" 16 | ``` 17 | Then add `import Parser` at the top of any Lean file where you plan to use this library. 18 | For example: 19 | ```lean 20 | import Parser 21 | 22 | open Parser Char 23 | 24 | /-- 25 | Parses a list of sign-separated integers (no spaces) from an input string and returns the sum. 26 | -/ 27 | def parseSum : SimpleParser Substring Char Int := do 28 | let mut sum : Int := 0 29 | -- parse until all input is consumed 30 | while ! (← test endOfInput) do 31 | -- parse an integer (decimal only) and add to sum 32 | sum := sum + (← ASCII.parseInt) 33 | return sum 34 | 35 | -- returns 42 36 | #eval match parseSum.run "11-1+2-3+33" with 37 | | .ok _ sum => sum 38 | | .error _ e => panic! (toString e) 39 | ``` 40 | 41 | The `examples` directory contains more elaborate sample parsers. 42 | 43 | ## Acknowledgements 44 | 45 | Original work for the Lean 4 Parser library was done by [François G. Dorais](https://github.com/fgdorais), [Kyrill Serdyuk](https://github.com/kyserd), and [Emma Shroyer](https://github.com/emma-shroyer). 46 | This work was partly supported by the CEMS REU program at The University of Vermont. 47 | 48 | ----- 49 | 50 | * The Parser library is copyright © 2022-2025 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. The library is released under the [Apache 2.0 license](http://www.apache.org/licenses/LICENSE-2.0). See the file LICENSE for additional details. 51 | 52 | -------------------------------------------------------------------------------- /Parser/Char/Numeric.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2023 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Basic 7 | import Parser.Char.Basic 8 | 9 | namespace Parser.Char.ASCII 10 | variable {ε σ m} [Parser.Stream σ Char] [Parser.Error ε σ Char] [Monad m] 11 | 12 | @[inline] 13 | private def decNum (n : Nat := 0) : ParserT ε σ Char m (Nat × Nat) := 14 | foldl (fun (r : Nat × Nat) (d : Fin 10) => (10 * r.1 + d, r.2+1)) (n,0) ASCII.digit 15 | 16 | @[inline] 17 | private def binNum (n : Nat := 0) : ParserT ε σ Char m (Nat × Nat) := 18 | foldl (fun (r : Nat × Nat) (d : Fin 2) => (r.1 <<< 1 + d, r.2+1)) (n,0) ASCII.binDigit 19 | 20 | @[inline] 21 | private def octNum (n : Nat := 0) : ParserT ε σ Char m (Nat × Nat) := 22 | foldl (fun (r : Nat × Nat) (d : Fin 8) => (r.1 <<< 3 + d, r.2+1)) (n,0) ASCII.octDigit 23 | 24 | @[inline] 25 | private def hexNum (n : Nat := 0) : ParserT ε σ Char m (Nat × Nat) := 26 | foldl (fun (r : Nat × Nat) (d : Fin 16) => (r.1 <<< 4 + d, r.2+1)) (n,0) ASCII.hexDigit 27 | 28 | /-- Parse a `Nat` -/ 29 | def parseNat (decimalOnly := true) : ParserT ε σ Char m Nat := do 30 | match ← ASCII.digit with 31 | | ⟨0, _⟩ => 32 | if decimalOnly then 33 | Prod.fst <$> ASCII.decNum 34 | else 35 | first [ 36 | char 'b' *> binNum, 37 | char 'x' *> hexNum, 38 | octNum, 39 | return 0] 40 | | ⟨n, _⟩ => Prod.fst <$> ASCII.decNum n 41 | where 42 | binNum := do 43 | let ⟨n, _⟩ ← ASCII.binDigit 44 | Prod.fst <$> ASCII.binNum n 45 | octNum := do 46 | let ⟨n, _⟩ ← ASCII.octDigit 47 | Prod.fst <$> ASCII.octNum n 48 | hexNum := do 49 | let ⟨n, _⟩ ← ASCII.hexDigit 50 | Prod.fst <$> ASCII.hexNum n 51 | 52 | /-- Parse an `Int` -/ 53 | def parseInt (decimalOnly := true) : ParserT ε σ Char m Int := do 54 | match ← option? (char '+' <|> char '-') with 55 | | some '-' => Int.negOfNat <$> parseNat decimalOnly 56 | | _ => Int.ofNat <$> parseNat decimalOnly 57 | 58 | /-- Parse scientific notation -/ 59 | @[inline] 60 | def parseScientific (α) [OfScientific α] : ParserT ε σ Char m α := do 61 | let (man, pre) ← ASCII.decNum 62 | let (man, aft) ← (char '.' *> ASCII.decNum man) <|> pure (man, 0) 63 | if pre + aft = 0 then throwUnexpected 64 | let exp : Int ← ((char 'E' <|> char 'e') *> parseInt) <|> pure 0 65 | return OfScientific.ofScientific man (exp < aft) (exp - aft).natAbs 66 | 67 | /-- Parse a `Float` -/ 68 | def parseFloat : ParserT ε σ Char m Float := do 69 | match ← option? (char '+' <|> char '-') with 70 | | some '-' => Float.neg <$> parseScientific Float 71 | | _ => parseScientific Float 72 | 73 | end Parser.Char.ASCII 74 | -------------------------------------------------------------------------------- /examples/Roman.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2025 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser 7 | 8 | /-! # Roman Numeral Parser 9 | 10 | Roman numerals are composed from seven basic tokens: 11 | 12 | I = 1, V = 5, X = 10, L = 50, C = 100, D = 500, M = 1000. 13 | 14 | These are combined using additive and subtractive notation to form decimal places: 15 | 16 | * units: I = 1, II = 2, III = 3, IV = 4, V = 5, VI = 6, VII = 7, VIII = 8, IX = 9. 17 | * tens: X = 10, XX = 20, XXX = 30, XL = 40, L = 50, LX = 60, LXX = 70, LXXX = 80, XD = 90. 18 | * hundreds: C = 100, CC = 200, CCC = 300, CD = 400, D = 500, DC = 600, DCC = 700, DCCC = 800, 19 | CM = 900. 20 | * thousands: M = 1000, MM = 2000, MMM = 3000. 21 | 22 | These decimal places are then concatenated from highest to lowest in order to form any integer from 23 | 1 to 3999. For example, MCMLXXXVII = M + CM + LXXX + VII = 1987. 24 | -/ 25 | 26 | namespace Roman 27 | 28 | open Parser Char 29 | 30 | /-- Roman parser monad -/ 31 | protected abbrev Parser := Parser Unit String.Slice Char 32 | 33 | /-- Parse a roman numeral (uppercase) -/ 34 | protected def parse : Roman.Parser Nat := 35 | stepM >>= stepC >>= stepX >>= stepI 36 | 37 | where 38 | 39 | /-- Parse thousands (up to 3000) -/ 40 | stepM : Roman.Parser Nat := 41 | -- 0, M = 1000, MM = 2000, MMM = 3000 42 | (1000 * .) <$> countUpTo 3 (char 'M') 43 | 44 | /-- Parse hundreds and add to `n` -/ 45 | stepC (n : Nat) : Roman.Parser Nat := 46 | first [ 47 | -- CM = 900 48 | char 'C' *> char 'M' *> pure (n + 900), 49 | -- D = 500, DC = 600, DCC = 700, DCCC = 800 50 | char 'D' *> (n + 500 + 100 * .) <$> countUpTo 3 (char 'C'), 51 | -- CD = 400 52 | char 'C' *> char 'D' *> pure (n + 400), 53 | -- 0, C = 100, CC = 200, CCC = 300 54 | (n + 100 * .) <$> countUpTo 3 (char 'C')] 55 | 56 | /-- Parse tens and add to `n` -/ 57 | stepX (n : Nat) : Roman.Parser Nat := 58 | first [ 59 | -- XC = 90 60 | char 'X' *> char 'C' *> pure (n + 90), 61 | -- L = 50, LX = 60, LXX = 70, LXXX = 80 62 | char 'L' *> (n + 50 + 10 * .) <$> countUpTo 3 (char 'X'), 63 | -- XL = 40 64 | char 'X' *> char 'L' *> pure (n + 40), 65 | -- 0, X = 10, XX = 20, XXX = 30 66 | (n + 10 * .) <$> countUpTo 3 (char 'X')] 67 | 68 | /-- Parse units and add to `n` -/ 69 | stepI (n : Nat) : Roman.Parser Nat := 70 | first [ 71 | -- IX = 9 72 | char 'I' *> char 'X' *> pure (n + 9), 73 | -- V = 5, VI = 6, VII = 7, VIII = 80 74 | char 'V' *> (n + 5 + .) <$> countUpTo 3 (char 'I'), 75 | -- IV = 4 76 | char 'I' *> char 'V' *> pure (n + 4), 77 | -- 0, I = 1, II = 2, III = 3 78 | (n + .) <$> countUpTo 3 (char 'I')] 79 | 80 | end Roman 81 | 82 | /-- Interpret the string as a roman numeral -/ 83 | def String.toNatRoman? (s : String) (upper : Bool := true) : Option Nat := 84 | let s := if upper then s else s.map .toUpper 85 | match Parser.run (Roman.parse <* Parser.endOfInput) s.toSlice with 86 | | .ok _ (n+1) => some (n+1) 87 | | _ => none 88 | 89 | @[inline, inherit_doc String.toNatRoman?] 90 | def String.toNatRoman! (s : String) (upper : Bool := true) : Nat := 91 | s.toNatRoman? upper |>.get! 92 | -------------------------------------------------------------------------------- /Parser/RegEx/Basic.lean: -------------------------------------------------------------------------------- 1 | 2 | /- 3 | Copyright © 2022-2023 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 4 | Released under Apache 2.0 license as described in the file LICENSE. 5 | -/ 6 | 7 | import Parser.Basic 8 | 9 | namespace Parser 10 | 11 | /-- Type of regular expressions -/ 12 | inductive RegEx : Type _ → Type _ 13 | /-- Character set -/ 14 | | set : (α → Bool) → RegEx α 15 | /-- Alternation -/ 16 | | alt : RegEx α → RegEx α → RegEx α 17 | /-- Concatenation -/ 18 | | cat : RegEx α → RegEx α → RegEx α 19 | /-- Unbounded repetition -/ 20 | | repMany : RegEx α → RegEx α 21 | /-- Bounded repetition -/ 22 | | repUpTo : Nat → RegEx α → RegEx α 23 | /-- Grouping -/ 24 | | group : RegEx α → RegEx α 25 | 26 | namespace RegEx 27 | 28 | /-- Grouping depth -/ 29 | def depth : RegEx α → Nat -- TODO: make computed field 30 | | .set _ => 0 31 | | .alt e₁ e₂ => depth e₁ + depth e₂ 32 | | .cat e₁ e₂ => depth e₁ + depth e₂ 33 | | .repMany e => depth e 34 | | .repUpTo _ e => depth e 35 | | .group e => depth e + 1 36 | 37 | /-- Empty character set -/ 38 | def fail : RegEx α := set fun _ => false 39 | 40 | instance (α) : Inhabited (RegEx α) := ⟨fail⟩ 41 | 42 | /-- Universal character set -/ 43 | def any : RegEx α := set fun _ => true 44 | 45 | /-- Nil expression -/ 46 | def nil : RegEx α := repUpTo 0 default 47 | 48 | /-- Optional -/ 49 | def opt (e : RegEx α) := repUpTo 1 e 50 | 51 | /-- Repetition -/ 52 | def rep (n : Nat) (e : RegEx α) := 53 | match n with 54 | | 0 => repUpTo 0 e 55 | | 1 => e 56 | | n+1 => cat e (rep n e) 57 | 58 | /-- Unbounded repetition, at least once -/ 59 | def repMany1 (e : RegEx α) := cat e (repMany e) 60 | 61 | /-- Unbounded repetition, at least `n` times -/ 62 | def repManyN (n : Nat) (e : RegEx α) := 63 | match n with 64 | | 0 => repMany e 65 | | n+1 => cat e (repManyN n e) 66 | 67 | section 68 | variable {ε σ α β} [Parser.Stream σ α] [Parser.Error ε σ α] {m} [Monad m] 69 | 70 | /-- Fold over a regex match from the right -/ 71 | protected partial def foldr (f : α → β → β) : RegEx α → ParserT ε σ α m β → ParserT ε σ α m β 72 | | .set s, k => tokenFilter s >>= fun x => f x <$> k 73 | | .alt e₁ e₂, k => RegEx.foldr f e₁ k <|> RegEx.foldr f e₂ k 74 | | .cat e₁ e₂, k => RegEx.foldr f e₁ (RegEx.foldr f e₂ k) 75 | | .repUpTo 0 _, k => k 76 | | .repUpTo (n+1) e, k => RegEx.foldr f e (RegEx.foldr f (.repUpTo n e) k) <|> k 77 | | .repMany e, k => RegEx.foldr f e (RegEx.foldr f (.repMany e) k) <|> k 78 | | .group e, k => RegEx.foldr f e k 79 | 80 | /-- `take re` parses tokens matching regex `re` returning the list of tokens, otherwise fails -/ 81 | protected def take (re : RegEx α) : ParserT ε σ α m (List α) := 82 | re.foldr (.::.) (pure []) 83 | 84 | /-- `drop re` parses tokens matching regex `re`, otherwise fails -/ 85 | protected def drop (re : RegEx α) : ParserT ε σ α m Unit := 86 | re.foldr (fun _ => id) (pure ()) 87 | 88 | /-- `count re` parses tokens matching regex `re` returning the number of tokens, otherwise fails -/ 89 | protected def count (re : RegEx α) : ParserT ε σ α m Nat := 90 | re.foldr (fun _ => Nat.succ) (pure 0) 91 | 92 | /-- Parses tokens matching regex `re` returning all the matching group segments, otherwise fails -/ 93 | protected partial def «match» (re : RegEx α) : ParserT ε σ α m (Array (Option (Stream.Segment σ))) := do 94 | loop re 0 (Array.replicate re.depth none) 95 | where 96 | loop : RegEx α → Nat → Array (Option (Stream.Segment σ)) → ParserT ε σ α m (Array (Option (Stream.Segment σ))) 97 | | .set s, _, ms => tokenFilter s *> return ms 98 | | .alt e₁ e₂, lvl, ms => loop e₁ lvl ms <|> loop e₂ (lvl + e₁.depth) ms 99 | | .cat e₁ e₂, lvl, ms => loop e₁ lvl ms >>= loop e₂ (lvl + e₁.depth) 100 | | .repUpTo 0 _, _, ms => return ms 101 | | .repUpTo (n+1) e, lvl, ms => loop e lvl ms >>= loop (.repUpTo n e) lvl <|> return ms 102 | | .repMany e, lvl, ms => loop e lvl ms >>= loop (.repMany e) lvl <|> return ms 103 | | .group e, lvl, ms => do 104 | let mut ms := ms 105 | for i in [lvl:ms.size] do ms := ms.set! i none 106 | let start ← Parser.getPosition 107 | ms ← loop e (lvl+1) ms 108 | let stop ← Parser.getPosition 109 | return ms.set! lvl (some (start, stop)) 110 | 111 | end 112 | -------------------------------------------------------------------------------- /Parser/Stream.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2025 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Prelude 7 | 8 | /-! # Parser Stream 9 | 10 | Parsers read input tokens from a stream. To help with error reporting and backtracking, the 11 | `Parser.Stream` class extends the basic `Stream` class with functionality to save and restore 12 | stream positions. 13 | 14 | The simple way to implement backtracking after a parsing error is to first save the stream state 15 | before parsing and, upon encountering an error, restore the saved stream state. The issue with this 16 | strategy is that each backtrack point adds a reference to the entire stream state. This prevents 17 | linear use of the stream state. The `Parser.Stream` class allows users to work around this issue. 18 | The `Parser.Stream.Position` type is intended to store just enough information to *reconstruct* the 19 | stream state at a save point without having to save the entire stream state. 20 | -/ 21 | 22 | /-- *Parser stream class* 23 | 24 | This class extends the basic `Stream` class with position features needed by parsers for 25 | backtracking and error reporting. 26 | 27 | * The type `Position` is used to record position data for the stream type. 28 | * `getPosition (s : σ) : Position` returns the current position of stream `s`. 29 | * `setPosition (s : σ) (p : Position) : σ` restores stream `s` to position `p`. 30 | 31 | Implementations should try to make the `Position` type as lightweight as possible for `getPosition` 32 | and `setPosition` to work properly. Often `Position` is just a scalar type or another simple type. 33 | This may allow for parsers to use the stream state more efficiently. 34 | -/ 35 | protected class Parser.Stream (σ : Type _) (τ : outParam (Type _)) extends Std.Stream σ τ where 36 | Position : Type _ 37 | getPosition : σ → Position 38 | setPosition : σ → Position → σ 39 | attribute [reducible, inherit_doc Parser.Stream] Parser.Stream.Position 40 | attribute [inherit_doc Parser.Stream] Parser.Stream.getPosition Parser.Stream.setPosition 41 | 42 | namespace Parser.Stream 43 | 44 | /-- Stream segment type. -/ 45 | def Segment (σ) [Parser.Stream σ τ] := Stream.Position σ × Stream.Position σ 46 | 47 | /-- Start position of stream segment. -/ 48 | abbrev Segment.start [Parser.Stream σ τ] (s : Segment σ) := s.1 49 | 50 | /-- Stop position of stream segment. -/ 51 | abbrev Segment.stop [Parser.Stream σ τ] (s : Segment σ) := s.2 52 | 53 | /-- Default wrapper to make a `Parser.Stream` from a plain `Stream`. 54 | 55 | This wrapper uses the entire stream state as position information; this is not efficient. Always 56 | prefer tailored `Parser.Stream` instances to this default. 57 | -/ 58 | @[nolint unusedArguments] 59 | def mkDefault (σ τ) [Std.Stream σ τ] := σ 60 | 61 | @[reducible] 62 | instance (σ τ) [self : Std.Stream σ τ] : Parser.Stream (mkDefault σ τ) τ where 63 | toStream := self 64 | Position := σ 65 | getPosition s := s 66 | setPosition _ p := p 67 | 68 | @[reducible] 69 | instance : Parser.Stream String.Slice Char where 70 | Position := String.Slice 71 | getPosition s := s 72 | setPosition _ s := s 73 | 74 | @[reducible] 75 | instance : Parser.Stream Substring.Raw Char where 76 | Position := String.Pos.Raw 77 | getPosition s := s.startPos 78 | setPosition s p := 79 | if p ≤ s.stopPos then 80 | { s with startPos := p } 81 | else 82 | { s with startPos := s.stopPos } 83 | 84 | @[reducible] 85 | instance (τ) : Parser.Stream (Subarray τ) τ where 86 | Position := Nat 87 | getPosition s := s.start 88 | setPosition s p := 89 | if h : p ≤ s.stop then 90 | ⟨{ s.internalRepresentation with start := p, start_le_stop := h }⟩ 91 | else 92 | ⟨{ s.internalRepresentation with start := s.stop, start_le_stop := Nat.le_refl s.stop }⟩ 93 | 94 | @[reducible] 95 | instance : Parser.Stream ByteSlice UInt8 where 96 | Position := Nat 97 | getPosition s := s.start 98 | setPosition s p := s.slice p 99 | 100 | /-- `OfList` is a view of a list stream that keeps track of consumed tokens. -/ 101 | structure OfList (τ : Type _) where 102 | /-- Remaining tokens. -/ 103 | next : List τ 104 | /-- Consumed tokens. -/ 105 | past : List τ := [] 106 | 107 | /-- Restore a list stream to a given position. -/ 108 | def OfList.setPosition {τ} (s : OfList τ) (p : Nat) : OfList τ := 109 | if s.past.length < p then 110 | fwd (p - s.past.length) s 111 | else 112 | rev (s.past.length - p) s 113 | where 114 | /-- Internal for `OfList.setPosition`. -/ 115 | fwd : Nat → OfList τ → OfList τ 116 | | k+1, ⟨x :: rest, past⟩ => fwd k ⟨rest, x :: past⟩ 117 | | _, s => s 118 | /-- Internal for `OfList.setPosition`. -/ 119 | rev : Nat → OfList τ → OfList τ 120 | | k+1, ⟨rest, x :: past⟩ => rev k ⟨x :: rest, past⟩ 121 | | _, s => s 122 | 123 | /-- Make a `Parser.Stream` from a `List`. -/ 124 | def mkOfList {τ} (data : List τ) (pos : Nat := 0) : OfList τ := 125 | OfList.setPosition { next := data } pos 126 | 127 | @[reducible] 128 | instance (τ) : Parser.Stream (OfList τ) τ where 129 | Position := Nat 130 | getPosition s := s.past.length 131 | setPosition := OfList.setPosition 132 | next? s := 133 | match s with 134 | | ⟨x :: rest, past⟩ => some (x, ⟨rest, x :: past⟩) 135 | | _ => none 136 | 137 | end Parser.Stream 138 | -------------------------------------------------------------------------------- /Parser/Error.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2025 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Prelude 7 | import Parser.Stream 8 | 9 | /-! # Parser Error 10 | 11 | The class `Parser.Error` is used throughout the library for the purpose of reporting parser errors. 12 | Users are encouraged to provide their own instances tailored to their applications. 13 | 14 | Three general purpose instances are provided: 15 | 16 | * `Parser.Error.Simple` records all parsing error information, without processing. 17 | * `Parser.Error.Basic` just records the location of the primary parsing error. 18 | * `Parser.Error.Trivial` discards all parsing error information. 19 | 20 | These are intended for use in parser development and as building blocks (or inspiration) for 21 | tailored instances. 22 | -/ 23 | 24 | /-- *Parser error class* 25 | 26 | This class declares an error type for a given parser stream. 27 | 28 | Given `Parser.Stream σ τ`, `Parser.Error ε σ τ` provides two basic mechanisms for reporting parsing 29 | errors: 30 | 31 | * `unexpected (p : Stream.Position σ) (t : Option τ) : ε` 32 | is used to report an unexpected input at a given position, optionally with the offending token. 33 | * `addMessage (e : ε) (p : Stream.Position σ) (info : String)` 34 | is used to add additional error information at a given position. 35 | 36 | This class can be extended to provide additional error reporting and processing functonality, but 37 | only these two mechanisms are used within the library. 38 | -/ 39 | protected class Parser.Error (ε σ : Type _) (τ : outParam (Type _)) [Parser.Stream σ τ] where 40 | unexpected : Stream.Position σ → Option τ → ε 41 | addMessage : ε → Stream.Position σ → String → ε 42 | attribute [inherit_doc Parser.Error] Parser.Error.unexpected Parser.Error.addMessage 43 | 44 | namespace Parser.Error 45 | 46 | /-- *Trivial error type* 47 | 48 | This error type simply discards all error information. This is useful for parsers that cannot fail, 49 | or where parsing errors are intended to be handled by other means. 50 | -/ 51 | abbrev Trivial := Unit 52 | 53 | instance (σ τ) [Parser.Stream σ τ] : Parser.Error Trivial σ τ where 54 | unexpected _ _ := () 55 | addMessage e _ _ := e 56 | 57 | /-- *Basic error type* 58 | 59 | This error type records the position and, optionally, the offending token where a parsing error 60 | occurred; any additional information is discarded. This is useful for parsers where the cause of 61 | parsing errors is predictable and only the position of the error is needed for processing. 62 | -/ 63 | abbrev Basic (σ τ) [Parser.Stream σ τ] := Parser.Stream.Position σ × Option τ 64 | 65 | instance (σ τ) [Parser.Stream σ τ] : Parser.Error (Basic σ τ) σ τ where 66 | unexpected p t := (p, t) 67 | addMessage e _ _ := e 68 | 69 | instance (σ τ) [Repr τ] [Parser.Stream σ τ] [Repr (Parser.Stream.Position σ)] : 70 | ToString (Basic σ τ) where 71 | toString 72 | | (pos, some tok) => s!"unexpected input {repr tok} at {repr pos}" 73 | | (pos, none) => s!"unexpected input at {repr pos}" 74 | 75 | /-- *Simple error type* 76 | 77 | This error type simply records all the error information provided, without additional processing. 78 | Users are expected to provide any necessary post-processing. This is useful for parser development. 79 | -/ 80 | inductive Simple (σ τ) [Parser.Stream σ τ] 81 | /-- Unexpected input at position -/ 82 | | unexpected : Stream.Position σ → Option τ → Simple σ τ 83 | /-- Add error message at position -/ 84 | | addMessage : Simple σ τ → Stream.Position σ → String → Simple σ τ 85 | 86 | -- The derive handler for `Repr` fails, this is a workaround. 87 | private def Simple.reprPrec {σ τ} [Parser.Stream σ τ] [Repr τ] [Repr (Stream.Position σ)] : 88 | Simple σ τ → Nat → Std.Format 89 | | unexpected pos a, prec => 90 | Repr.addAppParen 91 | (Std.Format.group 92 | (Std.Format.nest (if prec >= max_prec then 1 else 2) 93 | (Std.Format.text "Parser.Error.Simple.unexpected" ++ 94 | Std.Format.line ++ 95 | reprArg pos ++ 96 | Std.Format.line ++ 97 | reprArg a))) 98 | prec 99 | | addMessage e pos msg, prec => 100 | Repr.addAppParen 101 | (Std.Format.group 102 | (Std.Format.nest (if prec >= max_prec then 1 else 2) 103 | (Std.Format.text "Parser.Error.Simple.addMessage" ++ 104 | Std.Format.line ++ 105 | reprPrec e max_prec ++ 106 | Std.Format.line ++ 107 | reprArg pos ++ 108 | Std.Format.line ++ 109 | reprArg msg))) 110 | prec 111 | 112 | instance (σ τ) [Parser.Stream σ τ] [Repr τ] [Repr (Stream.Position σ)] : Repr (Simple σ τ) where 113 | reprPrec := Simple.reprPrec 114 | 115 | private def Simple.toString {σ τ} [Repr τ] [Parser.Stream σ τ] [Repr (Parser.Stream.Position σ)] : 116 | Simple σ τ → String 117 | | unexpected pos (some tok) => s!"unexpected token {repr tok} at {repr pos}" 118 | | unexpected pos none => s!"unexpected token at {repr pos}" 119 | | addMessage e pos msg => Simple.toString e ++ s!"; {msg} at {repr pos}" 120 | 121 | instance (σ τ) [Repr τ] [Parser.Stream σ τ] [Repr (Parser.Stream.Position σ)] : 122 | ToString (Simple σ τ) where 123 | toString := Simple.toString 124 | 125 | instance (σ τ) [Parser.Stream σ τ] : Parser.Error (Simple σ τ) σ τ where 126 | unexpected := Simple.unexpected 127 | addMessage := Simple.addMessage 128 | 129 | end Parser.Error 130 | -------------------------------------------------------------------------------- /Parser/RegEx/Compile.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2023 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Char 7 | import Parser.Char.Numeric 8 | import Parser.RegEx.Basic 9 | 10 | /-! ## RegEx Syntax 11 | 12 | We currently use a very simplified form for regular expression syntax. 13 | 14 | Operators: 15 | 16 | * A match of `α|β` consists of either a match of `α` or a match of `β` 17 | * A match of `αβ` consists of a match of `α` followed by a match of `β` 18 | * A match of `α?` consists of at most one match of `α` 19 | * A match of `α*` consists of zero or more back-to-back matches of `α` 20 | * A match of `α+` consists of one or more back-to-back matches of `α` 21 | * A match of `α{m}` consists of exactly `m` back-to-back matches of `α` 22 | * A match of `α{m,n}` consists of at least `m` but at most `n` back-to-back matches of `α` 23 | * A match of `α{m,}` consists of `m` or more back-to-back matches of `α` 24 | * A match of `α{,n}` consists of at most `n` back-to-back matches of `α` 25 | * A match of `(α)` consists of a match of `α` 26 | 27 | These are listed from lowest to highest precedence. 28 | 29 | Character matching: 30 | 31 | * `.` matches any character. 32 | * A single character matches itself with the exception of the special characters: `.`, `?`, `*`, 33 | `+`, `|`, `\`, `(`, `)`, `{`, `}`, `[`, `]`. These special characters can be matched by 34 | preceding them with an escape character `\`. 35 | * `[c]` matches one character from the class `c`. 36 | * `[^c]` matches one character not in the class `c`. 37 | 38 | Character classes support single characters and character ranges. The special characters `-`, 39 | `[`, `\`, `]` must be preceded by an escape character `\` within a class. 40 | -/ 41 | 42 | 43 | namespace Parser.RegEx 44 | open Char 45 | 46 | private abbrev REParser := TrivialParser Substring.Raw Char 47 | 48 | mutual 49 | 50 | private partial def re0 : REParser (RegEx Char) := 51 | re1 >>= loop 52 | where 53 | 54 | loop (e : RegEx Char) := do 55 | if ← test (char '|') then 56 | loop (.alt e (← re1)) 57 | else 58 | return e 59 | 60 | private partial def re1 : REParser (RegEx Char) := do 61 | re2 >>= loop <|> return .nil 62 | where 63 | 64 | loop (e : RegEx Char) := do 65 | match ← option? re2 with 66 | | some a => loop (.cat e a) 67 | | none => return e 68 | 69 | private partial def re2 : REParser (RegEx Char) := 70 | re3 >>= loop 71 | where 72 | 73 | loop (e : RegEx Char) := do 74 | match ← option? <| first [star e, plus e, opt e, reps e] with 75 | | some e => loop e 76 | | none => return e 77 | 78 | opt (e : RegEx Char) := do 79 | char '?' *> return .opt e 80 | 81 | star (e : RegEx Char) := do 82 | char '*' *> return .repMany e 83 | 84 | plus (e : RegEx Char) := do 85 | char '+' *> return .repMany1 e 86 | 87 | reps (e : RegEx Char) : REParser (RegEx Char) := 88 | withBacktracking do 89 | let _ ← char '{' 90 | let e ← 91 | match ← option? ASCII.parseNat with 92 | | some min => 93 | let emin : RegEx Char := RegEx.rep min e 94 | match ← option? (char ',' *> option? ASCII.parseNat) with 95 | | some (some max) => pure <| RegEx.cat emin (.repUpTo (max - min) e) 96 | | some none => pure <| RegEx.cat emin (.repMany e) 97 | | none => pure <| emin 98 | | none => 99 | let max ← char ',' *> ASCII.parseNat 100 | pure <| .repUpTo max e 101 | let _ ← char '}' 102 | return e 103 | 104 | private partial def re3 : REParser (RegEx Char) := do 105 | first [tok, any, set, grp] 106 | where 107 | 108 | any : REParser (RegEx Char) := 109 | char '.' *> return .any 110 | 111 | grp : REParser (RegEx Char) := 112 | withBacktracking do 113 | let _ ← char '(' 114 | let n ← test (char '?' *> char ':') 115 | let e ← re0 116 | let _ ← char ')' 117 | return if n then e else .group e 118 | 119 | setLoop (filter : Char → Bool) : REParser (Char → Bool) := do 120 | match ← option? <| tokenFilter (!['-', '[', ']'].elem .) with 121 | | some c => 122 | let c ← if c == '\\' then esc else pure c 123 | let f ← try withBacktracking do 124 | let _ ← char '-' 125 | let c' ← tokenFilter (!['-', '[', ']'].elem .) 126 | let c' ← if c' == '\\' then esc else pure c' 127 | pure <| fun x => c ≤ x && x ≤ c' 128 | catch _ => 129 | pure <| fun x => x == c 130 | setLoop fun x => filter x || f x 131 | | none => return filter 132 | 133 | set : REParser (RegEx Char) := 134 | withBacktracking do 135 | let _ ← char '[' 136 | let n ← test (char '^') 137 | let f ← setLoop fun _ => false 138 | let _ ← char ']' 139 | if n then 140 | return .set (! f .) 141 | else 142 | return .set (f .) 143 | 144 | tok : REParser (RegEx Char) := do 145 | let special := ['.', '?', '*', '+', '|', '(', ')', '{', '}', '[', ']'] 146 | let c ← tokenFilter (!special.elem .) 147 | let c ← if c == '\\' then esc else pure c 148 | return .set (. == c) 149 | 150 | esc : REParser Char := do 151 | match ← anyToken with 152 | | 't' => return '\t' 153 | | 'n' => return '\n' 154 | | 'r' => return '\r' 155 | | 'u' => 156 | let n ← (·.val) <$> Parser.Char.ASCII.hexDigit 157 | let n ← ((n <<< 4) + ·.val) <$> Parser.Char.ASCII.hexDigit 158 | let n ← ((n <<< 4) + ·.val) <$> Parser.Char.ASCII.hexDigit 159 | let n ← ((n <<< 4) + ·.val) <$> Parser.Char.ASCII.hexDigit 160 | return Char.ofNat n 161 | | 'x' => 162 | let n ← (·.val) <$> Parser.Char.ASCII.hexDigit 163 | let n ← ((n <<< 4) + ·.val) <$> Parser.Char.ASCII.hexDigit 164 | return Char.ofNat n 165 | | c => return c 166 | 167 | end 168 | 169 | /-- Compiles a regex from a string, returns `none` on faiure -/ 170 | protected def compile? (s : String) : Option (RegEx Char) := 171 | match Parser.run (re0 <* endOfInput) s with 172 | | .ok _ r => some r 173 | | .error _ _ => none 174 | 175 | /-- Compiles a regex from a string, panics on faiure -/ 176 | protected def compile! (s : String) : RegEx Char := 177 | match RegEx.compile? s with 178 | | some r => r 179 | | none => panic! "invalid regular expression" 180 | -------------------------------------------------------------------------------- /Parser/Char/Basic.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Basic 7 | import Parser.RegEx.Basic 8 | 9 | namespace Parser.Char 10 | variable {ε σ m} [Parser.Stream σ Char] [Parser.Error ε σ Char] [Monad m] 11 | 12 | /-- `char tk` accepts and returns character `tk`, otherwise fails -/ 13 | @[inline] 14 | def char (tk : Char) : ParserT ε σ Char m Char := 15 | withErrorMessage s!"expected {repr tk}" <| token tk 16 | 17 | /-- `chars tks` accepts and returns string `tks`, otherwise fails -/ 18 | def chars (tks : String) : ParserT ε σ Char m String := 19 | withErrorMessage s!"expected {repr tks}" do 20 | let mut acc : String := "" 21 | for tk in tks.toList do 22 | acc := acc.push (← token tk) 23 | return acc 24 | 25 | /-- `string tks` accepts and returns string `tks`, otherwise fails -/ 26 | def string [Parser.Error ε Substring.Raw Char] (tks : String) : ParserT ε Substring.Raw Char m String := 27 | withErrorMessage s!"expected {repr tks}" do 28 | let ⟨str, start, stop⟩ ← getStream 29 | if start.offsetBy tks.rawEndPos ≤ stop ∧ String.Pos.Raw.substrEq tks 0 str start tks.rawEndPos.byteIdx then 30 | setPosition (start.offsetBy tks.rawEndPos) 31 | return tks 32 | else 33 | throwUnexpected 34 | 35 | /-- `captureStr p` parses `p` and returns the output of `p` with the corresponding Substring.Raw -/ 36 | def captureStr [Parser.Error ε Substring.Raw Char] (p : ParserT ε Substring.Raw Char m α) : 37 | ParserT ε Substring.Raw Char m (α × Substring.Raw) := do 38 | let ⟨str,_,_⟩ ← getStream 39 | let (x, start, stop) ← withCapture p 40 | return (x, ⟨str, start, stop⟩) 41 | 42 | /-- `matchStr re` accepts and returns substring matches for regex `re` groups, otherwise fails -/ 43 | def matchStr [Parser.Error ε Substring.Raw Char] (re : RegEx Char) : 44 | ParserT ε Substring.Raw Char m (Array (Option Substring.Raw)) := do 45 | let ⟨str,_,_⟩ ← getStream 46 | let ms ← re.match 47 | return ms.map fun 48 | | some (start, stop) => some ⟨str, start, stop⟩ 49 | | none => none 50 | 51 | /-- Parse space (U+0020) -/ 52 | @[inline] 53 | def space : ParserT ε σ Char m Char := 54 | withErrorMessage "expected space (U+0020)" <| token ' ' 55 | 56 | /-- Parse horizontal tab (U+0009) -/ 57 | @[inline] 58 | def tab : ParserT ε σ Char m Char := 59 | withErrorMessage "expected horizontal tab (U+0009)" <| token '\t' 60 | 61 | /-- Parse line feed (U+000A) -/ 62 | @[inline] 63 | def ASCII.lf : ParserT ε σ Char m Char := 64 | withErrorMessage "expected line feed (U+000A)" <| token '\n' 65 | 66 | /-- Parse carriage return (U+000D) -/ 67 | @[inline] 68 | def ASCII.cr : ParserT ε σ Char m Char := 69 | withErrorMessage "expected carriage return (U+000D)" <| token '\r' 70 | 71 | /-- Parse end of line -/ 72 | @[inline] 73 | def eol : ParserT ε σ Char m Char := 74 | withErrorMessage "expected newline" do 75 | (ASCII.cr *> ASCII.lf) <|> ASCII.lf 76 | 77 | namespace ASCII 78 | 79 | /-- Parse whitespace character -/ 80 | def whitespace : ParserT ε σ Char m Char := 81 | withErrorMessage "expected whitespace character" do 82 | tokenFilter fun c => c == ' ' || c >= '\t' && c <= '\r' 83 | 84 | /-- Parse uppercase letter character (`A`..`Z`) -/ 85 | def uppercase : ParserT ε σ Char m Char := 86 | withErrorMessage "expected uppercase letter character" do 87 | tokenFilter fun c => c >= 'A' && c <= 'Z' 88 | 89 | /-- Parse lowercase letter character (`a`..`z`)-/ 90 | def lowercase : ParserT ε σ Char m Char := 91 | withErrorMessage "expected lowercase letter character" do 92 | tokenFilter fun c => c >= 'a' && c <= 'z' 93 | 94 | /-- Parse alphabetic character (`A`..`Z` and `a`..`z`) -/ 95 | def alpha : ParserT ε σ Char m Char := 96 | withErrorMessage "expected alphabetic character" do 97 | tokenFilter fun c => if c >= 'a' then c <= 'z' else c >= 'A' && c <= 'Z' 98 | 99 | /-- Parse numeric character (`0`..`9`)-/ 100 | def numeric : ParserT ε σ Char m Char := 101 | withErrorMessage "expected decimal digit character" do 102 | tokenFilter fun c => c >= '0' && c <= '9' 103 | 104 | /-- Parse alphabetic letter or digit (`A`..`Z`, `a`..`z` and `0`..`9`) -/ 105 | def alphanum : ParserT ε σ Char m Char := 106 | withErrorMessage "expected letter or digit character" do 107 | tokenFilter fun c => 108 | if c >= 'a' then c <= 'z' 109 | else if c >= 'A' then c <= 'Z' 110 | else c >= '0' && c <= '9' 111 | 112 | /-- Parse control character -/ 113 | def control : ParserT ε σ Char m Char := 114 | withErrorMessage "expected control character" do 115 | tokenFilter fun c => c.val < 0x20 || c.val == 0x7f 116 | 117 | /-- Parse decimal digit (`0`-`9`) -/ 118 | def digit : ParserT ε σ Char m (Fin 10) := 119 | withErrorMessage "expected decimal digit" do 120 | tokenMap fun c => 121 | if c < '0' then none else 122 | let val := c.toNat - '0'.toNat 123 | if h : val < 10 then 124 | some ⟨val, h⟩ 125 | else 126 | none 127 | 128 | /-- Parse binary digit (`0`..`1`) -/ 129 | def binDigit : ParserT ε σ Char m (Fin 2) := 130 | withErrorMessage "expected binary digit" do 131 | tokenMap fun 132 | | '0' => some ⟨0, Nat.zero_lt_succ 1⟩ 133 | | '1' => some ⟨1, Nat.succ_lt_succ (Nat.zero_lt_succ 0)⟩ 134 | | _ => none 135 | 136 | /-- Parse octal digit (`0`..`7`) -/ 137 | def octDigit : ParserT ε σ Char m (Fin 8) := 138 | withErrorMessage "expected octal digit" do 139 | tokenMap fun c => 140 | if c >= '0' then 141 | let val := c.toNat - '0'.toNat 142 | if h : val < 8 then 143 | some ⟨val, h⟩ 144 | else 145 | none 146 | else 147 | none 148 | 149 | /-- Parse hexadecimal digit (`0`..`9`, `A`..`F` and `a`..`f`) -/ 150 | def hexDigit : ParserT ε σ Char m (Fin 16) := 151 | withErrorMessage "expected hexadecimal digit" do 152 | tokenMap fun c => 153 | if c < '0' then none else 154 | let val := c.toNat - '0'.toNat 155 | if h : val < 10 then 156 | some ⟨val, Nat.lt_trans h (by decide)⟩ 157 | else if c < 'A' then none else 158 | let val := val - ('A'.toNat - '9'.toNat - 1) 159 | if h : val < 16 then 160 | some ⟨val, h⟩ 161 | else if c < 'a' then none else 162 | let val := val - ('a'.toNat - 'A'.toNat) 163 | if h : val < 16 then 164 | some ⟨val, h⟩ 165 | else 166 | none 167 | 168 | end ASCII 169 | 170 | end Parser.Char 171 | -------------------------------------------------------------------------------- /examples/JSON.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2025 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser 7 | 8 | /-! # A JSON Validator 9 | 10 | The JSON data interchange syntax is defined in [ECMA Standard 404][ECMA]. A convenient visual 11 | representation of the syntax can be found at [json.org][JSON]. 12 | 13 | For convenience, the syntax has been translated to BNF in the comments to the code below. The BNF 14 | variant used does not allow character escape sequences. Instead, `""` inside a string represents the 15 | double quotes character. Unicode code points can also be used in the form `U+` followed by at least 16 | four (and at most six) uppercase hexadecimal digits for the code point (if more than four, the first 17 | cannot be zero). 18 | 19 | [ECMA]: https://www.ecma-international.org/publications-and-standards/standards/ecma-404/ 20 | [JSON]: https://www.json.org/json-en.html 21 | -/ 22 | 23 | namespace JSON 24 | 25 | open Parser Char 26 | 27 | /-- JSON parser monad -/ 28 | protected abbrev Parser := SimpleParser String.Slice Char 29 | 30 | /-- Parse JSON white spaces 31 | 32 | JSON only recognizes four white space characters: space (U+0020), line feed (U+000A), carriage 33 | return (U+000D), horizontal tab (U+0009). 34 | ``` 35 | ::= "" | U+00020 | U+000A | U+000D | U+0009 36 | ``` 37 | -/ 38 | def ws : JSON.Parser Unit := 39 | dropMany <| tokenFilter [' ', '\n', '\r', '\t'].contains 40 | 41 | /-- Parse a JSON number 42 | 43 | Specification: 44 | ``` 45 | ::= 46 | ``` 47 | -/ 48 | protected partial def number : JSON.Parser Unit := 49 | withErrorMessage "expected number" do 50 | /- 51 | ``` 52 | ::= "0" | 53 | 54 | ::= "" | "-" 55 | 56 | := "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 57 | 58 | ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 59 | 60 | ::= "" | 61 | ``` 62 | -/ 63 | optional (char '-') -- `` 64 | first [ 65 | drop 1 (char '0'), -- `"0" |` 66 | dropMany1 ASCII.digit, -- ` ` 67 | throwUnexpected 68 | ] 69 | /- 70 | ``` 71 | := "" | "." 72 | 73 | ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 74 | 75 | ::= "" | 76 | ``` 77 | -/ 78 | optional do -- `"" |` 79 | drop 1 (char '.') -- `"."` 80 | dropMany1 ASCII.digit -- ` ` 81 | 82 | /- 83 | ``` 84 | ::= "" | 85 | 86 | ::= "e" | "E" 87 | 88 | ::= "" | "+" | "-" 89 | 90 | ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 91 | 92 | ::= "" | 93 | ``` 94 | -/ 95 | optional do -- `"" |` 96 | drop 1 (char 'e' <|> char 'E') -- `` 97 | optional (char '+' <|> char '-') -- `` 98 | dropMany1 ASCII.digit -- ` ` 99 | 100 | /-- Parse a JSON string 101 | 102 | The only characters that must be escaped in a JSON string are `"` (U+0022), `\` (U+005C), and 103 | control characters (U+0000 .. U+001F). 104 | 105 | Specification: 106 | ``` 107 | ::= """" """" 108 | 109 | ::= "" | 110 | 111 | ::= "\" | U+0020 .. U+10FFFF except """" (U+0022) and "\" (U+005C) 112 | ``` 113 | -/ 114 | protected def string : JSON.Parser Unit := 115 | withErrorMessage "expected string" do 116 | char '"' *> dropUntil (drop 1 <| char '"') do 117 | first [ 118 | char '\\' *> escape, -- `"\" |` 119 | drop 1 <| tokenFilter fun c => c ≥ ' ', -- `` 120 | throwUnexpected 121 | ] 122 | where 123 | /-- 124 | ``` 125 | ::= """" | "\" | "/" | "b" | "f" | "n" | "r" | "t" 126 | | "u" 127 | 128 | ::= "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" 129 | | "A" | "B" | "C" | "D" | "E" | "F" | "a" | "b" | "c" | "d" | "e" | "f" 130 | ``` 131 | -/ 132 | escape : JSON.Parser Unit := 133 | withErrorMessage "expected escape" do 134 | first [ 135 | drop 1 <| tokenFilter ['"', '\\', '/', 'b', 'f', 'n', 'r', 't'].contains, 136 | char 'u' *> drop 4 ASCII.hexDigit, 137 | throwUnexpected 138 | ] 139 | 140 | mutual 141 | 142 | /-- Parse a JSON value 143 | 144 | Specification: 145 | ``` 146 | ::= | | | | "true" | "false" | "null" 147 | ``` 148 | The `object` and `array` parsers recursively 149 | depend on `value` so they are in a mutual 150 | declaration block. 151 | -/ 152 | protected partial def value : JSON.Parser Unit := 153 | first [ 154 | JSON.object, 155 | JSON.array, 156 | JSON.string, 157 | JSON.number, 158 | drop 1 <| chars "true", 159 | drop 1 <| chars "false", 160 | drop 1 <| chars "null", 161 | throwUnexpectedWithMessage none "expected value" 162 | ] 163 | 164 | /-- Parse a JSON object 165 | 166 | Specification: 167 | ``` 168 | ::= "{" "}" | "{" "}" 169 | 170 | ::= | "," 171 | 172 | ::= ":" 173 | ``` 174 | -/ 175 | protected partial def object : JSON.Parser Unit := 176 | withErrorMessage "expected object" do 177 | drop 1 <| char '{' 178 | let _ ← sepBy (char ',') do 179 | let _ ← ws *> JSON.string <* ws 180 | drop 1 <| char ':' 181 | let _ ← ws *> JSON.value <* ws 182 | drop 1 <| char '}' 183 | 184 | /-- Parse a JSON array 185 | 186 | Specification: 187 | ``` 188 | ::= "[" "]" | "[" "]" 189 | 190 | ::= | "," 191 | 192 | ::= 193 | ``` 194 | -/ 195 | protected partial def array : JSON.Parser Unit := 196 | withErrorMessage "expected array" do 197 | drop 1 <| char '[' 198 | let _ ← sepBy (char ',') do 199 | let _ ← ws *> JSON.value <* ws 200 | drop 1 <| char ']' 201 | 202 | 203 | end 204 | 205 | /-- JSON validator -/ 206 | def validate (str : String) : Bool := 207 | match Parser.run (ws *> JSON.value <* ws <* endOfInput) str.toSlice with 208 | | .ok _ _ => true 209 | | .error _ _ => false 210 | 211 | end JSON 212 | -------------------------------------------------------------------------------- /examples/BNF.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser 7 | 8 | /-! 9 | The string `BNF.bnf` below represents BNF syntax in BNF. In this example, we 10 | will write a BNF parser and verify that it can correctly parse its own 11 | syntax! 12 | 13 | There are many BNF variants and there is no official one. The common feature 14 | of these variants is that BNF syntax avoids parentheses and has only two 15 | combinators: concatenation and alternative. Rule identifiers must consist 16 | only of letters, numbers and hyphens `-` and must start with a letter. 17 | Each rule is terminated by an end-of-line marker. 18 | 19 | The BNF variant below simplifies the syntax for literals by only allowing 20 | single-quoted literals and single quotes within literals must be doubled. 21 | Thus `''''` represents one single quote and `''''''` represents two. The 22 | characters that can occur in literals are limited to ASCII letters, digits, 23 | and a selected list of symbols. Literals can also contain end-of-line 24 | marker. 25 | -/ 26 | 27 | namespace BNF 28 | 29 | /-- String representation of BNF syntax -/ 30 | protected def bnf : String := 31 | -- All the line breaks are significant! 32 | " ::= | 33 | ::= '<' '>' '::=' 34 | ::= | '|' 35 | ::= | 36 | ::= '''' '''' | '<' '>' 37 | ::= '' | 38 | ::= | '''''' 39 | ::= 40 | ::= '' | 41 | ::= | | '-' 42 | ::= | 43 | ::= | | 44 | ::= 'A' | 'B' | 'C' | 'D' | 'E' | 'F' | 'G' | 'H' | 'I' | 'J' | 'K' | 'L' | 'M' | 'N' | 'O' | 'P' | 'Q' | 'R' | 'S' | 'T' | 'U' | 'V' | 'W' | 'X' | 'Y' | 'Z' | 'a' | 'b' | 'c' | 'd' | 'e' | 'f' | 'g' | 'h' | 'i' | 'j' | 'k' | 'l' | 'm' | 'n' | 'o' | 'p' | 'q' | 'r' | 's' | 't' | 'u' | 'v' | 'w' | 'x' | 'y' | 'z' 45 | ::= '0' | '1' | '2' | '3' | '4' | '5' | '6' | '7' | '8' | '9' 46 | ::= '|' | ' ' | '!' | '#' | '$' | '%' | '&' | '(' | ')' | '*' | '+' | ',' | '-' | '.' | '/' | ':' | ';' | '>' | '=' | '<' | '?' | '@' | '[' | ']' | '^' | '_' | '`' | '{' | '}' | '~' | 47 | ::= '' | ' ' 48 | ::= ' 49 | ' 50 | " 51 | 52 | /-! 53 | ## BNF Syntax Tree ## 54 | -/ 55 | 56 | /-- Type for -/ 57 | inductive Term 58 | | rule : String → Term 59 | | literal : String → Term 60 | deriving Repr, Inhabited 61 | 62 | instance : ToString Term where 63 | toString 64 | | .rule name => "<" ++ name ++ ">" 65 | | .literal str => "'" ++ str.replace "'" "''" ++ "'" 66 | 67 | /-- Type for -/ 68 | inductive ExprCat where 69 | | pure : Term → ExprCat 70 | | cons : Term → ExprCat → ExprCat 71 | deriving Repr, Inhabited 72 | 73 | instance : ToString ExprCat := 74 | let rec pp : ExprCat → String 75 | | .pure e => toString e 76 | | .cons e es => toString e ++ " " ++ pp es 77 | ⟨pp⟩ 78 | 79 | /-- Type for -/ 80 | inductive ExprAlt where 81 | | pure : ExprCat → ExprAlt 82 | | cons : ExprCat → ExprAlt → ExprAlt 83 | deriving Repr, Inhabited 84 | 85 | instance : ToString ExprAlt := 86 | let rec pp : ExprAlt → String 87 | | .pure l => toString l 88 | | .cons l ls => toString l ++ " | " ++ pp ls 89 | ⟨pp⟩ 90 | 91 | /-- Type for -/ 92 | inductive Syntax where 93 | | pure : String → ExprAlt → Syntax 94 | | cons : String → ExprAlt → Syntax → Syntax 95 | deriving Repr, Inhabited 96 | 97 | instance : ToString Syntax := 98 | let rec pp : Syntax → String 99 | | .pure n e => s!"<{n}> ::= {toString e}\n" 100 | | .cons n e stx => s!"<{n}> ::= {toString e}\n" ++ pp stx 101 | ⟨pp⟩ 102 | 103 | /-! 104 | ## BNF Parser ## 105 | -/ 106 | 107 | /-- BNF parser monad -/ 108 | abbrev BNFParser := SimpleParser String.Slice Char 109 | 110 | namespace BNFParser 111 | open Parser Char 112 | 113 | /-- Parser for -/ 114 | def eol : BNFParser Char := 115 | withErrorMessage "" do 116 | Parser.Char.eol 117 | 118 | /-- Parser for -/ 119 | def spaces : BNFParser Unit := 120 | withErrorMessage "" do 121 | dropMany (char ' ') 122 | 123 | /-- Parser for -/ 124 | def symbol : BNFParser Char := 125 | let list := ['|', ' ', '!', '#', '$', '%', '&', '(', ')', '*', '+', ',', '-', '.', '/', ':', ';', 126 | '>', '=', '<', '?', '@', '[', ']', '^', '_', '`', '{', '}', '~', '\n'] 127 | withErrorMessage "" do 128 | tokenFilter list.elem 129 | 130 | /-- Parser for -/ 131 | def digit : BNFParser Char := 132 | withErrorMessage "" do 133 | ASCII.numeric 134 | 135 | /-- Parser for -/ 136 | def letter : BNFParser Char := 137 | withErrorMessage "" do 138 | ASCII.alpha 139 | 140 | /-- Parser for -/ 141 | def character : BNFParser Char := 142 | withErrorMessage "" do 143 | ASCII.alphanum <|> symbol 144 | 145 | /-- Parser for -/ 146 | def lineEnd : BNFParser Unit := 147 | withErrorMessage "" do 148 | dropMany (spaces <* eol) 149 | 150 | /-- Parser for -/ 151 | def nameCharacter : BNFParser Char := 152 | withErrorMessage "" do 153 | ASCII.alphanum <|> char '-' 154 | 155 | /-- Parser for -/ 156 | def nameString : BNFParser String := 157 | withErrorMessage "" do 158 | foldl String.push "" nameCharacter 159 | 160 | /-- Parser for -/ 161 | def name : BNFParser String := 162 | withErrorMessage "" do 163 | let a ← letter 164 | let s ← nameString 165 | return a.toString ++ s 166 | 167 | /-- Parser for -/ 168 | def textCharacter : BNFParser Char := 169 | withErrorMessage "" do 170 | character <|> char '\'' *> char '\'' 171 | 172 | /-- Parser for -/ 173 | partial def text : BNFParser String := 174 | withErrorMessage "" do 175 | foldl String.push "" textCharacter 176 | 177 | /-- Parser for -/ 178 | def term : BNFParser Term := 179 | let literal : BNFParser String := char '\'' *> text <* char '\'' 180 | let rule : BNFParser String := char '<' *> name <* char '>' 181 | withErrorMessage "" do 182 | Term.literal <$> literal <|> Term.rule <$> rule 183 | 184 | /-- Parser for -/ 185 | partial def exprCat : BNFParser ExprCat := 186 | withErrorMessage "" do 187 | let expr ← spaces *> term 188 | ExprCat.cons expr <$> exprCat 189 | <|> return ExprCat.pure expr 190 | 191 | /-- Parser for -/ 192 | partial def exprAlt : BNFParser ExprAlt := 193 | withErrorMessage "" <| do 194 | let expr ← exprCat 195 | ExprAlt.cons expr <$> (spaces *> char '|' *> exprAlt) 196 | <|> return ExprAlt.pure expr 197 | 198 | /-- Parser for -/ 199 | def rule : BNFParser (String × ExprAlt) := 200 | withErrorMessage "" do 201 | let name ← spaces *> char '<' *> name <* char '>' 202 | let _ ← spaces *> chars "::=" 203 | let expr ← exprAlt <* lineEnd 204 | return (name, expr) 205 | 206 | /-- Parser for -/ 207 | partial def «syntax» : BNFParser Syntax := 208 | withErrorMessage "" do 209 | let (name, expr) ← withErrorMessage ": expected rule" rule 210 | Syntax.cons name expr <$> «syntax» 211 | <|> return Syntax.pure name expr 212 | 213 | end BNFParser 214 | 215 | /-- Parse BNF from string -/ 216 | def parse (input : String) : Except String BNF.Syntax := 217 | match (BNFParser.syntax <* Parser.endOfInput).run input.toSlice with 218 | | .ok _ stx => .ok stx 219 | | .error _ err => .error ("error: " ++ toString err) 220 | 221 | section Test 222 | 223 | /-- Parsed BNF syntax -/ 224 | protected def stx : IO BNF.Syntax := 225 | match BNF.parse BNF.bnf with 226 | | .ok stx => return stx 227 | | .error e => IO.println e *> return default 228 | 229 | #eval show IO Bool from do 230 | let stx ← BNF.stx 231 | return toString stx == BNF.bnf -- round trip? 232 | 233 | end Test 234 | 235 | end BNF 236 | -------------------------------------------------------------------------------- /Parser/Char/Unicode.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Char.Basic 7 | 8 | namespace Parser.Char.Unicode 9 | variable {ε σ m} [Parser.Stream σ Char] [Parser.Error ε σ Char] [Monad m] 10 | 11 | /-- parse alphabetic letter character -/ 12 | def alpha : ParserT ε σ Char m Char := 13 | withErrorMessage "expected letter" do 14 | tokenFilter Unicode.isAlpha 15 | 16 | /-- parse lowercase letter character -/ 17 | def lowercase : ParserT ε σ Char m Char := 18 | withErrorMessage "expected lowercase letter" do 19 | tokenFilter Unicode.isLowercase 20 | 21 | /-- parse math symbol character -/ 22 | def math : ParserT ε σ Char m Char := 23 | withErrorMessage "expected math symbol" do 24 | tokenFilter Unicode.isMath 25 | 26 | /-- parse uppercase letter character -/ 27 | def uppercase : ParserT ε σ Char m Char := 28 | withErrorMessage "expected uppercase letter" do 29 | tokenFilter Unicode.isUppercase 30 | 31 | /-- parse whitespace character -/ 32 | def whitespace : ParserT ε σ Char m Char := 33 | withErrorMessage "expected whitespace" do 34 | tokenFilter Unicode.isWhiteSpace 35 | 36 | /-- parse decimal digit character -/ 37 | def digit : ParserT ε σ Char m (Fin 10) := 38 | withErrorMessage "expected decimal digit" do 39 | tokenMap Unicode.getDigit? 40 | 41 | /-- parse hexadecimal digit character -/ 42 | def hexDigit : ParserT ε σ Char m (Fin 16) := 43 | withErrorMessage "expected hexadecimal decimal digit" do 44 | tokenMap Unicode.getHexDigit? 45 | 46 | /-! 47 | ## General Category ## 48 | -/ 49 | 50 | /-- parse character from given general category -/ 51 | def parseGeneralCategory (category : Unicode.GC) : ParserT ε σ Char m Char := 52 | withErrorMessage s!"expected character of general category {category}" do 53 | tokenFilter (. ∈ category) 54 | 55 | namespace GeneralCategory 56 | 57 | /-- parse letter (general category L) -/ 58 | def letter : ParserT ε σ Char m Char := 59 | withErrorMessage "expected letter (L)" do 60 | tokenFilter Unicode.GeneralCategory.isLetter 61 | 62 | /-- parse cased letter (general category LC) -/ 63 | def casedLetter : ParserT ε σ Char m Char := 64 | withErrorMessage "expected cased letter (LC)" do 65 | tokenFilter Unicode.GeneralCategory.isCasedLetter 66 | 67 | /-- parse lowercase letter (general category Ll) -/ 68 | def lowercaseLetter : ParserT ε σ Char m Char := 69 | withErrorMessage "expected lowercase letter (Ll)" do 70 | tokenFilter Unicode.GeneralCategory.isLowercaseLetter 71 | 72 | /-- parse uppercase letter (general category Lu) -/ 73 | def uppercaseLetter : ParserT ε σ Char m Char := 74 | withErrorMessage "expected uppercase letter (Lu)" do 75 | tokenFilter Unicode.GeneralCategory.isUppercaseLetter 76 | 77 | /-- parse titlecase letter (general category Lt) -/ 78 | def titlecaseLetter : ParserT ε σ Char m Char := 79 | withErrorMessage "expected titlecase letter (Lt)" do 80 | tokenFilter Unicode.GeneralCategory.isTitlecaseLetter 81 | 82 | /-- parse other letter (general category Lm) -/ 83 | def modifierLetter : ParserT ε σ Char m Char := 84 | withErrorMessage "expected modifier letter (Lm)" do 85 | tokenFilter Unicode.GeneralCategory.isModifierLetter 86 | 87 | /-- parse other letter (general category Lo) -/ 88 | def otherLetter : ParserT ε σ Char m Char := 89 | withErrorMessage "expected other letter (Lo)" do 90 | tokenFilter Unicode.GeneralCategory.isOtherLetter 91 | 92 | /-- parse mark (general category M) -/ 93 | def mark : ParserT ε σ Char m Char := 94 | withErrorMessage "expected mark (M)" do 95 | tokenFilter Unicode.GeneralCategory.isMark 96 | 97 | /-- parse spacing combining mark (general category Mc) -/ 98 | def spacingMark : ParserT ε σ Char m Char := 99 | withErrorMessage "expected spacing mark (Mc)" do 100 | tokenFilter Unicode.GeneralCategory.isSpacingMark 101 | 102 | /-- parse nonspacing combining mark (general category Mn) -/ 103 | def nonspacingMark : ParserT ε σ Char m Char := 104 | withErrorMessage "expected nonspacing mark (Mn)" do 105 | tokenFilter Unicode.GeneralCategory.isNonspacingMark 106 | 107 | /-- parse enclosing combining mark (general category Me) -/ 108 | def enclosingMark : ParserT ε σ Char m Char := 109 | withErrorMessage "expected enclosing mark (Me)" do 110 | tokenFilter Unicode.GeneralCategory.isEnclosingMark 111 | 112 | /-- parse number (general category N) -/ 113 | def number : ParserT ε σ Char m Char := 114 | withErrorMessage "expected number (N)" do 115 | tokenFilter Unicode.GeneralCategory.isNumber 116 | 117 | /-- parse decimal number (general category Nd) -/ 118 | def decimalNumber : ParserT ε σ Char m Char := 119 | withErrorMessage "expected decimal number (Nd)" do 120 | tokenFilter Unicode.GeneralCategory.isDecimalNumber 121 | 122 | /-- parse letter number (general category Nl) -/ 123 | def letterNumber : ParserT ε σ Char m Char := 124 | withErrorMessage "expected letter number (Nl)" do 125 | tokenFilter Unicode.GeneralCategory.isLetterNumber 126 | 127 | /-- parse other number (general category No) -/ 128 | def otherNumber : ParserT ε σ Char m Char := 129 | withErrorMessage "expected other number (No)" do 130 | tokenFilter Unicode.GeneralCategory.isOtherNumber 131 | 132 | /-- parse punctuation (general category P) -/ 133 | def punctuation : ParserT ε σ Char m Char := 134 | withErrorMessage "expected punctuation (P)" do 135 | tokenFilter Unicode.GeneralCategory.isPunctuation 136 | 137 | /-- parse connector punctuation (general category Pc) -/ 138 | def connectorPunctuation : ParserT ε σ Char m Char := 139 | withErrorMessage "expected connector punctuation (Pc)" do 140 | tokenFilter Unicode.GeneralCategory.isConnectorPunctuation 141 | 142 | /-- parse dash punctuation (general category Pd) -/ 143 | def dashPunctuation : ParserT ε σ Char m Char := 144 | withErrorMessage "expected dash punctuation (Pd)" do 145 | tokenFilter Unicode.GeneralCategory.isDashPunctuation 146 | 147 | /-- parse opening punctuation (general category Ps) -/ 148 | def openPunctuation : ParserT ε σ Char m Char := 149 | withErrorMessage "expected opening punctuation (Ps)" do 150 | tokenFilter Unicode.GeneralCategory.isOpenPunctuation 151 | 152 | /-- parse closing punctuation (general category Pe) -/ 153 | def closePunctuation : ParserT ε σ Char m Char := 154 | withErrorMessage "expected opening punctuation (Pe)" do 155 | tokenFilter Unicode.GeneralCategory.isClosePunctuation 156 | 157 | /-- parse initial punctuation (general category Pi) -/ 158 | def initialPunctuation : ParserT ε σ Char m Char := 159 | withErrorMessage "expected initial punctuation (Pi)" do 160 | tokenFilter Unicode.GeneralCategory.isInitialPunctuation 161 | 162 | /-- parse final punctuation (general category Pf) -/ 163 | def finalPunctuation : ParserT ε σ Char m Char := 164 | withErrorMessage "expected final punctuation (Pf)" do 165 | tokenFilter Unicode.GeneralCategory.isFinalPunctuation 166 | 167 | /-- parse other punctuation (general category Po) -/ 168 | def otherPunctuation : ParserT ε σ Char m Char := 169 | withErrorMessage "expected other punctuation (Po)" do 170 | tokenFilter Unicode.GeneralCategory.isOtherPunctuation 171 | 172 | /-- parse symbol (general category S) -/ 173 | def symbol : ParserT ε σ Char m Char := 174 | withErrorMessage "expected symbol (S)" do 175 | tokenFilter Unicode.GeneralCategory.isSymbol 176 | 177 | /-- parse math symbol (general category Sm) -/ 178 | def mathSymbol : ParserT ε σ Char m Char := 179 | withErrorMessage "expected math symbol (Sm)" do 180 | tokenFilter Unicode.GeneralCategory.isMathSymbol 181 | 182 | /-- parse currency symbol (general category Sc) -/ 183 | def currencySymbol : ParserT ε σ Char m Char := 184 | withErrorMessage "expected currency symbol (Sc)" do 185 | tokenFilter Unicode.GeneralCategory.isCurrencySymbol 186 | 187 | /-- parse modifier symbol (general category Sk) -/ 188 | def modifierSymbol : ParserT ε σ Char m Char := 189 | withErrorMessage "expected modifier symbol (Sk)" do 190 | tokenFilter Unicode.GeneralCategory.isModifierSymbol 191 | 192 | /-- parse other symbol (general category So) -/ 193 | def otherSymbol : ParserT ε σ Char m Char := 194 | withErrorMessage "expected other symbol (So)" do 195 | tokenFilter Unicode.GeneralCategory.isOtherSymbol 196 | 197 | /-- parse separator (general category Z) -/ 198 | def separator : ParserT ε σ Char m Char := 199 | withErrorMessage "expected separator (Z)" do 200 | tokenFilter Unicode.GeneralCategory.isSeparator 201 | 202 | /-- parse space separator (general category Zs) -/ 203 | def spaceSeparator : ParserT ε σ Char m Char := 204 | withErrorMessage "expected space separator (Zs)" do 205 | tokenFilter Unicode.GeneralCategory.isSpaceSeparator 206 | 207 | /-- parse line separator (general category Zl) -/ 208 | def lineSeparator : ParserT ε σ Char m Char := 209 | withErrorMessage "expected line separator (Zl)" do 210 | tokenFilter Unicode.GeneralCategory.isLineSeparator 211 | 212 | /-- parse paragraph separator (general category Zp) -/ 213 | def paragraphSeparator : ParserT ε σ Char m Char := 214 | withErrorMessage "expected paragraph separator (Zp)" do 215 | tokenFilter Unicode.GeneralCategory.isParagraphSeparator 216 | 217 | /-- parse other character (general category C) -/ 218 | def other : ParserT ε σ Char m Char := 219 | withErrorMessage "expected other character (C)" do 220 | tokenFilter Unicode.GeneralCategory.isOther 221 | 222 | /-- parse control character (general category Cc) -/ 223 | def control : ParserT ε σ Char m Char := 224 | withErrorMessage "expected control character (Cc)" do 225 | tokenFilter Unicode.GeneralCategory.isControl 226 | 227 | /-- parse format character (general category Cf) -/ 228 | def format : ParserT ε σ Char m Char := 229 | withErrorMessage "expected format character (Cf)" do 230 | tokenFilter Unicode.GeneralCategory.isFormat 231 | 232 | /-- parse surrogate character (general category Cs) -/ 233 | def surrogate : ParserT ε σ Char m Char := 234 | withErrorMessage "expected surrogate character (Cs)" do 235 | tokenFilter Unicode.GeneralCategory.isSurrogate 236 | 237 | /-- parse private-use character (general category Co) -/ 238 | def privateUse : ParserT ε σ Char m Char := 239 | withErrorMessage "expected private-use character (Co)" do 240 | tokenFilter Unicode.GeneralCategory.isPrivateUse 241 | 242 | /-- parse unassigned character (general category Cn) -/ 243 | def noncharacter : ParserT ε σ Char m Char := 244 | withErrorMessage "expected unassigned character (Cn)" do 245 | tokenFilter Unicode.GeneralCategory.isUnassigned 246 | 247 | end GeneralCategory 248 | 249 | end Parser.Char.Unicode 250 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /Parser/Parser.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2024 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Prelude 7 | import Parser.Error 8 | import Parser.Stream 9 | 10 | /-- Parser result type. -/ 11 | protected inductive Parser.Result.{u} (ε σ α : Type u) : Type u 12 | /-- Result: success! -/ 13 | | ok : σ → α → Parser.Result ε σ α 14 | /-- Result: error! -/ 15 | | error : σ → ε → Parser.Result ε σ α 16 | deriving Inhabited, Repr 17 | 18 | /-- 19 | `ParserT ε σ τ` is a monad transformer to parse tokens of type `τ` from the stream type `σ` with 20 | error type `ε`. 21 | -/ 22 | def ParserT (ε σ τ : Type _) [Parser.Stream σ τ] [Parser.Error ε σ τ] (m : Type _ → Type _) 23 | (α : Type _) : Type _ := σ → m (Parser.Result ε σ α) 24 | 25 | /-- Run the monadic parser `p` on input stream `s`. -/ 26 | @[inline] 27 | def ParserT.run [Parser.Stream σ τ] [Parser.Error ε σ τ] (p : ParserT ε σ τ m α) (s : σ) : 28 | m (Parser.Result ε σ α) := p s 29 | 30 | instance (σ ε τ m) [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] : 31 | Monad (ParserT ε σ τ m) where 32 | pure x s := return .ok s x 33 | bind x f s := x s >>= fun 34 | | .ok s a => f a s 35 | | .error s e => return .error s e 36 | map f x s := x s >>= fun 37 | | .ok s a => return .ok s (f a) 38 | | .error s e => return .error s e 39 | seq f x s := f s >>= fun 40 | | .ok s f => x () s >>= fun 41 | | .ok s x => return .ok s (f x) 42 | | .error s e => return .error s e 43 | | .error s e => return .error s e 44 | seqLeft x y s := x s >>= fun 45 | | .ok s x => y () s >>= fun 46 | | .ok s _ => return .ok s x 47 | | .error s e => return .error s e 48 | | .error s e => return .error s e 49 | seqRight x y s := x s >>= fun 50 | | .ok s _ => y () s >>= fun 51 | | .ok s y => return .ok s y 52 | | .error s e => return .error s e 53 | | .error s e => return .error s e 54 | 55 | instance (σ ε τ m) [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] : 56 | MonadExceptOf ε (ParserT ε σ τ m) where 57 | throw e s := return .error s e 58 | tryCatch p c s := p s >>= fun 59 | | .ok s v => return .ok s v 60 | | .error s e => (c e).run s 61 | 62 | instance (σ ε τ m) [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] : 63 | OrElse (ParserT ε σ τ m α) where 64 | orElse p q s := 65 | let savePos := Parser.Stream.getPosition s 66 | p s >>= fun 67 | | .ok s v => return .ok s v 68 | | .error s _ => q () (Parser.Stream.setPosition s savePos) 69 | 70 | instance (σ ε τ m) [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] : 71 | MonadLift m (ParserT ε σ τ m) where 72 | monadLift x s := (.ok s ·) <$> x 73 | 74 | /-- 75 | `Parser ε σ τ` monad to parse tokens of type `τ` from the stream type `σ` with error type `ε`. 76 | -/ 77 | abbrev Parser (ε σ τ) [Parser.Stream σ τ] [Parser.Error ε σ τ] := ParserT ε σ τ Id 78 | 79 | /-- Run parser `p` on input stream `s`. -/ 80 | @[inline] 81 | protected def Parser.run {ε σ τ α} [Parser.Stream σ τ] [Parser.Error ε σ τ] (p : Parser ε σ τ α) 82 | (s : σ) : Parser.Result ε σ α := p s 83 | 84 | /-- 85 | `TrivialParserT σ τ` monad transformer to parse tokens of type `τ` from the stream `σ` with trivial 86 | error handling. 87 | -/ 88 | abbrev TrivialParserT (σ τ) [Parser.Stream σ τ] (m) := ParserT Parser.Error.Trivial σ τ m 89 | 90 | /-- 91 | `TrivialParser σ τ` monad to parse tokens of type `τ` from the stream `σ` with trivial error 92 | handling. 93 | -/ 94 | abbrev TrivialParser (σ τ) [Parser.Stream σ τ] := Parser Parser.Error.Trivial σ τ 95 | 96 | /-- 97 | `BasicParserT σ τ` monad transformer to parse tokens of type `τ` from the stream `σ` with basic 98 | error handling. 99 | -/ 100 | abbrev BasicParserT (σ τ) [Parser.Stream σ τ] (m) := ParserT (Parser.Error.Basic σ τ) σ τ m 101 | 102 | /-- 103 | `BasicParser σ τ` monad to parse tokens of type `τ` from the stream `σ` with basic error handling. 104 | -/ 105 | abbrev BasicParser (σ τ) [Parser.Stream σ τ] := Parser (Parser.Error.Basic σ τ) σ τ 106 | 107 | /-- 108 | `SimpleParserT σ τ` monad transformer to parse tokens of type `τ` from the stream `σ` with simple 109 | error handling. 110 | -/ 111 | abbrev SimpleParserT (σ τ) [Parser.Stream σ τ] (m) := ParserT (Parser.Error.Simple σ τ) σ τ m 112 | 113 | /-- 114 | `SimpleParser σ τ` monad to parse tokens of type `τ` from the stream `σ` with simple error handling. 115 | -/ 116 | abbrev SimpleParser (σ τ) [Parser.Stream σ τ] := Parser (Parser.Error.Simple σ τ) σ τ 117 | 118 | namespace Parser 119 | variable {ε σ α β : Type u} [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] [MonadExceptOf ε m] 120 | 121 | /-! # Stream Functions -/ 122 | 123 | /-- Get parser stream. -/ 124 | @[inline] 125 | def getStream : ParserT ε σ τ m σ := 126 | fun s => return .ok s s 127 | 128 | /-- Set parser stream. -/ 129 | @[inline] 130 | def setStream (s : σ) : ParserT ε σ τ m PUnit := 131 | fun _ => return .ok s PUnit.unit 132 | 133 | /-- Get stream position from parser. -/ 134 | @[inline] 135 | def getPosition : ParserT ε σ τ m (Stream.Position σ) := 136 | Stream.getPosition <$> getStream 137 | 138 | /-- Set stream position from parser. -/ 139 | @[inline] 140 | def setPosition (pos : Stream.Position σ) : ParserT ε σ τ m PUnit := do 141 | setStream <| Stream.setPosition (← getStream) pos 142 | 143 | /-- `withBacktracking p` parses `p` but does not consume any input on error. -/ 144 | @[inline] 145 | def withBacktracking (p : ParserT ε σ τ m α) : ParserT ε σ τ m α := do 146 | let savePos ← getPosition 147 | try p 148 | catch e => 149 | setPosition savePos 150 | throw e 151 | 152 | /-- 153 | `withCapture p` parses `p` and returns the output of `p` with the corresponding stream segment. 154 | -/ 155 | def withCapture {ε σ α : Type _} [Parser.Stream σ τ] [Parser.Error ε σ τ] (p : ParserT ε σ τ m α) : 156 | ParserT ε σ τ m (α × Stream.Segment σ) := do 157 | let startPos ← getPosition 158 | let x ← p 159 | let stopPos ← getPosition 160 | return (x, startPos, stopPos) 161 | 162 | /-! # Error Functions -/ 163 | 164 | /-- Throw error on unexpected token. -/ 165 | @[inline] 166 | def throwUnexpected (input : Option τ := none) : ParserT ε σ τ m α := do 167 | throw (Error.unexpected (← getPosition) input) 168 | 169 | /-- Throw error with additional message. -/ 170 | @[inline] 171 | def throwErrorWithMessage (e : ε) (msg : String) : ParserT ε σ τ m α := do 172 | throw (Error.addMessage e (← getPosition) msg) 173 | 174 | /-- Throw error on unexpected token with error message. -/ 175 | @[inline] 176 | def throwUnexpectedWithMessage (input : Option τ := none) (msg : String) : ParserT ε σ τ m α := do 177 | throwErrorWithMessage (Error.unexpected (← getPosition) input) msg 178 | 179 | /-- Add message on parser error. -/ 180 | @[inline] 181 | def withErrorMessage (msg : String) (p : ParserT ε σ τ m α) : ParserT ε σ τ m α := do 182 | try p catch e => throwErrorWithMessage e msg 183 | 184 | /-! # Low-Level Combinators -/ 185 | 186 | /-! ### `foldl` family -/ 187 | 188 | @[specialize] 189 | private partial def efoldlPAux [Inhabited ε] [Inhabited σ] [Inhabited β] 190 | (f : β → α → ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (y : β) (s : σ) : 191 | m (Parser.Result ε σ (β × ε × Bool)) := 192 | let savePos := Stream.getPosition s 193 | p s >>= fun 194 | | .ok s x => f y x s >>= fun 195 | | .ok s y => efoldlPAux f p y s 196 | | .error s e => return .ok (Stream.setPosition s savePos) (y, e, true) 197 | | .error s e => return .ok (Stream.setPosition s savePos) (y, e, false) 198 | 199 | /-- 200 | `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial 201 | value and the parser `p` to generate inputs of type `α`. The folding ends as soon as the update 202 | parser function `(p >>= f ⬝)` fails. Then the final folding result is returned along with the pair: 203 | 204 | - `(e, true)` if the final `p` succeeds but then `f` fails reporting error `e`. 205 | - `(e, false)` if the final `p` fails reporting error `e`. 206 | 207 | In either case, the final `p` is not consumed. This parser never fails. 208 | -/ 209 | @[inline] 210 | def efoldlP (f : β → α → ParserT ε σ τ m β) (init : β) (p : ParserT ε σ τ m α) : 211 | ParserT ε σ τ m (β × ε × Bool) := 212 | fun s => 213 | have : Inhabited β := ⟨init⟩ 214 | have : Inhabited σ := ⟨s⟩ 215 | have : Inhabited ε := ⟨Error.unexpected (Stream.getPosition s) none⟩ 216 | efoldlPAux f p init s 217 | 218 | /-- 219 | `foldlM f init p` folds the monadic function `f` from left to right using `init` as an intitial 220 | value and the parser `p` to generate inputs of type `α`. The folding ends as soon as `p` fails and 221 | the error reported by `p` is returned along with the result of folding. This parser never fails. 222 | -/ 223 | @[inline] 224 | def efoldlM (f : β → α → m β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (β × ε) := 225 | efoldlP (fun y x => monadLift <| f y x) init p >>= fun (y,e,_) => return (y,e) 226 | 227 | /-- 228 | `foldl f init p` folds the function `f` from left to right using `init` as an intitial value 229 | and the parser `p` to generate inputs of type `α`. The folding ends as soon as `p` fails and the 230 | error reported by `p` is returned along with the result of folding. This parser never fails. 231 | -/ 232 | @[inline] 233 | def efoldl (f : β → α → β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (β × ε) := 234 | efoldlM (fun y x => pure <| f y x) init p 235 | 236 | /-- 237 | `foldlP f init p` folds the parser function `f` from left to right using `init` as an intitial 238 | value and the parser `p` to generate inputs of type `α`. The folding ends as soon as the update 239 | function `(p >>= f ·)` fails. This parser never fails. 240 | -/ 241 | @[inline] 242 | def foldlP (f : β → α → ParserT ε σ τ m β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := 243 | Prod.fst <$> efoldlP f init p 244 | 245 | /-- 246 | `foldlM f init p` folds the monadic function `f` from left to right using `init` as an intitial 247 | value and the parser `p` to generate inputs of type `α`. The folding ends as soon as `p` fails. 248 | This parser never fails. 249 | -/ 250 | @[inline] 251 | def foldlM (f : β → α → m β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := 252 | Prod.fst <$> efoldlM f init p 253 | 254 | /-- 255 | `foldl f init p` folds the function `f` from left to right using `init` as an intitial value and 256 | the parser `p` to generate inputs of type `α`. The folding ends as soon as `p` fails. 257 | This parser never fails. 258 | -/ 259 | @[inline] 260 | def foldl (f : β → α → β) (init : β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := 261 | Prod.fst <$> efoldl f init p 262 | 263 | /-! ### `option` family -/ 264 | 265 | /-- 266 | `eoption p` tries to parse `p` (with backtracking) and returns: 267 | 268 | - `Sum.inl x` if `p` returns `x`, 269 | - `Sum.inr e` if `p`fails with error `e`. 270 | 271 | This parser never fails. 272 | -/ 273 | @[specialize] 274 | def eoption (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Sum α ε) := 275 | fun s => 276 | let savePos := Stream.getPosition s 277 | p s >>= fun 278 | | .ok s x => return .ok s (.inl x) 279 | | .error s e => return .ok (Stream.setPosition s savePos) (.inr e) 280 | 281 | /-- 282 | `optionM p` tries to parse `p` (with backtracking) and returns `x` if `p` returns `x`, returns the 283 | monadic value `default` if `p` fails. This parser never fails. 284 | -/ 285 | @[inline] 286 | def optionM (p : ParserT ε σ τ m α) (default : m α) : ParserT ε σ τ m α := do 287 | match ← eoption p with 288 | | .inl x => return x 289 | | .inr _ => default 290 | 291 | /-- 292 | `optionD p` tries to parse `p` (with backtracking) and returns `x` if `p` returns `x`, returns 293 | `default` if `p` fails. This parser never fails. 294 | -/ 295 | @[inline] 296 | def optionD (p : ParserT ε σ τ m α) (default : α) : ParserT ε σ τ m α := 297 | optionM p (pure default) 298 | 299 | /-- 300 | `option! p` tries to parse `p` (with backtracking) and returns `x` if `p` returns `x`, returns 301 | `Inhabited.default` if `p` fails. This parser never fails. 302 | -/ 303 | @[inline] 304 | def option! [Inhabited α] (p : ParserT ε σ τ m α) : ParserT ε σ τ m α := 305 | optionD p default 306 | 307 | /-- 308 | `option? p` tries to parse `p` and returns `some x` if `p` returns `x`, returns `none` if `p` 309 | fails. This parser never fails. 310 | -/ 311 | @[inline] 312 | def option? (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Option α) := 313 | option! (some <$> p) 314 | 315 | /-- 316 | `optional p` tries to parse `p` (with backtracking) ignoring output or errors. This parser never 317 | fails. 318 | -/ 319 | @[inline] 320 | def optional (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 321 | eoption p *> return 322 | 323 | /-! ### `first` family -/ 324 | 325 | /-- 326 | `efirst ps` tries parsers from the list `ps` in order (with backtracking) until one succeeds: 327 | 328 | - Once a parser `p` succeeds with value `x` then `some x` is returne along with the list of errors 329 | from all previous parsers. 330 | - If none succeed then `none` is returned along with the list of errors of all parsers. 331 | 332 | This parser never fails. 333 | -/ 334 | def efirst (ps : List (ParserT ε σ τ m α)) : ParserT ε σ τ m (Option α × List ε) := 335 | go ps [] 336 | where 337 | go : List (ParserT ε σ τ m α) → List ε → ParserT ε σ τ m (Option α × List ε) 338 | | [], es => return (none, es.reverse) 339 | | p :: ps, es => do 340 | match ← eoption p with 341 | | .inl x => return (some x, es.reverse) 342 | | .inr e => go ps (e :: es) 343 | 344 | /-- 345 | `first ps` tries parsers from the list `ps` in order (with backtracking) until one succeeds and 346 | returns the result of that parser. 347 | 348 | The optional parameter `combine` can be used to control the error reported when all parsers fail. 349 | The default is to only report the error from the last parser. 350 | -/ 351 | def first (ps : List (ParserT ε σ τ m α)) (combine : ε → ε → ε := fun _ => id) : 352 | ParserT ε σ τ m α := do 353 | go ps (Error.unexpected (← getPosition) none) 354 | where 355 | go : List (ParserT ε σ τ m α) → ε → ParserT ε σ τ m α 356 | | [], e, s => return .error s e 357 | | p :: ps, e, s => 358 | let savePos := Stream.getPosition s 359 | p s >>= fun 360 | | .ok s v => return .ok s v 361 | | .error s f => go ps (combine e f) (Stream.setPosition s savePos) 362 | -------------------------------------------------------------------------------- /Parser/Basic.lean: -------------------------------------------------------------------------------- 1 | /- 2 | Copyright © 2022-2024 François G. Dorais, Kyrill Serdyuk, Emma Shroyer. All rights reserved. 3 | Released under Apache 2.0 license as described in the file LICENSE. 4 | -/ 5 | 6 | import Parser.Prelude 7 | import Parser.Error 8 | import Parser.Parser 9 | import Parser.Stream 10 | 11 | namespace Parser 12 | variable [Parser.Stream σ τ] [Parser.Error ε σ τ] [Monad m] 13 | 14 | /-! # Token Functions -/ 15 | 16 | /-- 17 | `tokenCore next?` reads a token from the stream using `next?`. 18 | 19 | This is a low-level parser to customize how the parser stream is used. 20 | -/ 21 | @[inline] 22 | def tokenCore (next? : σ → Option (τ × σ)) : ParserT ε σ τ m (ULift τ) := do 23 | match next? (← getStream) with 24 | | some (tok, stream) => 25 | let _ ← setStream stream 26 | return ⟨tok⟩ 27 | | none => throwUnexpected 28 | 29 | /-- 30 | `tokenMap test` accepts token `t` with result `x` if `test t = some x`, otherise fails reporting 31 | the unexpected token. 32 | -/ 33 | @[specialize] 34 | def tokenMap (test : τ → Option α) : ParserT ε σ τ m α := do 35 | let ⟨tok⟩ ← tokenCore Stream.next? 36 | match test tok with 37 | | some x => return x 38 | | none => throwUnexpected tok 39 | 40 | /-- 41 | `anyToken` consumes and returns one token from the stream. Only fails on end of stream. 42 | -/ 43 | @[inline] 44 | def anyToken : ParserT ε σ τ m τ := 45 | tokenMap some 46 | 47 | /-- 48 | `tokenFilter test` accepts and returns token `t` if `test t = true`, otherwise fails reporting 49 | unexpected token. 50 | -/ 51 | @[inline] 52 | def tokenFilter (test : τ → Bool) : ParserT ε σ τ m τ := 53 | tokenMap fun c => if test c then some c else none 54 | 55 | /-- 56 | `token tk` accepts and returns `tk`, otherwise fails otherwise fails reporting unexpected token. 57 | -/ 58 | @[inline] 59 | def token [BEq τ] (tk : τ) : ParserT ε σ τ m τ := 60 | tokenFilter (. == tk) 61 | 62 | /-- 63 | `tokenArray tks` accepts and returns tokens from `tks` in order, otherwise fails reporting the 64 | first unexpected token. 65 | -/ 66 | def tokenArray [BEq τ] (tks : Array τ) : ParserT ε σ τ m (Array τ) := 67 | withBacktracking do 68 | let mut acc : Array τ := #[] 69 | for tk in tks do 70 | acc := acc.push (← token tk) 71 | return acc 72 | 73 | /-- 74 | `tokenArray tks` accepts and returns tokens from `tks` in order, otherwise fails reporting the 75 | first unexpected token. 76 | -/ 77 | def tokenList [BEq τ] (tks : List τ) : ParserT ε σ τ m (List τ) := 78 | withBacktracking do 79 | let mut acc : Array τ := #[] 80 | for tk in tks do 81 | acc := acc.push (← token tk) 82 | return acc.toList 83 | 84 | /-! # Basic Combinators -/ 85 | 86 | /-- 87 | `lookAhead p` tries to parses `p` without consuming any input. If `p` fails then the stream is 88 | backtracked with the same error. 89 | -/ 90 | def lookAhead (p : ParserT ε σ τ m α) : ParserT ε σ τ m α := do 91 | let savePos ← getPosition 92 | try 93 | let x ← p 94 | setPosition savePos 95 | return x 96 | catch e => 97 | setPosition savePos 98 | throw e 99 | 100 | /-- 101 | `peek` returns the next token, without consuming any input. Only fails on end of stream. 102 | -/ 103 | abbrev peek : ParserT ε σ τ m τ := lookAhead anyToken 104 | 105 | /-- 106 | `notFollowedBy p` succeeds only if `p` fails. Consumes no input regardless of outcome. 107 | -/ 108 | @[inline] 109 | def notFollowedBy (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := do 110 | try 111 | let _ ← lookAhead p 112 | catch _ => 113 | return 114 | throwUnexpected 115 | 116 | /-- 117 | `endOfInput` succeeds only on end of stream. Consumes no input. 118 | -/ 119 | abbrev endOfInput : ParserT ε σ τ m PUnit := notFollowedBy anyToken 120 | 121 | /-- 122 | `test p` returns `true` if `p` succeeds and `false` otherwise. This parser never fails. 123 | -/ 124 | @[inline] 125 | def test (p : ParserT ε σ τ m α) : ParserT ε σ τ m Bool := 126 | optionD (p *> return true) false 127 | 128 | /-! ### `foldr` -/ 129 | 130 | /-- `foldr f p q` -/ 131 | @[inline] 132 | partial def foldr (f : α → β → β) (p : ParserT ε σ τ m α) (q : ParserT ε σ τ m β) : 133 | ParserT ε σ τ m β := 134 | try 135 | let x ← withBacktracking p 136 | let y ← foldr f p q 137 | return f x y 138 | catch _ => q 139 | 140 | /-! ### `take` family -/ 141 | 142 | /-- 143 | `take n p` parses exactly `n` occurrences of `p`, and returns an array of the returned values 144 | of `p`. 145 | -/ 146 | @[inline] 147 | def take (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 148 | withBacktracking do rest n #[] 149 | where 150 | rest : Nat → Array α → ParserT ε σ τ m (Array α) 151 | | 0, xs => return xs 152 | | n+1, xs => do rest n <| xs.push (← p) 153 | 154 | /-- 155 | `takeUpTo n p` parses up to `n` occurrences of `p`, and returns an array of the returned values 156 | of `p`. This parser never fails. 157 | -/ 158 | @[inline] 159 | def takeUpTo (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 160 | rest n #[] 161 | where 162 | rest : Nat → Array α → ParserT ε σ τ m (Array α) 163 | | 0, xs => return xs 164 | | n+1, xs => do 165 | match ← option? p with 166 | | some x => rest n <| xs.push x 167 | | none => return xs 168 | 169 | /-- 170 | `takeMany p` parses zero or more occurrences of `p` until it fails, and returns the array of 171 | returned values of `p`. This parser never fails. 172 | -/ 173 | @[inline] 174 | def takeMany (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 175 | foldl Array.push #[] p 176 | 177 | /-- 178 | `takeMany1 p` parses one or more occurrences of `p` until it fails, and returns the array of 179 | returned values of `p`. Consumes no input on error. -/ 180 | @[inline] 181 | def takeMany1 (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 182 | withBacktracking do foldl Array.push #[(← p)] p 183 | 184 | /-- 185 | `takeManyN n p` parses `n` or more occurrences of `p` until it fails, and returns the array of 186 | returned values of `p`. Consumes no input on error. 187 | -/ 188 | @[inline] 189 | def takeManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 190 | withBacktracking do foldl Array.push (← take n p) p 191 | 192 | /-- 193 | `takeUntil stop p` parses zero or more occurrences of `p` until `stop` succeeds, and returns the 194 | array of returned values of `p` and the output of `stop`. If `p` fails before `stop` is encountered, 195 | the error from `p` is reported and no input is consumed. 196 | -/ 197 | partial def takeUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : 198 | ParserT ε σ τ m (Array α × β) := 199 | have := Inhabited.mk do return ((#[] : Array α), (← stop)) 200 | withBacktracking do rest #[] 201 | where 202 | rest [Inhabited (ParserT ε σ τ m (Array α × β))] (acc : Array α) := do 203 | match ← option? stop with 204 | | some y => return (acc, y) 205 | | none => rest <| acc.push (← p) 206 | 207 | /-! ### `drop` family -/ 208 | 209 | /-- 210 | `drop n p` parses exactly `n` occurrences of `p` (without backtracking), ignoring all outputs. 211 | -/ 212 | @[inline] 213 | def drop (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 214 | match n with 215 | | 0 => return 216 | | n+1 => p *> drop n p 217 | 218 | /-- 219 | `dropUpTo n p` parses up to `n` occurrences of `p` (with backtracking) ignoring all outputs. This 220 | parser never fails. 221 | -/ 222 | @[inline] 223 | def dropUpTo (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 224 | match n with 225 | | 0 => return 226 | | n+1 => do 227 | match ← option? p with 228 | | some _ => drop n p 229 | | none => return 230 | 231 | /-- 232 | `dropMany p` parses zero or more occurrences of `p` (with backtracking) until it fails, ignoring 233 | all outputs. 234 | -/ 235 | @[inline] 236 | def dropMany (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 237 | foldl (Function.const α) .unit p 238 | 239 | /-- 240 | `dropMany1 p` parses one or more occurrences of `p` (with backtracking) until it fails, ignoring 241 | all outputs. 242 | -/ 243 | @[inline] 244 | def dropMany1 (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 245 | withBacktracking p *> foldl (Function.const α) () p 246 | 247 | /-- 248 | `dropManyN n p` parses `n` or more occurrences of `p` until it fails, ignoring all outputs. 249 | -/ 250 | @[inline] 251 | def dropManyN (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m PUnit := 252 | withBacktracking do drop n p *> foldl (Function.const α) () p 253 | 254 | /-- 255 | `dropUntil stop p` runs `p` until `stop` succeeds, returns the output of `stop` ignoring all 256 | outputs from `p`. If `p` fails before encountering `stop` then the error from `p` is reported 257 | and no input is consumed. 258 | -/ 259 | partial def dropUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m β := 260 | withBacktracking loop 261 | where 262 | loop := do 263 | match ← option? stop with 264 | | some s => return s 265 | | none => p *> loop 266 | 267 | /-! `count` family -/ 268 | 269 | /-- 270 | `count p` parses occurrences of `p` (with backtracking) until it fails and returns the count of 271 | successes. 272 | -/ 273 | @[inline] 274 | partial def count (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := 275 | foldl (fun n _ => n+1) 0 p 276 | 277 | /-- 278 | `countUpTo n p` parses up to `n` occurrences of `p` until it fails, and returns the count of 279 | successes. This parser never fails. 280 | -/ 281 | @[inline] 282 | def countUpTo (n : Nat) (p : ParserT ε σ τ m α) : ParserT ε σ τ m Nat := 283 | loop n 0 284 | where 285 | loop : Nat → Nat → ParserT ε σ τ m Nat 286 | | 0, ct => return ct 287 | | n+1, ct => do 288 | match ← option? p with 289 | | some _ => loop n (ct+1) 290 | | none => return ct 291 | 292 | /-- 293 | `countUntil stop p` counts zero or more occurrences of `p` until `stop` succeeds, and returns 294 | the count of successes and the output of `stop`. If `p` fails before encountering `stop` then the 295 | error from `p` is reported and no input is consumed. 296 | -/ 297 | partial def countUntil (stop : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : 298 | ParserT ε σ τ m (Nat × β) := do 299 | let _ := Inhabited.mk do return (0, ← stop) 300 | withBacktracking do loop 0 301 | where 302 | loop [Inhabited (ParserT ε σ τ m (Nat × β))] (ct : Nat) := do 303 | match ← option? stop with 304 | | some s => return (ct, s) 305 | | none => p *> loop (ct+1) 306 | 307 | /-! ### `endBy` family -/ 308 | 309 | @[specialize] 310 | private def endByCore (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (init : Array α) (strict : Bool := false) : 311 | ParserT ε σ τ m (Array α) := do 312 | match ← efoldlP (fun xs x => sep *> pure (xs.push x)) init p with 313 | | (xs, e, true) => if strict then throw e else return xs 314 | | (xs, _, _) => return xs 315 | 316 | /-- 317 | `endBy p sep` parses zero or more occurrences of `p`, separated and ended by `sep`, returns 318 | the array of values returned by `p`. 319 | 320 | The optional `strict` parameter controls error reporting: 321 | 322 | * If `strict = false` then this parser never fails and returns the longest possible array. 323 | * If `strict = true` then this parser returns the longest possible array but fails if there is a 324 | final occurrence of `p` without a trailing `sep`. Then the error of `sep` is reported. 325 | 326 | No input is consumed on error. 327 | -/ 328 | @[inline] 329 | def endBy (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (strict : Bool := false) : 330 | ParserT ε σ τ m (Array α) := withBacktracking do endByCore sep p #[] strict 331 | 332 | /-- 333 | `endBy1 p sep` parses one or more occurrences of `p`, separated and ended by `sep`, returns 334 | the array of values returned by `p`. 335 | 336 | The optional `strict` parameter controls error reporting after parsing the initial `p` and `sep`: 337 | 338 | * If `strict = false` then this parser never fails and returns the longest possible array. 339 | * If `strict = true` then this parser returns the longest possible array but fails if there is a 340 | final occurrence of `p` without a trailing `sep`. Then the error of `sep` is reported. 341 | 342 | No input is consumed on error. 343 | -/ 344 | @[inline] 345 | def endBy1 (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (strict : Bool := False) : 346 | ParserT ε σ τ m (Array α) := withBacktracking do endByCore sep p #[← p <* sep] strict 347 | 348 | /-! ### `sepBy` family -/ 349 | 350 | @[specialize] 351 | private def sepByCore (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (init : Array α) (strict : Bool := false) : 352 | ParserT ε σ τ m (Array α) := do 353 | match ← efoldlP (fun xs _ => p >>= fun x => pure (xs.push x)) init sep with 354 | | (xs, e, true) => if strict then throw e else return xs 355 | | (xs, _, _) => return xs 356 | 357 | /-- 358 | `sepBy p sep` parses zero or more occurrences of `p`, separated by `sep`, returns the array of 359 | values returned by `p`. 360 | 361 | The optional `strict` parameter controls error reporting: 362 | 363 | * If `strict = false` then this parser never fails and returns the longest possible array. 364 | * If `strict = true` then this parser returns the longest possible array but fails if there is a 365 | final occurrence of `sep` without a trailing `p`. Then the error of `p` is reported. 366 | 367 | No input is consumed on error. 368 | -/ 369 | @[inline] 370 | def sepBy (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (strict : Bool := false) : 371 | ParserT ε σ τ m (Array α) := withBacktracking do 372 | match ← option? p with 373 | | some x => sepByCore sep p #[x] strict 374 | | none => return #[] 375 | 376 | /-- 377 | `sepBy1 p sep` parses one or more occurrences of `p`, separated by `sep`, returns the array of 378 | values returned by `p`. 379 | 380 | The optional `strict` parameter controls error reporting after parsing the initial `p`: 381 | 382 | * If `strict = false` then this parser never fails and returns the longest possible array. 383 | * If `strict = true` then this parser returns the longest possible array but fails if there is a 384 | final occurrence of `sep` without a trailing `p`. Then the error of `p` is reported. 385 | 386 | No input is consumed on error. 387 | -/ 388 | @[inline] 389 | def sepBy1 (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) (strict : Bool := false) : 390 | ParserT ε σ τ m (Array α) := withBacktracking do sepByCore sep p #[← p] strict 391 | 392 | /-- 393 | `sepNoEndBy p sep` parses zero or more occurrences of `p`, separated `sep` but without a trailing 394 | `sep`, returns the array of values returned by `p`. 395 | -/ 396 | @[inline] 397 | def sepNoEndBy (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 398 | sepBy sep p true 399 | 400 | /-- 401 | `sepNoEndBy1 p sep` parses one or more occurrences of `p`, separated `sep` but without a trailing 402 | `sep`, returns the array of values returned by `p`. 403 | -/ 404 | @[inline] 405 | def sepNoEndBy1 (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 406 | sepBy1 sep p true 407 | 408 | /-- 409 | `sepEndBy p sep` parses zero or more occurrences of `p`, separated by `sep` with an optional 410 | trailing `sep`, returns the array of values returned by `p`. This parser never fails. -/ 411 | @[inline] 412 | def sepEndBy (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 413 | sepBy sep p <* optional sep 414 | 415 | /-- 416 | `sepEndBy1 p sep` parses one or more occurrences of `p`, separated by `sep` with an optional 417 | trailing `sep`, returns the array of values returned by `p`. This parser never fails. 418 | -/ 419 | @[inline] 420 | def sepEndBy1 (sep : ParserT ε σ τ m β) (p : ParserT ε σ τ m α) : ParserT ε σ τ m (Array α) := 421 | sepBy1 sep p <* optional sep 422 | --------------------------------------------------------------------------------