├── .github └── workflows │ └── rust.yml ├── .gitignore ├── Cargo.lock ├── Cargo.toml ├── LICENSE ├── README.md ├── bootstrap.sh ├── peg-macros ├── Cargo.toml ├── LICENSE ├── analysis.rs ├── ast.rs ├── bin.rs ├── grammar.rs ├── grammar.rustpeg ├── lib.rs ├── tokens.rs └── translate.rs ├── peg-runtime ├── Cargo.toml ├── LICENSE ├── error.rs ├── lib.rs ├── slice.rs └── str.rs ├── src └── lib.rs └── tests ├── compile-fail ├── cache_with_args.rs ├── cache_with_args.stderr ├── duplicate_rule.rs ├── duplicate_rule.stderr ├── incomplete_grammar.rs ├── incomplete_grammar.stderr ├── left_recursion_without_cache.rs ├── left_recursion_without_cache.stderr ├── nullable_loop.rs ├── nullable_loop.stderr ├── rule_args_errors.rs ├── rule_args_errors.stderr ├── rust_action_syntax_error.rs ├── rust_action_syntax_error.stderr ├── rust_action_type_error.rs ├── rust_action_type_error.stderr ├── syntax_error.rs ├── syntax_error.stderr ├── use_undefined_result.rs ├── use_undefined_result.stderr ├── use_undefined_rule.rs └── use_undefined_rule.stderr ├── run-pass ├── arithmetic.rs ├── arithmetic_ast.rs ├── arithmetic_infix.rs ├── arithmetic_infix_ast.rs ├── arithmetic_infix_ast_span.rs ├── arithmetic_with_left_recursion.rs ├── assembly_ast_dyn_type_param_bounds.rs ├── borrow_from_input.rs ├── bytes.rs ├── conditional_block.rs ├── crate_import.rs ├── custom_expr.rs ├── errors.rs ├── generic_fn_traits.rs ├── grammar_with_args_and_cache.rs ├── keyval.rs ├── lifetimes.rs ├── memoization.rs ├── no_eof.rs ├── optional.rs ├── pattern.rs ├── pos_neg_assert.rs ├── position.rs ├── raw_ident.rs ├── renamed_imports.rs ├── repeats.rs ├── return_type.rs ├── rule_args.rs ├── rule_generic.rs ├── rule_where_clause.rs ├── rust_use_tree.rs ├── test-hygiene.rs ├── tokens.rs ├── tokens_struct.rs └── utf8.rs └── trybuild.rs /.github/workflows/rust.yml: -------------------------------------------------------------------------------- 1 | name: Rust 2 | 3 | on: 4 | push: 5 | pull_request: 6 | 7 | env: 8 | 
CARGO_TERM_COLOR: always 9 | 10 | jobs: 11 | build: 12 | name: Rust ${{matrix.rust}} 13 | strategy: 14 | fail-fast: false 15 | matrix: 16 | rust: [stable, 1.68.0] 17 | 18 | runs-on: ubuntu-latest 19 | 20 | steps: 21 | - uses: actions/checkout@v3 22 | - uses: dtolnay/rust-toolchain@master 23 | with: 24 | toolchain: ${{matrix.rust}} 25 | components: rustfmt 26 | - name: Check bootstrap 27 | run: ./bootstrap.sh && git diff --exit-code 28 | - name: Run tests 29 | run: cargo test --all 30 | - name: Run tests with trace feature 31 | run: cargo test --all --features trace 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | target 3 | peg-macros/grammar_new.rs 4 | peg-macros/grammar_old.rs 5 | -------------------------------------------------------------------------------- /Cargo.lock: -------------------------------------------------------------------------------- 1 | # This file is automatically @generated by Cargo. 2 | # It is not intended for manual editing. 
3 | version = 3 4 | 5 | [[package]] 6 | name = "equivalent" 7 | version = "1.0.2" 8 | source = "registry+https://github.com/rust-lang/crates.io-index" 9 | checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" 10 | 11 | [[package]] 12 | name = "glob" 13 | version = "0.3.2" 14 | source = "registry+https://github.com/rust-lang/crates.io-index" 15 | checksum = "a8d1add55171497b4705a648c6b583acafb01d58050a51727785f0b2c8e0a2b2" 16 | 17 | [[package]] 18 | name = "hashbrown" 19 | version = "0.15.2" 20 | source = "registry+https://github.com/rust-lang/crates.io-index" 21 | checksum = "bf151400ff0baff5465007dd2f3e717f3fe502074ca563069ce3a6629d07b289" 22 | 23 | [[package]] 24 | name = "indexmap" 25 | version = "2.7.1" 26 | source = "registry+https://github.com/rust-lang/crates.io-index" 27 | checksum = "8c9c992b02b5b4c94ea26e32fe5bccb7aa7d9f390ab5c1221ff895bc7ea8b652" 28 | dependencies = [ 29 | "equivalent", 30 | "hashbrown", 31 | ] 32 | 33 | [[package]] 34 | name = "itoa" 35 | version = "1.0.14" 36 | source = "registry+https://github.com/rust-lang/crates.io-index" 37 | checksum = "d75a2a4b1b190afb6f5425f10f6a8f959d2ea0b9c2b1d79553551850539e4674" 38 | 39 | [[package]] 40 | name = "memchr" 41 | version = "2.7.4" 42 | source = "registry+https://github.com/rust-lang/crates.io-index" 43 | checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3" 44 | 45 | [[package]] 46 | name = "once_cell" 47 | version = "1.20.3" 48 | source = "registry+https://github.com/rust-lang/crates.io-index" 49 | checksum = "945462a4b81e43c4e3ba96bd7b49d834c6f61198356aa858733bc4acf3cbe62e" 50 | 51 | [[package]] 52 | name = "peg" 53 | version = "0.8.5" 54 | dependencies = [ 55 | "peg-macros", 56 | "peg-runtime", 57 | "trybuild", 58 | "version_check", 59 | ] 60 | 61 | [[package]] 62 | name = "peg-macros" 63 | version = "0.8.5" 64 | dependencies = [ 65 | "peg-runtime", 66 | "proc-macro2", 67 | "quote", 68 | ] 69 | 70 | [[package]] 71 | name = "peg-runtime" 72 | 
version = "0.8.5" 73 | 74 | [[package]] 75 | name = "proc-macro2" 76 | version = "1.0.93" 77 | source = "registry+https://github.com/rust-lang/crates.io-index" 78 | checksum = "60946a68e5f9d28b0dc1c21bb8a97ee7d018a8b322fa57838ba31cc878e22d99" 79 | dependencies = [ 80 | "unicode-ident", 81 | ] 82 | 83 | [[package]] 84 | name = "quote" 85 | version = "1.0.38" 86 | source = "registry+https://github.com/rust-lang/crates.io-index" 87 | checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc" 88 | dependencies = [ 89 | "proc-macro2", 90 | ] 91 | 92 | [[package]] 93 | name = "ryu" 94 | version = "1.0.19" 95 | source = "registry+https://github.com/rust-lang/crates.io-index" 96 | checksum = "6ea1a2d0a644769cc99faa24c3ad26b379b786fe7c36fd3c546254801650e6dd" 97 | 98 | [[package]] 99 | name = "serde" 100 | version = "1.0.218" 101 | source = "registry+https://github.com/rust-lang/crates.io-index" 102 | checksum = "e8dfc9d19bdbf6d17e22319da49161d5d0108e4188e8b680aef6299eed22df60" 103 | dependencies = [ 104 | "serde_derive", 105 | ] 106 | 107 | [[package]] 108 | name = "serde_derive" 109 | version = "1.0.218" 110 | source = "registry+https://github.com/rust-lang/crates.io-index" 111 | checksum = "f09503e191f4e797cb8aac08e9a4a4695c5edf6a2e70e376d961ddd5c969f82b" 112 | dependencies = [ 113 | "proc-macro2", 114 | "quote", 115 | "syn", 116 | ] 117 | 118 | [[package]] 119 | name = "serde_json" 120 | version = "1.0.139" 121 | source = "registry+https://github.com/rust-lang/crates.io-index" 122 | checksum = "44f86c3acccc9c65b153fe1b85a3be07fe5515274ec9f0653b4a0875731c72a6" 123 | dependencies = [ 124 | "itoa", 125 | "memchr", 126 | "ryu", 127 | "serde", 128 | ] 129 | 130 | [[package]] 131 | name = "serde_spanned" 132 | version = "0.6.8" 133 | source = "registry+https://github.com/rust-lang/crates.io-index" 134 | checksum = "87607cb1398ed59d48732e575a4c28a7a8ebf2454b964fe3f224f2afc07909e1" 135 | dependencies = [ 136 | "serde", 137 | ] 138 | 139 | [[package]] 140 | 
name = "syn" 141 | version = "2.0.98" 142 | source = "registry+https://github.com/rust-lang/crates.io-index" 143 | checksum = "36147f1a48ae0ec2b5b3bc5b537d267457555a10dc06f3dbc8cb11ba3006d3b1" 144 | dependencies = [ 145 | "proc-macro2", 146 | "quote", 147 | "unicode-ident", 148 | ] 149 | 150 | [[package]] 151 | name = "termcolor" 152 | version = "1.4.1" 153 | source = "registry+https://github.com/rust-lang/crates.io-index" 154 | checksum = "06794f8f6c5c898b3275aebefa6b8a1cb24cd2c6c79397ab15774837a0bc5755" 155 | dependencies = [ 156 | "winapi-util", 157 | ] 158 | 159 | [[package]] 160 | name = "toml" 161 | version = "0.8.20" 162 | source = "registry+https://github.com/rust-lang/crates.io-index" 163 | checksum = "cd87a5cdd6ffab733b2f74bc4fd7ee5fff6634124999ac278c35fc78c6120148" 164 | dependencies = [ 165 | "serde", 166 | "serde_spanned", 167 | "toml_datetime", 168 | "toml_edit", 169 | ] 170 | 171 | [[package]] 172 | name = "toml_datetime" 173 | version = "0.6.8" 174 | source = "registry+https://github.com/rust-lang/crates.io-index" 175 | checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41" 176 | dependencies = [ 177 | "serde", 178 | ] 179 | 180 | [[package]] 181 | name = "toml_edit" 182 | version = "0.22.24" 183 | source = "registry+https://github.com/rust-lang/crates.io-index" 184 | checksum = "17b4795ff5edd201c7cd6dca065ae59972ce77d1b80fa0a84d94950ece7d1474" 185 | dependencies = [ 186 | "indexmap", 187 | "serde", 188 | "serde_spanned", 189 | "toml_datetime", 190 | "winnow", 191 | ] 192 | 193 | [[package]] 194 | name = "trybuild" 195 | version = "1.0.90" 196 | source = "registry+https://github.com/rust-lang/crates.io-index" 197 | checksum = "2aa6f84ec205ebf87fb7a0abdbcd1467fa5af0e86878eb6d888b78ecbb10b6d5" 198 | dependencies = [ 199 | "glob", 200 | "once_cell", 201 | "serde", 202 | "serde_derive", 203 | "serde_json", 204 | "termcolor", 205 | "toml", 206 | ] 207 | 208 | [[package]] 209 | name = "unicode-ident" 210 | version = "1.0.17" 211 | 
source = "registry+https://github.com/rust-lang/crates.io-index" 212 | checksum = "00e2473a93778eb0bad35909dff6a10d28e63f792f16ed15e404fca9d5eeedbe" 213 | 214 | [[package]] 215 | name = "version_check" 216 | version = "0.9.5" 217 | source = "registry+https://github.com/rust-lang/crates.io-index" 218 | checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" 219 | 220 | [[package]] 221 | name = "winapi-util" 222 | version = "0.1.9" 223 | source = "registry+https://github.com/rust-lang/crates.io-index" 224 | checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" 225 | dependencies = [ 226 | "windows-sys", 227 | ] 228 | 229 | [[package]] 230 | name = "windows-sys" 231 | version = "0.59.0" 232 | source = "registry+https://github.com/rust-lang/crates.io-index" 233 | checksum = "1e38bc4d79ed67fd075bcc251a1c39b32a1776bbe92e5bef1f0bf1f8c531853b" 234 | dependencies = [ 235 | "windows-targets", 236 | ] 237 | 238 | [[package]] 239 | name = "windows-targets" 240 | version = "0.52.6" 241 | source = "registry+https://github.com/rust-lang/crates.io-index" 242 | checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973" 243 | dependencies = [ 244 | "windows_aarch64_gnullvm", 245 | "windows_aarch64_msvc", 246 | "windows_i686_gnu", 247 | "windows_i686_gnullvm", 248 | "windows_i686_msvc", 249 | "windows_x86_64_gnu", 250 | "windows_x86_64_gnullvm", 251 | "windows_x86_64_msvc", 252 | ] 253 | 254 | [[package]] 255 | name = "windows_aarch64_gnullvm" 256 | version = "0.52.6" 257 | source = "registry+https://github.com/rust-lang/crates.io-index" 258 | checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3" 259 | 260 | [[package]] 261 | name = "windows_aarch64_msvc" 262 | version = "0.52.6" 263 | source = "registry+https://github.com/rust-lang/crates.io-index" 264 | checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469" 265 | 266 | [[package]] 267 | name = "windows_i686_gnu" 268 | 
version = "0.52.6" 269 | source = "registry+https://github.com/rust-lang/crates.io-index" 270 | checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b" 271 | 272 | [[package]] 273 | name = "windows_i686_gnullvm" 274 | version = "0.52.6" 275 | source = "registry+https://github.com/rust-lang/crates.io-index" 276 | checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66" 277 | 278 | [[package]] 279 | name = "windows_i686_msvc" 280 | version = "0.52.6" 281 | source = "registry+https://github.com/rust-lang/crates.io-index" 282 | checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66" 283 | 284 | [[package]] 285 | name = "windows_x86_64_gnu" 286 | version = "0.52.6" 287 | source = "registry+https://github.com/rust-lang/crates.io-index" 288 | checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78" 289 | 290 | [[package]] 291 | name = "windows_x86_64_gnullvm" 292 | version = "0.52.6" 293 | source = "registry+https://github.com/rust-lang/crates.io-index" 294 | checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d" 295 | 296 | [[package]] 297 | name = "windows_x86_64_msvc" 298 | version = "0.52.6" 299 | source = "registry+https://github.com/rust-lang/crates.io-index" 300 | checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" 301 | 302 | [[package]] 303 | name = "winnow" 304 | version = "0.7.3" 305 | source = "registry+https://github.com/rust-lang/crates.io-index" 306 | checksum = "0e7f4ea97f6f78012141bcdb6a216b2609f0979ada50b20ca5b52dde2eac2bb1" 307 | dependencies = [ 308 | "memchr", 309 | ] 310 | -------------------------------------------------------------------------------- /Cargo.toml: -------------------------------------------------------------------------------- 1 | [workspace] 2 | members = ["./peg-macros", "./peg-runtime"] 3 | 4 | [package] 5 | name = "peg" 6 | version = "0.8.5" 7 | authors = [ "Kevin Mehall " ] 8 | license = "MIT" 9 
| repository = "https://github.com/kevinmehall/rust-peg" 10 | description = "A simple Parsing Expression Grammar (PEG) parser generator." 11 | keywords = ["peg", "parser", "parsing", "grammar"] 12 | categories = ["parsing"] 13 | readme = "README.md" 14 | edition = "2021" 15 | rust-version = "1.68.0" # if changed, also update .github/workflows/rust.yml 16 | 17 | [dependencies] 18 | peg-macros = { path = "./peg-macros", version = "= 0.8.5" } 19 | peg-runtime = { path = "./peg-runtime", version = "= 0.8.5" } 20 | 21 | [dev-dependencies] 22 | trybuild = "1.0.80" 23 | version_check = "0.9" 24 | 25 | [[test]] 26 | name = "trybuild" 27 | path = "tests/trybuild.rs" 28 | harness = false 29 | 30 | [features] 31 | default = ["std"] 32 | trace = ["peg-macros/trace"] 33 | std = ["peg-runtime/std"] 34 | unstable = ["peg-runtime/unstable"] -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (C) 2013 Kevin Mehall 2 | 3 | Permission is hereby granted, free of charge, to any 4 | person obtaining a copy of this software and associated 5 | documentation files (the "Software"), to deal in the 6 | Software without restriction, including without 7 | limitation the rights to use, copy, modify, merge, 8 | publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software 10 | is furnished to do so, subject to the following 11 | conditions: 12 | 13 | The above copyright notice and this permission notice 14 | shall be included in all copies or substantial portions 15 | of the Software. 16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF 18 | ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED 19 | TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A 20 | PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT 21 | SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 22 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR 24 | IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 25 | DEALINGS IN THE SOFTWARE. 26 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parsing Expression Grammars in Rust 2 | 3 | [Documentation](https://docs.rs/peg) | [Release Notes](https://github.com/kevinmehall/rust-peg/releases) 4 | 5 | `rust-peg` is a simple yet flexible parser generator that makes it easy to write robust parsers. Based on the [Parsing Expression Grammar](https://en.wikipedia.org/wiki/Parsing_expression_grammar) formalism, it provides a Rust macro that builds a recursive descent parser from a concise definition of the grammar. 6 | 7 | ## Features 8 | 9 | * Parse input from `&str`, `&[u8]`, `&[T]` or custom types implementing traits 10 | * Customizable reporting of parse errors 11 | * Rules can accept arguments to create reusable rule templates 12 | * Precedence climbing for prefix/postfix/infix expressions 13 | * Helpful `rustc` error messages for errors in the grammar definition or the Rust 14 | code embedded within it 15 | * Rule-level tracing to debug grammars 16 | 17 | ## Example 18 | 19 | Parse a comma-separated list of numbers surrounded by brackets into a `Vec<u32>`: 20 | 21 | ```rust 22 | peg::parser!{ 23 | grammar list_parser() for str { 24 | rule number() -> u32 25 | = n:$(['0'..='9']+) {? 
n.parse().or(Err("u32")) } 26 | 27 | pub rule list() -> Vec<u32> 28 | = "[" l:(number() ** ",") "]" { l } 29 | } 30 | } 31 | 32 | pub fn main() { 33 | assert_eq!(list_parser::list("[1,1,2,3,5,8]"), Ok(vec![1, 1, 2, 3, 5, 8])); 34 | } 35 | ``` 36 | 37 | [See the tests for more examples](./tests/run-pass/) 38 | [Grammar rule syntax reference in rustdoc](https://docs.rs/peg) 39 | 40 | ## Comparison with similar parser generators 41 | 42 | | crate | parser type | action code | integration | input type | precedence climbing | parameterized rules | streaming input | 43 | |----------- |------------- |------------- |-------------------- |------------------------ |--------------------- |-------------------- |----------------- | 44 | | peg | PEG | in grammar | proc macro (block) | `&str`, `&[T]`, custom | Yes | Yes | No | 45 | | [pest] | PEG | external | proc macro (file) | `&str` | Yes | No | No | 46 | | [nom] | combinators | in source | library | `&[u8]`, custom | No | Yes | Yes | 47 | | [lalrpop] | LR(1) | in grammar | build script | `&str` | No | Yes | No | 48 | 49 | [pest]: https://github.com/pest-parser/pest 50 | [nom]: https://github.com/geal/nom 51 | [lalrpop]: https://github.com/lalrpop/lalrpop 52 | 53 | ## See also 54 | 55 | * [pegviz] is a UI for visualizing rust-peg's trace output to debug parsers. 56 | * There exist several crates to format diagnostic messages on source code snippets in the terminal, including [chic], [annotate-snippets], [codespan-reporting], and [codemap-diagnostic]. 57 | 58 | [pegviz]: https://github.com/fasterthanlime/pegviz 59 | [chic]: https://crates.io/crates/chic 60 | [annotate-snippets]: https://crates.io/crates/annotate-snippets 61 | [codespan-reporting]: https://crates.io/crates/codespan-reporting 62 | [codemap-diagnostic]: https://crates.io/crates/codemap-diagnostic 63 | 64 | ## Development 65 | 66 | The `rust-peg` grammar is written in `rust-peg`: `peg-macros/grammar.rustpeg`. 
To avoid the circular dependency, a precompiled grammar is checked in as `peg-macros/grammar.rs`. To regenerate this, run the `./bootstrap.sh` script. 67 | 68 | There is a large test suite which uses [`trybuild`](https://crates.io/crates/trybuild) to test both functionality (`tests/run-pass`) and error messages for incorrect grammars (`tests/compile-fail`). Because `rustc` error messages change, the `compile-fail` tests are only run on the minimum supported Rust version to avoid spurious failures. 69 | 70 | Use `cargo test` to run the entire suite, 71 | or `cargo test -- trybuild trybuild=lifetimes.rs` to test just the indicated file. 72 | Add `--features trace` to trace these tests. 73 | -------------------------------------------------------------------------------- /bootstrap.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | set -e 3 | 4 | cargo run -p peg-macros -- peg-macros/grammar.rustpeg > peg-macros/grammar_new.rs 5 | 6 | mv peg-macros/grammar.rs peg-macros/grammar_old.rs 7 | cp peg-macros/grammar_new.rs peg-macros/grammar.rs 8 | 9 | if cargo run -p peg-macros -- peg-macros/grammar.rustpeg > peg-macros/grammar_new.rs 10 | then 11 | diff -qs peg-macros/grammar.rs peg-macros/grammar_new.rs 12 | rustfmt peg-macros/grammar.rs 13 | else 14 | echo "Failed" 15 | fi 16 | -------------------------------------------------------------------------------- /peg-macros/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "peg-macros" 3 | version = "0.8.5" 4 | authors = [ "Kevin Mehall " ] 5 | license = "MIT" 6 | repository = "https://github.com/kevinmehall/rust-peg" 7 | description = "Procedural macros for rust-peg. To use rust-peg, see the `peg` crate." 
8 | edition = "2021" 9 | 10 | [dependencies] 11 | quote = "1.0" 12 | proc-macro2 = "1.0.24" 13 | peg-runtime = { version = "= 0.8.5", path = "../peg-runtime" } 14 | 15 | [features] 16 | trace = [] 17 | 18 | [lib] 19 | proc-macro = true 20 | name = "peg_macros" 21 | path = "lib.rs" 22 | 23 | [[bin]] 24 | name = "rust-peg" 25 | path = "bin.rs" 26 | test = false 27 | bench = false 28 | -------------------------------------------------------------------------------- /peg-macros/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /peg-macros/analysis.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Span; 2 | use std::collections::HashMap; 3 | 4 | use crate::ast::*; 5 | 6 | pub struct GrammarAnalysis<'a> { 7 | pub rules: HashMap, 8 | pub left_recursion: Vec, 9 | pub loop_nullability: Vec, 10 | } 11 | 12 | pub fn check<'a>(grammar: &'a Grammar) -> GrammarAnalysis<'a> { 13 | let mut rules = HashMap::new(); 14 | 15 | // Pick only the first for duplicate rules (the duplicate is reported when translating the rule) 16 | for rule in grammar.iter_rules() { 17 | rules.entry(rule.name.to_string()).or_insert(rule); 18 | } 19 | 20 | let (rule_nullability, left_recursion) = LeftRecursionVisitor::check(grammar, &rules); 21 | let loop_nullability = LoopNullabilityVisitor::check(grammar, &rule_nullability); 22 | 23 | GrammarAnalysis { 24 | rules, 25 | left_recursion, 26 | loop_nullability, 27 | } 28 | } 29 | 30 | /// Check for infinite loops in the form of left recursion. 31 | /// 32 | /// If a PEG expression recurses without first consuming input, it will 33 | /// recurse until the stack overflows. 
34 | struct LeftRecursionVisitor<'a> { 35 | stack: Vec, 36 | rules: &'a HashMap, 37 | errors: Vec, 38 | } 39 | 40 | pub struct LeftRecursionError { 41 | pub span: Span, 42 | pub path: Vec, 43 | } 44 | 45 | impl LeftRecursionError { 46 | pub fn msg(&self) -> String { 47 | format!( 48 | "left recursive rules create an infinite loop: {}", 49 | self.path.join(" -> ") 50 | ) 51 | } 52 | } 53 | 54 | impl<'a> LeftRecursionVisitor<'a> { 55 | fn check(grammar: &'a Grammar, rules: &HashMap) -> (HashMap, Vec) { 56 | let mut visitor = LeftRecursionVisitor { 57 | rules, 58 | errors: Vec::new(), 59 | stack: Vec::new(), 60 | }; 61 | 62 | let mut rule_nullability: HashMap = HashMap::new(); 63 | 64 | for rule in grammar.iter_rules() { 65 | let nullable = visitor.walk_rule(rule); 66 | debug_assert!(visitor.stack.is_empty()); 67 | rule_nullability.entry(rule.name.to_string()).or_insert(nullable); 68 | } 69 | 70 | (rule_nullability, visitor.errors) 71 | } 72 | 73 | fn walk_rule(&mut self, rule: &'a Rule) -> bool { 74 | self.stack.push(rule.name.to_string()); 75 | let res = self.walk_expr(&rule.expr); 76 | self.stack.pop().unwrap(); 77 | res 78 | } 79 | 80 | /// Walk the prefix of an expression that can be reached without consuming 81 | /// input. 82 | /// 83 | /// Returns true if the rule is known to match completely without consuming 84 | /// any input. This is a conservative heuristic, if unknown, we return false 85 | /// to avoid reporting false-positives for left recursion. 
86 | fn walk_expr(&mut self, this_expr: &SpannedExpr) -> bool { 87 | use self::Expr::*; 88 | match this_expr.expr { 89 | RuleExpr(ref rule_ident, _, _) => { 90 | let name = rule_ident.to_string(); 91 | 92 | if let Some(rule) = self.rules.get(&name) { 93 | if let Some(loop_start) = self 94 | .stack 95 | .iter() 96 | .position(|caller_name| caller_name == &name) 97 | { 98 | let mut recursive_loop = self.stack[loop_start..].to_vec(); 99 | recursive_loop.push(name.clone()); 100 | match rule.cache { 101 | None | Some(Cache::Simple) => 102 | self.errors.push(LeftRecursionError { 103 | path: recursive_loop, 104 | span: rule_ident.span(), 105 | }), 106 | _ => () 107 | 108 | } 109 | return false; 110 | } 111 | self.walk_rule(rule) 112 | } else { 113 | // Missing rule would have already been reported 114 | false 115 | } 116 | } 117 | 118 | ActionExpr(ref elems, ..) => { 119 | for elem in elems { 120 | if !self.walk_expr(&elem.expr) { 121 | return false; 122 | } 123 | } 124 | 125 | true 126 | } 127 | 128 | ChoiceExpr(ref choices) => { 129 | let mut nullable = false; 130 | for expr in choices { 131 | nullable |= self.walk_expr(expr); 132 | } 133 | nullable 134 | } 135 | 136 | OptionalExpr(ref expr) | PosAssertExpr(ref expr) | NegAssertExpr(ref expr) => { 137 | self.walk_expr(expr); 138 | true 139 | } 140 | 141 | Repeat { ref inner, ref bound, .. 
} => { 142 | let inner_nullable = self.walk_expr(inner); 143 | inner_nullable | !bound.has_lower_bound() 144 | } 145 | 146 | MatchStrExpr(ref expr) | QuietExpr(ref expr) => self.walk_expr(expr), 147 | 148 | PrecedenceExpr { ref levels } => { 149 | let mut nullable = false; 150 | 151 | for level in levels { 152 | for operator in &level.operators { 153 | let mut operator_nullable = true; 154 | for element in &operator.elements { 155 | if !self.walk_expr(&element.expr) { 156 | operator_nullable = false; 157 | break; 158 | } 159 | } 160 | nullable |= operator_nullable; 161 | } 162 | } 163 | 164 | nullable 165 | } 166 | 167 | | LiteralExpr(_) 168 | | PatternExpr(_) 169 | | MethodExpr(_, _) 170 | | CustomExpr(_) 171 | | FailExpr(_) 172 | | MarkerExpr(_) => false, 173 | 174 | PositionExpr => true, 175 | } 176 | } 177 | } 178 | 179 | /// Check for loops whose body can succeed without consuming any input, which 180 | /// will loop infinitely. 181 | struct LoopNullabilityVisitor<'a> { 182 | rule_nullability: &'a HashMap, 183 | errors: Vec, 184 | } 185 | 186 | pub struct LoopNullabilityError { 187 | pub span: Span, 188 | } 189 | 190 | impl LoopNullabilityError { 191 | pub fn msg(&self) -> String { 192 | format!("loops infinitely because loop body can match without consuming input") 193 | } 194 | } 195 | 196 | 197 | impl<'a> LoopNullabilityVisitor<'a> { 198 | fn check(grammar: &'a Grammar, rule_nullability: &HashMap) -> Vec { 199 | let mut visitor = LoopNullabilityVisitor { 200 | rule_nullability, 201 | errors: Vec::new(), 202 | }; 203 | 204 | for rule in grammar.iter_rules() { 205 | visitor.walk_expr(&rule.expr); 206 | } 207 | 208 | visitor.errors 209 | } 210 | 211 | 212 | /// Walk an expr and its children analyzing the nullability of loop bodies. 213 | /// 214 | /// Returns true if the rule is known to match completely without consuming 215 | /// any input. This is a conservative heuristic; if unknown, we return false 216 | /// to avoid reporting false-positives. 
217 | /// 218 | /// This is very similar to LeftRecursionVisitor::walk_expr, but walks the 219 | /// entire expression tree rather than just the nullable prefix, and doesn't 220 | /// recurse into calls. 221 | fn walk_expr(&mut self, this_expr: &SpannedExpr) -> bool { 222 | use self::Expr::*; 223 | match this_expr.expr { 224 | RuleExpr(ref rule_ident, _, _) => { 225 | let name = rule_ident.to_string(); 226 | *self.rule_nullability.get(&name).unwrap_or(&false) 227 | } 228 | 229 | ActionExpr(ref elems, ..) => { 230 | let mut nullable = true; 231 | for elem in elems { 232 | nullable &= self.walk_expr(&elem.expr); 233 | } 234 | nullable 235 | } 236 | 237 | ChoiceExpr(ref choices) => { 238 | let mut nullable = false; 239 | for expr in choices { 240 | nullable |= self.walk_expr(expr); 241 | } 242 | nullable 243 | } 244 | 245 | OptionalExpr(ref expr) | PosAssertExpr(ref expr) | NegAssertExpr(ref expr) => { 246 | self.walk_expr(expr); 247 | true 248 | } 249 | 250 | Repeat { ref inner, ref bound, ref sep } => { 251 | let inner_nullable = self.walk_expr(inner); 252 | let sep_nullable = sep.as_ref().map_or(true, |sep| self.walk_expr(sep)); 253 | 254 | // The entire purpose of this analysis: report errors if the loop body is nullable 255 | if inner_nullable && sep_nullable && !bound.has_upper_bound() { 256 | self.errors.push(LoopNullabilityError { span: this_expr.span }); 257 | } 258 | 259 | inner_nullable | !bound.has_lower_bound() 260 | } 261 | 262 | MatchStrExpr(ref expr) | QuietExpr(ref expr) => self.walk_expr(expr), 263 | 264 | PrecedenceExpr { ref levels } => { 265 | let mut nullable = false; 266 | 267 | for level in levels { 268 | for operator in &level.operators { 269 | let mut operator_nullable = true; 270 | for element in &operator.elements { 271 | operator_nullable &= self.walk_expr(&element.expr); 272 | } 273 | nullable |= operator_nullable; 274 | } 275 | } 276 | 277 | nullable 278 | } 279 | 280 | | LiteralExpr(_) 281 | | PatternExpr(_) 282 | | MethodExpr(_, _) 283 
| | CustomExpr(_) 284 | | FailExpr(_) 285 | | MarkerExpr(_) => false, 286 | 287 | PositionExpr => true, 288 | } 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /peg-macros/ast.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::{Group, Ident, Literal, Span, TokenStream}; 2 | 3 | #[derive(Debug)] 4 | pub struct Grammar { 5 | pub doc: Option, 6 | pub visibility: Option, 7 | pub name: Ident, 8 | pub lifetime_params: Option>, 9 | pub args: Vec<(Ident, TokenStream)>, 10 | pub items: Vec, 11 | pub input_type: TokenStream, 12 | } 13 | 14 | impl Grammar { 15 | pub fn iter_rules(&self) -> impl Iterator { 16 | self.items.iter().filter_map(|item| match item { 17 | Item::Rule(r) => Some(r), 18 | _ => None, 19 | }) 20 | } 21 | } 22 | 23 | #[derive(Debug)] 24 | pub enum Item { 25 | Use(TokenStream), 26 | Rule(Rule), 27 | } 28 | 29 | #[derive(Debug)] 30 | pub enum Cache { 31 | Simple, 32 | Recursive 33 | } 34 | 35 | #[derive(Debug)] 36 | pub struct Rule { 37 | pub span: Span, 38 | pub name: Ident, 39 | pub ty_params: Option>, 40 | pub params: Vec, 41 | pub expr: SpannedExpr, 42 | pub ret_type: Option, 43 | pub where_clause: Option, 44 | pub doc: Option, 45 | pub visibility: Option, 46 | pub cache: Option, 47 | pub no_eof: bool, 48 | } 49 | 50 | #[derive(Debug)] 51 | pub struct RuleParam { 52 | pub name: Ident, 53 | pub ty: RuleParamTy, 54 | } 55 | 56 | #[derive(Debug)] 57 | pub enum RuleParamTy { 58 | Rust(TokenStream), 59 | Rule(TokenStream), 60 | } 61 | 62 | #[derive(Debug, Clone)] 63 | pub struct TaggedExpr { 64 | pub name: Option, 65 | pub expr: SpannedExpr, 66 | } 67 | #[derive(Debug, Clone)] 68 | pub struct SpannedExpr { 69 | pub span: Span, 70 | pub expr: Expr, 71 | } 72 | 73 | #[derive(Debug, Clone)] 74 | pub enum Expr { 75 | LiteralExpr(Literal), 76 | PatternExpr(Group), 77 | RuleExpr(Ident, Option, Vec), 78 | MethodExpr(Ident, TokenStream), 79 | 
CustomExpr(Group), 80 | ChoiceExpr(Vec), 81 | OptionalExpr(Box), 82 | Repeat { inner: Box, bound: BoundedRepeat, sep: Option> }, 83 | PosAssertExpr(Box), 84 | NegAssertExpr(Box), 85 | ActionExpr(Vec, Option), 86 | MatchStrExpr(Box), 87 | PositionExpr, 88 | QuietExpr(Box), 89 | FailExpr(Group), 90 | PrecedenceExpr { 91 | levels: Vec, 92 | }, 93 | MarkerExpr(bool), 94 | } 95 | 96 | impl Expr { 97 | pub fn at(self, sp: Span) -> SpannedExpr { 98 | SpannedExpr { expr: self, span:sp } 99 | } 100 | } 101 | 102 | #[derive(Debug, Clone)] 103 | pub enum RuleArg { 104 | Rust(TokenStream), 105 | Peg(SpannedExpr), 106 | } 107 | 108 | #[derive(Debug, Clone)] 109 | pub struct PrecedenceLevel { 110 | pub operators: Vec, 111 | } 112 | 113 | #[derive(Debug, Clone)] 114 | pub struct PrecedenceOperator { 115 | pub span: Span, 116 | pub elements: Vec, 117 | pub action: Group, 118 | } 119 | 120 | #[derive(Debug, Clone)] 121 | pub enum BoundedRepeat { 122 | None, 123 | Plus, 124 | Exact(TokenStream), 125 | Both(Option, Option), 126 | } 127 | 128 | impl BoundedRepeat { 129 | pub fn has_lower_bound(&self) -> bool { 130 | match self { 131 | BoundedRepeat::None | BoundedRepeat::Both(None, _) => false, 132 | BoundedRepeat::Plus | BoundedRepeat::Exact(_) | BoundedRepeat::Both(Some(_), _) => true 133 | } 134 | } 135 | 136 | pub fn has_upper_bound(&self) -> bool { 137 | match self { 138 | BoundedRepeat::None | BoundedRepeat::Plus | BoundedRepeat::Both(_, None) => false, 139 | BoundedRepeat::Exact(_) | BoundedRepeat::Both(_, Some(_)) => true 140 | } 141 | } 142 | } 143 | -------------------------------------------------------------------------------- /peg-macros/bin.rs: -------------------------------------------------------------------------------- 1 | //! 
Standalone version of rust-peg used for bootstrapping the meta-grammar 2 | 3 | extern crate proc_macro; 4 | extern crate proc_macro2; 5 | extern crate quote; 6 | 7 | use std::env; 8 | use std::fs::File; 9 | use std::io::{stderr, stdin, stdout}; 10 | use std::io::{Read, Write}; 11 | use std::path::Path; 12 | use std::process; 13 | 14 | // This can't use the `peg` crate as it would be a circular dependency, but the generated code in grammar.rs 15 | // requires `::peg` paths. 16 | extern crate peg_runtime as peg; 17 | 18 | mod analysis; 19 | mod ast; 20 | mod grammar; 21 | mod tokens; 22 | mod translate; 23 | 24 | fn main() { 25 | let args = env::args_os().collect::>(); 26 | let progname = &args[0]; 27 | let mut log = stderr(); 28 | 29 | let mut source = String::new(); 30 | 31 | if args.len() == 2 && &args[1] != "-h" { 32 | File::open(Path::new(&args[1])) 33 | .unwrap() 34 | .read_to_string(&mut source) 35 | .unwrap(); 36 | } else if args.len() == 1 { 37 | stdin().read_to_string(&mut source).unwrap(); 38 | } else { 39 | writeln!(log, "Usage: {} [file]", progname.to_string_lossy()).unwrap(); 40 | process::exit(0); 41 | } 42 | 43 | let source_tokens = source.parse().expect("Error tokenizing input"); 44 | let input_tokens = tokens::FlatTokenStream::new(source_tokens); 45 | let grammar = match grammar::peg::peg_grammar(&input_tokens) { 46 | Ok(g) => g, 47 | Err(err) => { 48 | eprintln!("Failed to parse grammar: expected {}", err.expected); 49 | process::exit(1); 50 | } 51 | }; 52 | let parser_tokens = translate::compile_grammar(&grammar); 53 | let mut out = stdout(); 54 | writeln!(&mut out, "// Generated by rust-peg. 
Do not edit.").unwrap(); 55 | write!(&mut out, "{}", parser_tokens).unwrap(); 56 | } 57 | -------------------------------------------------------------------------------- /peg-macros/grammar.rustpeg: -------------------------------------------------------------------------------- 1 | pub grammar peg() for FlatTokenStream { 2 | 3 | use crate::ast::*; 4 | use crate::ast::Expr::*; 5 | use crate::tokens::FlatTokenStream; 6 | use proc_macro2::{ TokenStream, Ident, Group, Literal, Delimiter, Span }; 7 | 8 | pub rule peg_grammar() -> Grammar 9 | = doc:rust_doc_comment() visibility:rust_visibility() "grammar" name:IDENT() lifetime_params:rust_lifetime_params()? args:grammar_args() "for" input_type:$(rust_type()) "{" items:item()* "}" 10 | { Grammar { doc, visibility, name, lifetime_params, args, input_type, items } } 11 | 12 | rule rust_lifetime_params() -> Vec 13 | = "<" p:(($(LIFETIME())) ++ ",") ","? ">" { p } 14 | 15 | rule grammar_args() -> Vec<(Ident, TokenStream)> 16 | = "(" args:((i:IDENT() ":" t:$(rust_type()) { (i, t) })**",") ","? ")" { args } 17 | 18 | rule peg_rule() -> Rule 19 | = doc:rust_doc_comment() cache:cacheflag() no_eof:no_eof_flag() visibility:rust_visibility() 20 | span:sp() "rule" 21 | header:( 22 | &("_" / "__" / "___") name:IDENT() ("(" ")")? { (name, None, Vec::new()) } 23 | / name:IDENT() ty_params:rust_ty_params()? params:rule_params() { (name, ty_params, params) } 24 | ) 25 | ret_type:("->" t:$(rust_type()) {t})? 26 | where_clause:$(rust_where_clause())? 27 | "=" expr:expression() ";"? 
28 | { Rule { span, doc, name:header.0, ty_params:header.1, params:header.2, expr, ret_type, where_clause, visibility, no_eof, cache } } 29 | 30 | rule cacheflag() -> Option = "#" "[" "cache" "]" {Some(Cache::Simple)} / "#" "[" "cache_left_rec" "]" {Some(Cache::Recursive)} / {None} 31 | 32 | rule no_eof_flag() -> bool = "#" "[" "no_eof" "]" {true} / {false} 33 | 34 | rule rule_param_ty() -> RuleParamTy 35 | = "rule" "<" r:$(rust_type()) ">" { RuleParamTy::Rule(r) } 36 | / t:$(rust_type()) { RuleParamTy::Rust(t) } 37 | 38 | rule rule_params() -> Vec 39 | = "(" params:(x:(name:IDENT() ":" ty:rule_param_ty() { RuleParam { name, ty} }) ++ "," ","? {x})? ")" { params.unwrap_or_default() } 40 | 41 | rule item() -> Item 42 | = u:rust_use() { Item::Use(u) } 43 | / r:peg_rule() { Item::Rule(r) } 44 | 45 | rule rust_doc_comment() -> Option = $(("#" "[" "doc" "=" LITERAL() "]")*)? 46 | rule rust_attribute() = "#" "[" rust_path() (DELIM_GROUP() / "=" LITERAL()) "]" 47 | 48 | rule rust_visibility() -> Option = $("pub" PAREN_GROUP()?)? 49 | 50 | rule rust_use() -> TokenStream 51 | = v:$(rust_attribute()* "use" rust_use_tree() ";") { v.to_owned() } 52 | 53 | rule rust_use_tree() 54 | = (rust_path()? "::")? ("*" / "{" (rust_use_tree() ++ "," ","?)? "}") 55 | / rust_path() ("as" (IDENT() / "_"))? 56 | 57 | rule rust_path() 58 | = (("$"? "crate")? "::")? IDENT() ++ "::" 59 | 60 | rule rust_type() 61 | = BRACKET_GROUP() 62 | / "&" LIFETIME()? "mut"? rust_type() 63 | / "dyn" rust_ty_param_bound() ++ "+" 64 | / "impl" rust_ty_param_bound() ++ "+" 65 | / "(" (rust_type() ++ "," ","?)? ")" 66 | / ("<" rust_type() ("as" rust_ty_path())? ">")? rust_ty_path() 67 | 68 | rule rust_ty_path() 69 | = "::"? (IDENT() ("::"? (rust_generic_args() / PAREN_GROUP() ("->" rust_type())?))?) ++ "::" 70 | 71 | rule rust_ty_params() -> Vec 72 | = "<" p:($(rust_generic_param()) ++ ",") ","? ">" { p } 73 | 74 | rule rust_where_clause() 75 | = "where" ( 76 | LIFETIME() (":" LIFETIME() ++ "+")? 
77 | / rust_for_lifetimes()? rust_type() ":" rust_ty_param_bound() ++ "+" 78 | ) ** "," ","? 79 | 80 | rule rust_generic_param() 81 | = LIFETIME() (":" LIFETIME() ++ "+")? 82 | / IDENT() (":" rust_ty_param_bound() ++ "+")? 83 | 84 | rule rust_for_lifetimes() 85 | = "for" rust_ty_params() 86 | 87 | rule rust_ty_param_bound() 88 | = LIFETIME() 89 | / "?"? rust_for_lifetimes()? rust_ty_path() 90 | / "(" "?"? rust_for_lifetimes()? rust_ty_path() ")" 91 | 92 | rule rust_generic_args() 93 | = "<" (LIFETIME() / rust_type() / BRACE_GROUP() / LITERAL()) ++ "," ","? ">" 94 | 95 | rule expression() -> SpannedExpr = choice() 96 | 97 | rule choice() -> SpannedExpr = sp:sp() s:sequence() ++ "/" { 98 | if s.len() == 1 { 99 | s.into_iter().next().unwrap() 100 | } else { 101 | ChoiceExpr(s).at(sp) 102 | } 103 | } 104 | 105 | rule sequence() -> SpannedExpr 106 | = sp:sp() elements:labeled()* code:BRACE_GROUP()? { 107 | if let Some(code) = code { 108 | ActionExpr(elements, Some(code)).at(sp) 109 | } else if elements.len() != 1 { 110 | ActionExpr(elements, None).at(sp) 111 | } else { 112 | elements.into_iter().next().unwrap().expr 113 | } 114 | } 115 | 116 | rule labeled() -> TaggedExpr 117 | = label:(l:IDENT() ":" {l})? expression:suffixed() 118 | { TaggedExpr{ name: label, expr: expression } } 119 | 120 | rule suffixed() -> SpannedExpr 121 | = e:prefixed() sp:sp() "?" 
{ OptionalExpr(Box::new(e)).at(sp) } 122 | / e:prefixed() sp:sp() "**" count:repeatcount() sep:primary() { Repeat { inner: Box::new(e), bound: count, sep: Some(Box::new(sep)) }.at(sp) } 123 | / e:prefixed() sp:sp() "++" sep:primary() { Repeat { inner: Box::new(e), bound: BoundedRepeat::Plus, sep: Some(Box::new(sep)) }.at(sp )} 124 | / e:prefixed() sp:sp() "*" count:repeatcount() { Repeat { inner: Box::new(e), bound: count, sep: None }.at(sp) } 125 | / e:prefixed() sp:sp() "+" { Repeat { inner: Box::new(e), bound: BoundedRepeat::Plus, sep: None }.at(sp) } 126 | / prefixed() 127 | 128 | rule repeatcount() -> BoundedRepeat 129 | = "<" n:repeatnum() ">" { BoundedRepeat::Exact(n) } 130 | / "<" min:repeatnum()? "," max:repeatnum()? ">" { BoundedRepeat::Both(min, max) } 131 | / { BoundedRepeat::None } 132 | 133 | rule repeatnum() -> TokenStream = $(INTEGER() / BRACE_GROUP()) 134 | 135 | rule prefixed() -> SpannedExpr 136 | = sp:sp() "$" expression:primary() { MatchStrExpr(Box::new(expression)).at(sp) } 137 | / sp:sp() "&" expression:primary() { PosAssertExpr(Box::new(expression)).at(sp) } 138 | / sp:sp() "!" expression:primary() { NegAssertExpr(Box::new(expression)).at(sp) } 139 | / primary() 140 | 141 | #[cache] 142 | rule primary() -> SpannedExpr 143 | = sp:sp() "precedence" "!" "{" levels:precedence_level()**"--" "}" { PrecedenceExpr{ levels:levels }.at(sp) } 144 | / sp:sp() "position" "!" "(" ")" { PositionExpr.at(sp) } 145 | / sp:sp() "quiet" "!" "{" e:expression() "}" { QuietExpr(Box::new(e)).at(sp) } 146 | / sp:sp() "expected" "!" s:PAREN_GROUP() { FailExpr(s).at(sp) } 147 | / &("_" / "__" / "___") sp:sp() name:IDENT() { RuleExpr(name, None, Vec::new()).at(sp) } 148 | / sp:sp() name:IDENT() generics:$("::" rust_generic_args())? 
"(" args:(rule_arg() ** ",") ")" { RuleExpr(name, generics, args).at(sp) } 149 | / sp:sp() l:LITERAL() { LiteralExpr(l).at(sp) } 150 | / sp:sp() p:BRACKET_GROUP() { PatternExpr(p).at(sp) } 151 | / "(" sp:sp() "@" ")" { MarkerExpr(true).at(sp) } 152 | / sp:sp() "@" { MarkerExpr(false).at(sp) } 153 | / sp:sp() "##" method:IDENT() args:PAREN_GROUP() { MethodExpr(method, args.stream()).at(sp) } 154 | / sp:sp() "#" code:BRACE_GROUP() { CustomExpr(code).at(sp) } 155 | / "(" expression:expression() ")" { expression } 156 | 157 | rule rule_arg() -> RuleArg 158 | = "<" e:expression() ">" { RuleArg::Peg(e) } 159 | / tt:$( #{|input, pos| input.eat_until(pos, ',')}+ ) { RuleArg::Rust(tt) } 160 | 161 | rule precedence_level() -> PrecedenceLevel 162 | = operators:precedence_op()+ 163 | { PrecedenceLevel{ operators: operators } } 164 | 165 | rule precedence_op() -> PrecedenceOperator 166 | = span:sp() elements:labeled()* action:BRACE_GROUP() 167 | { PrecedenceOperator{ span, elements, action } } 168 | 169 | rule sp() -> Span = #{|input, pos| input.next_span(pos)} 170 | rule KEYWORD() = "pub" / "rule" / "use" / "type" / "where" 171 | rule IDENT() -> Ident = !KEYWORD() i:#{|input, pos| input.ident(pos)} {i} 172 | rule LITERAL() -> Literal = #{|input, pos| input.literal(pos)} 173 | rule PAREN_GROUP() -> Group = #{|input, pos| input.group(pos, Delimiter::Parenthesis)} 174 | rule BRACE_GROUP() -> Group = #{|input, pos| input.group(pos, Delimiter::Brace)} 175 | rule BRACKET_GROUP() -> Group = #{|input, pos| input.group(pos, Delimiter::Bracket)} 176 | rule DELIM_GROUP() -> Group = PAREN_GROUP() / BRACE_GROUP() / BRACKET_GROUP() 177 | rule LIFETIME() = "'" IDENT() 178 | rule INTEGER() = LITERAL() 179 | 180 | } 181 | -------------------------------------------------------------------------------- /peg-macros/lib.rs: -------------------------------------------------------------------------------- 1 | extern crate proc_macro; 2 | extern crate proc_macro2; 3 | extern crate quote; 4 | 5 | use 
peg::Parse; 6 | use quote::quote_spanned; 7 | 8 | // This can't use the `peg` crate as it would be a circular dependency, but the generated code in grammar.rs 9 | // requires `::peg` paths. 10 | extern crate peg_runtime as peg; 11 | 12 | mod analysis; 13 | mod ast; 14 | mod grammar; 15 | mod tokens; 16 | mod translate; 17 | 18 | /// The main macro for creating a PEG parser. 19 | /// 20 | /// For the grammar syntax, see the `peg` crate documentation. 21 | #[proc_macro] 22 | pub fn parser(input: proc_macro::TokenStream) -> proc_macro::TokenStream { 23 | let tokens = tokens::FlatTokenStream::new(input.into()); 24 | let grammar = match grammar::peg::peg_grammar(&tokens) { 25 | Ok(g) => g, 26 | Err(err) => { 27 | let msg = if tokens.is_eof(err.location.1) { 28 | format!("expected {} at end of input", err.expected) 29 | } else { 30 | format!("expected {}", err.expected) 31 | }; 32 | return quote_spanned!(err.location.0=> compile_error!(#msg);).into(); 33 | } 34 | }; 35 | 36 | translate::compile_grammar(&grammar).into() 37 | } 38 | -------------------------------------------------------------------------------- /peg-macros/tokens.rs: -------------------------------------------------------------------------------- 1 | use peg::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult}; 2 | use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree}; 3 | 4 | #[derive(Debug, Clone)] 5 | pub struct FlatTokenStream { 6 | tokens: Vec, 7 | } 8 | 9 | #[derive(Debug, Clone)] 10 | pub enum Token { 11 | Ident(Ident), 12 | Literal(Literal), 13 | Punct(Punct), 14 | Begin(Group, usize), 15 | End(Delimiter, Span), 16 | } 17 | 18 | impl Token { 19 | fn span(&self) -> Span { 20 | match self { 21 | Token::Ident(i) => i.span(), 22 | Token::Literal(l) => l.span(), 23 | Token::Punct(p) => p.span(), 24 | Token::Begin(g, _) => g.span(), 25 | Token::End(_, span) => span.clone(), 26 | } 27 | } 28 | } 29 | 30 | impl FlatTokenStream { 31 | pub fn new(stream: 
TokenStream) -> FlatTokenStream { 32 | let mut tokens = vec![]; 33 | 34 | fn flatten(tokens: &mut Vec, tree: TokenTree) { 35 | match tree { 36 | TokenTree::Ident(i) => tokens.push(Token::Ident(i)), 37 | TokenTree::Literal(l) => tokens.push(Token::Literal(l)), 38 | TokenTree::Punct(p) => tokens.push(Token::Punct(p)), 39 | TokenTree::Group(g) => { 40 | let start_pos = tokens.len(); 41 | 42 | tokens.push(Token::End(g.delimiter(), g.span())); // placeholder 43 | for tree in g.stream() { 44 | flatten(tokens, tree); 45 | } 46 | tokens.push(Token::End(g.delimiter(), g.span())); 47 | 48 | let end_pos = tokens.len(); 49 | tokens[start_pos] = Token::Begin(g, end_pos); 50 | } 51 | } 52 | } 53 | 54 | for tree in stream { 55 | flatten(&mut tokens, tree); 56 | } 57 | 58 | FlatTokenStream { tokens } 59 | } 60 | 61 | pub fn next_span(&self, pos: usize) -> RuleResult { 62 | match self.tokens.get(pos) { 63 | Some(t) => RuleResult::Matched(pos, t.span()), 64 | _ => RuleResult::Failed, 65 | } 66 | } 67 | 68 | pub fn ident(&self, pos: usize) -> RuleResult { 69 | match self.tokens.get(pos) { 70 | Some(Token::Ident(i)) => RuleResult::Matched(pos + 1, i.clone()), 71 | _ => RuleResult::Failed, 72 | } 73 | } 74 | 75 | pub fn literal(&self, pos: usize) -> RuleResult { 76 | match self.tokens.get(pos) { 77 | Some(Token::Literal(i)) => RuleResult::Matched(pos + 1, i.clone()), 78 | _ => RuleResult::Failed, 79 | } 80 | } 81 | 82 | pub fn group(&self, pos: usize, delim: Delimiter) -> RuleResult { 83 | match self.tokens.get(pos) { 84 | Some(Token::Begin(g, n)) if g.delimiter() == delim => { 85 | RuleResult::Matched(*n, g.clone()) 86 | } 87 | _ => RuleResult::Failed, 88 | } 89 | } 90 | 91 | pub fn eat_until(&self, initial_pos: usize, end: char) -> RuleResult<()> { 92 | let mut pos = initial_pos; 93 | loop { 94 | match self.tokens.get(pos) { 95 | Some(Token::Begin(_, n)) => pos = *n, 96 | Some(Token::Ident(_)) | Some(Token::Literal(_)) => pos += 1, 97 | Some(Token::Punct(p)) if p.as_char() != end => 
pos += 1, 98 | _ if pos != initial_pos => return RuleResult::Matched(pos, ()), 99 | _ => return RuleResult::Failed, 100 | } 101 | } 102 | } 103 | } 104 | 105 | #[derive(Debug, Clone)] 106 | pub struct Sp(pub Span, pub usize); 107 | 108 | impl ::std::fmt::Display for Sp { 109 | fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> Result<(), ::std::fmt::Error> { 110 | write!(fmt, "{:?} ({})", self.0, self.1) 111 | } 112 | } 113 | 114 | impl Parse for FlatTokenStream { 115 | type PositionRepr = Sp; 116 | fn start(&self) -> usize { 117 | 0 118 | } 119 | 120 | fn is_eof(&self, pos: usize) -> bool { 121 | pos >= self.tokens.len() 122 | } 123 | 124 | fn position_repr(&self, pos: usize) -> Sp { 125 | let span = self.tokens.get(pos) 126 | .map_or_else( 127 | || Span::call_site(), 128 | |t| t.span() 129 | ); 130 | Sp(span, pos) 131 | } 132 | } 133 | 134 | impl<'input> ParseElem<'input> for FlatTokenStream { 135 | type Element = &'input Token; 136 | 137 | fn parse_elem(&'input self, pos: usize) -> RuleResult<&'input Token> { 138 | match self.tokens.get(pos) { 139 | Some(c) => RuleResult::Matched(pos + 1, c), 140 | None => RuleResult::Failed, 141 | } 142 | } 143 | } 144 | 145 | fn delimiter_start(d: Delimiter) -> &'static str { 146 | match d { 147 | Delimiter::Brace => "{", 148 | Delimiter::Bracket => "[", 149 | Delimiter::Parenthesis => "(", 150 | _ => "", 151 | } 152 | } 153 | 154 | fn delimiter_end(d: Delimiter) -> &'static str { 155 | match d { 156 | Delimiter::Brace => "}", 157 | Delimiter::Bracket => "]", 158 | Delimiter::Parenthesis => ")", 159 | _ => "", 160 | } 161 | } 162 | 163 | impl ParseLiteral for FlatTokenStream { 164 | fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> { 165 | match self.tokens.get(pos) { 166 | Some(Token::Ident(i)) if i.to_string() == literal => RuleResult::Matched(pos + 1, ()), 167 | Some(Token::Punct(p)) if literal.starts_with(p.as_char()) => { 168 | if literal.len() == 1 { 169 | RuleResult::Matched(pos + 1, ()) 170 | } 
else if p.spacing() == Spacing::Joint { 171 | self.parse_string_literal(pos + 1, &literal[1..]) 172 | } else { 173 | RuleResult::Failed 174 | } 175 | } 176 | Some(Token::Begin(g, _)) if delimiter_start(g.delimiter()) == literal => { 177 | RuleResult::Matched(pos + 1, ()) 178 | } 179 | Some(Token::End(d, _)) if delimiter_end(*d) == literal => { 180 | RuleResult::Matched(pos + 1, ()) 181 | } 182 | _ => RuleResult::Failed, 183 | } 184 | } 185 | } 186 | 187 | impl<'input> ParseSlice<'input> for FlatTokenStream { 188 | type Slice = TokenStream; 189 | fn parse_slice(&'input self, p1: usize, p2: usize) -> TokenStream { 190 | let mut ts = TokenStream::new(); 191 | let mut pos = p1; 192 | 193 | while pos < p2 { 194 | let (t, next_pos): (TokenTree, usize) = match &self.tokens[pos] { 195 | Token::Ident(i) => (i.clone().into(), pos + 1), 196 | Token::Literal(l) => (l.clone().into(), pos + 1), 197 | Token::Punct(p) => (p.clone().into(), pos + 1), 198 | Token::Begin(g, end) => (g.clone().into(), *end), 199 | Token::End(..) 
=> panic!("$-expr containing unmatched group end"), 200 | }; 201 | ts.extend(Some(t)); 202 | pos = next_pos; 203 | } 204 | 205 | assert_eq!(pos, p2, "$-expr containing unmatched group start"); 206 | 207 | ts 208 | } 209 | } 210 | -------------------------------------------------------------------------------- /peg-macros/translate.rs: -------------------------------------------------------------------------------- 1 | use proc_macro2::Delimiter; 2 | use proc_macro2::{Group, Ident, Literal, Span, TokenStream, TokenTree}; 3 | use std::collections::{HashMap, HashSet}; 4 | 5 | use quote::{format_ident, quote, quote_spanned}; 6 | 7 | pub use self::Expr::*; 8 | use crate::analysis; 9 | use crate::ast::*; 10 | 11 | pub fn report_error(span: Span, msg: String) -> TokenStream { 12 | quote_spanned!(span=>compile_error!(#msg);) 13 | } 14 | 15 | pub fn report_error_expr(span: Span, msg: String) -> TokenStream { 16 | // panic!() to avoid "Mismatched types" error 17 | quote_spanned!(span=> { compile_error!(#msg); panic!() }) 18 | } 19 | 20 | /// Test if the group begins with a specific marker character, and if so, return the remaining tokens. 
21 | fn group_check_prefix(group: &Group, prefix: char) -> Option { 22 | let mut iter = group.stream().into_iter(); 23 | match iter.next() { 24 | Some(TokenTree::Punct(p)) if p.as_char() == prefix => Some(iter.collect()), 25 | _ => None, 26 | } 27 | } 28 | 29 | fn extra_args_def(grammar: &Grammar) -> TokenStream { 30 | let args: Vec = grammar 31 | .args 32 | .iter() 33 | .map(|&(ref name, ref tp)| quote!(, #name: #tp)) 34 | .collect(); 35 | quote!(#(#args)*) 36 | } 37 | 38 | fn extra_args_call(grammar: &Grammar) -> TokenStream { 39 | let args: Vec = grammar 40 | .args 41 | .iter() 42 | .map(|&(ref name, _)| quote!(, #name)) 43 | .collect(); 44 | quote!(#(#args)*) 45 | } 46 | 47 | #[derive(Clone)] 48 | struct Context<'a> { 49 | rules: &'a HashMap, 50 | rules_from_args: HashSet, 51 | grammar_lifetime_params: &'a [TokenStream], 52 | input_ty: TokenStream, 53 | parse_state_ty: TokenStream, 54 | extra_args_call: TokenStream, 55 | extra_args_def: TokenStream, 56 | } 57 | 58 | pub(crate) fn compile_grammar(grammar: &Grammar) -> TokenStream { 59 | let analysis = analysis::check(grammar); 60 | 61 | let grammar_lifetime_params = ty_params_slice(&grammar.lifetime_params); 62 | 63 | let context = &Context { 64 | rules: &analysis.rules, 65 | rules_from_args: HashSet::new(), 66 | grammar_lifetime_params, 67 | input_ty: quote!(&'input Input<#(#grammar_lifetime_params),*>), 68 | parse_state_ty: quote!(&mut ParseState<'input #(, #grammar_lifetime_params)*>), 69 | extra_args_call: extra_args_call(grammar), 70 | extra_args_def: extra_args_def(grammar), 71 | }; 72 | 73 | let mut seen_rule_names = HashSet::new(); 74 | 75 | let mut items = vec![]; 76 | for item in &grammar.items { 77 | match item { 78 | Item::Use(tt) => items.push(tt.clone()), 79 | Item::Rule(rule) => { 80 | if !seen_rule_names.insert(rule.name.to_string()) { 81 | items.push(report_error( 82 | rule.name.span(), 83 | format!("duplicate rule `{}`", rule.name), 84 | )); 85 | continue; 86 | } 87 | 88 | if 
rule.cache.is_some() && !(rule.params.is_empty() && rule.ty_params.is_none()) { 89 | items.push(report_error( 90 | rule.name.span(), 91 | "rules with generics or parameters cannot use #[cache] or #[cache_left_rec]".to_string(), 92 | )); 93 | continue; 94 | } 95 | 96 | if rule.visibility.is_some() { 97 | for param in &rule.params { 98 | if let RuleParamTy::Rule(..) = ¶m.ty { 99 | items.push(report_error( 100 | param.name.span(), 101 | "parameters on `pub rule` must be Rust types".to_string(), 102 | )) 103 | } 104 | } 105 | 106 | items.push(compile_rule_export(context, rule)); 107 | } else if rule.no_eof { 108 | items.push(report_error( 109 | rule.name.span(), 110 | "#[no_eof] is only meaningful for `pub rule`".to_string(), 111 | )); 112 | } 113 | 114 | items.push(compile_rule(context, rule)); 115 | } 116 | } 117 | } 118 | 119 | let parse_state = make_parse_state(grammar); 120 | let Grammar { 121 | name, 122 | doc, 123 | input_type, 124 | visibility, 125 | .. 126 | } = grammar; 127 | 128 | let mut errors: Vec = analysis 129 | .left_recursion 130 | .iter() 131 | .map(|rec| report_error(rec.span, rec.msg())) 132 | .collect(); 133 | 134 | errors.extend( 135 | analysis 136 | .loop_nullability 137 | .iter() 138 | .map(|nl| report_error(nl.span, nl.msg())), 139 | ); 140 | 141 | quote_spanned! 
{ Span::mixed_site() => 142 | #doc 143 | #visibility mod #name { 144 | #[allow(unused_imports)] 145 | use super::*; 146 | type Input<#(#grammar_lifetime_params),*> = #input_type; 147 | type PositionRepr<#(#grammar_lifetime_params),*> = as ::peg::Parse>::PositionRepr; 148 | 149 | #(#errors)* 150 | #parse_state 151 | #(#items)* 152 | } 153 | } 154 | } 155 | 156 | fn make_parse_state(grammar: &Grammar) -> TokenStream { 157 | let span = Span::mixed_site(); 158 | let grammar_lifetime_params = ty_params_slice(&grammar.lifetime_params); 159 | let mut cache_fields_def: Vec = Vec::new(); 160 | let mut cache_fields: Vec = Vec::new(); 161 | for rule in grammar.iter_rules() { 162 | if rule.cache.is_some() && rule.params.is_empty() && rule.ty_params.is_none() { 163 | let name = format_ident!("{}_cache", rule.name); 164 | let ret_ty = rule.ret_type.clone().unwrap_or_else(|| quote!(())); 165 | cache_fields_def.push( 166 | quote_spanned! { span => #name: ::std::collections::HashMap> }, 167 | ); 168 | cache_fields.push(name); 169 | } 170 | } 171 | 172 | quote_spanned! { span => 173 | #[allow(unused_parens)] 174 | struct ParseState<'input #(, #grammar_lifetime_params)*> { 175 | _phantom: ::core::marker::PhantomData<(&'input () #(, &#grammar_lifetime_params ())*)>, 176 | #(#cache_fields_def),* 177 | } 178 | 179 | impl<'input #(, #grammar_lifetime_params)*> ParseState<'input #(, #grammar_lifetime_params)*> { 180 | fn new() -> ParseState<'input #(, #grammar_lifetime_params)*> { 181 | ParseState { 182 | _phantom: ::core::marker::PhantomData, 183 | #(#cache_fields: ::std::collections::HashMap::new()),* 184 | } 185 | } 186 | } 187 | } 188 | } 189 | 190 | fn ty_params_slice(ty_params: &Option>) -> &[TokenStream] { 191 | ty_params.as_ref().map(|x| &x[..]).unwrap_or(&[]) 192 | } 193 | 194 | fn rule_params_list(context: &Context, rule: &Rule) -> Vec { 195 | let Context { 196 | input_ty, 197 | parse_state_ty, 198 | .. 
199 | } = context; 200 | let span = rule.span.resolved_at(Span::mixed_site()); 201 | rule.params.iter().map(|param| { 202 | let name = ¶m.name; 203 | match ¶m.ty { 204 | RuleParamTy::Rust(ty) => quote_spanned!{ span => #name: #ty }, 205 | RuleParamTy::Rule(ty) => quote_spanned!{ span => 206 | #name: impl Fn(#input_ty, #parse_state_ty, &mut ::peg::error::ErrorState, usize) -> ::peg::RuleResult<#ty> 207 | }, 208 | } 209 | }).collect() 210 | } 211 | 212 | /// Compile a rule to a function for use internal to the grammar. 213 | /// Returns `RuleResult`. 214 | fn compile_rule(context: &Context, rule: &Rule) -> TokenStream { 215 | let span = rule.span.resolved_at(Span::mixed_site()); 216 | let name = format_ident!("__parse_{}", rule.name, span = span); 217 | let ret_ty = rule.ret_type.clone().unwrap_or_else(|| quote!(())); 218 | let ty_params = ty_params_slice(&rule.ty_params); 219 | let where_clause = rule.where_clause.as_ref().into_iter(); 220 | 221 | let Context { 222 | input_ty, 223 | parse_state_ty, 224 | grammar_lifetime_params, 225 | extra_args_def, 226 | .. 227 | } = context; 228 | 229 | let mut context = context.clone(); 230 | context 231 | .rules_from_args 232 | .extend(rule.params.iter().map(|param| param.name.to_string())); 233 | 234 | let body = compile_expr(&context, &rule.expr, rule.ret_type.is_some()); 235 | 236 | let wrapped_body = if cfg!(feature = "trace") { 237 | let str_rule_name = rule.name.to_string(); 238 | quote_spanned! 
{ span => { 239 | let loc = ::peg::Parse::position_repr(__input, __pos); 240 | println!("[PEG_TRACE] Attempting to match rule `{}` at {}", #str_rule_name, loc); 241 | let __peg_result: ::peg::RuleResult<#ret_ty> = {#body}; 242 | match __peg_result { 243 | ::peg::RuleResult::Matched(epos, _) => { 244 | let eloc = ::peg::Parse::position_repr(__input, epos); 245 | println!("[PEG_TRACE] Matched rule `{}` at {} to {}", #str_rule_name, loc, eloc); 246 | } 247 | ::peg::RuleResult::Failed => { 248 | println!("[PEG_TRACE] Failed to match rule `{}` at {}", #str_rule_name, loc); 249 | } 250 | } 251 | 252 | __peg_result 253 | }} 254 | } else { 255 | body 256 | }; 257 | 258 | let rule_params = rule_params_list(&context, rule); 259 | 260 | let fn_body = match &rule.cache { 261 | None => wrapped_body, 262 | Some(cache_type) => { 263 | let cache_field = format_ident!("{}_cache", rule.name); 264 | 265 | let cache_trace = if cfg!(feature = "trace") { 266 | let str_rule_name = rule.name.to_string(); 267 | quote_spanned! { span => 268 | let loc = ::peg::Parse::position_repr(__input, __pos); 269 | match &entry { 270 | &::peg::RuleResult::Matched(..) => println!("[PEG_TRACE] Cached match of rule `{}` at {}", #str_rule_name, loc), 271 | &Failed => println!("[PEG_TRACE] Cached fail of rule `{}` at {}", #str_rule_name, loc), 272 | }; 273 | } 274 | } else { 275 | quote!() 276 | }; 277 | 278 | match cache_type { 279 | Cache::Simple => quote_spanned! { span => 280 | if let Some(entry) = __state.#cache_field.get(&__pos) { 281 | #cache_trace 282 | return entry.clone(); 283 | } 284 | 285 | let __rule_result = #wrapped_body; 286 | __state.#cache_field.insert(__pos, __rule_result.clone()); 287 | __rule_result 288 | }, 289 | Cache::Recursive => 290 | // `#[cache_left_rec] support for recursive rules using the technique described here: 291 | // 292 | { 293 | quote_spanned! 
{ span => 294 | if let Some(entry) = __state.#cache_field.get(&__pos) { 295 | #cache_trace 296 | return entry.clone(); 297 | } 298 | 299 | __state.#cache_field.insert(__pos, ::peg::RuleResult::Failed); 300 | let mut __last_result = ::peg::RuleResult::Failed; 301 | loop { 302 | let __current_result = { #wrapped_body }; 303 | match __current_result { 304 | ::peg::RuleResult::Failed => break, 305 | ::peg::RuleResult::Matched(__current_endpos, _) => 306 | match __last_result { 307 | ::peg::RuleResult::Matched(__last_endpos, _) if __current_endpos <= __last_endpos => break, 308 | _ => { 309 | __state.#cache_field.insert(__pos, __current_result.clone()); 310 | __last_result = __current_result; 311 | }, 312 | } 313 | } 314 | } 315 | 316 | return __last_result; 317 | } 318 | } 319 | } 320 | } 321 | }; 322 | 323 | quote_spanned! { span => 324 | fn #name<'input #(, #grammar_lifetime_params)* #(, #ty_params)*>( 325 | __input: #input_ty, 326 | __state: #parse_state_ty, 327 | __err_state: &mut ::peg::error::ErrorState, 328 | __pos: usize #extra_args_def #(, #rule_params)*, 329 | ) -> ::peg::RuleResult<#ret_ty> 330 | #(#where_clause)* 331 | { 332 | #![allow(non_snake_case, unused, clippy::redundant_closure_call)] 333 | #fn_body 334 | } 335 | } 336 | } 337 | 338 | /// Compile a rule into the parsing function which will be exported. 339 | /// Returns `Result`. 340 | fn compile_rule_export(context: &Context, rule: &Rule) -> TokenStream { 341 | let span = rule.span.resolved_at(Span::mixed_site()); 342 | 343 | let Rule { 344 | doc, 345 | name, 346 | visibility, 347 | .. 
348 | } = rule; 349 | let ret_ty = rule.ret_type.clone().unwrap_or_else(|| quote!(())); 350 | let parse_fn = format_ident!("__parse_{}", rule.name, span = name.span()); 351 | let ty_params = ty_params_slice(&rule.ty_params); 352 | let where_clause = rule.where_clause.as_ref().into_iter(); 353 | let rule_params = rule_params_list(context, rule); 354 | let rule_params_call: Vec = rule 355 | .params 356 | .iter() 357 | .map(|param| { 358 | let param_name = ¶m.name; 359 | quote!(#param_name) 360 | }) 361 | .collect(); 362 | 363 | let Context { 364 | input_ty, 365 | extra_args_call, 366 | extra_args_def, 367 | grammar_lifetime_params, 368 | .. 369 | } = context; 370 | let eof_check = if rule.no_eof { 371 | quote_spanned! { span => true } 372 | } else { 373 | quote_spanned! { span => ::peg::Parse::is_eof(__input, __pos) } 374 | }; 375 | 376 | // Parse once. If it succeeds or throws an error, return that. 377 | // If it fails, parse again to determine the set of all tokens 378 | // that were expected at the failure position. 379 | 380 | quote_spanned! 
{ span => 381 | #doc 382 | #visibility fn #name<'input #(, #grammar_lifetime_params)* #(, #ty_params)*>( 383 | __input: #input_ty #extra_args_def #(, #rule_params)* 384 | ) -> ::core::result::Result< 385 | #ret_ty, 386 | ::peg::error::ParseError> 387 | > 388 | #(#where_clause)* 389 | { 390 | #![allow(non_snake_case, unused)] 391 | 392 | let mut __err_state = ::peg::error::ErrorState::new(::peg::Parse::start(__input)); 393 | let mut __state = ParseState::new(); 394 | match #parse_fn(__input, &mut __state, &mut __err_state, ::peg::Parse::start(__input) #extra_args_call #(, #rule_params_call)*) { 395 | ::peg::RuleResult::Matched(__pos, __value) => { 396 | if #eof_check { 397 | return Ok(__value) 398 | } else { 399 | __err_state.mark_failure(__pos, "EOF"); 400 | } 401 | } 402 | _ => () 403 | } 404 | 405 | __state = ParseState::new(); 406 | __err_state.reparse_for_error(); 407 | 408 | match #parse_fn(__input, &mut __state, &mut __err_state, ::peg::Parse::start(__input) #extra_args_call #(, #rule_params_call)*) { 409 | ::peg::RuleResult::Matched(__pos, __value) => { 410 | if #eof_check { 411 | panic!("Parser is nondeterministic: succeeded when reparsing for error position"); 412 | return Ok(__value); // dead code, but needed for type inference 413 | } else { 414 | __err_state.mark_failure(__pos, "EOF"); 415 | } 416 | } 417 | _ => () 418 | } 419 | 420 | Err(__err_state.into_parse_error(__input)) 421 | } 422 | } 423 | } 424 | 425 | fn name_or_ignore(n: Option<&Ident>) -> TokenStream { 426 | match n { 427 | Some(n) => quote!(#n), 428 | None => quote!(_), 429 | } 430 | } 431 | 432 | fn ordered_choice(span: Span, mut rs: impl DoubleEndedIterator) -> TokenStream { 433 | rs.next_back().map(|last| rs.rfold(last, |fallback, preferred| { 434 | quote_spanned! 
{ span => { 435 | let __choice_res = #preferred; 436 | match __choice_res { 437 | ::peg::RuleResult::Matched(__pos, __value) => ::peg::RuleResult::Matched(__pos, __value), 438 | ::peg::RuleResult::Failed => #fallback 439 | } 440 | }} 441 | })).expect("ordered choice must not be empty") 442 | } 443 | 444 | fn labeled_seq(context: &Context, exprs: &[TaggedExpr], inner: TokenStream) -> TokenStream { 445 | exprs.iter().rfold(inner, |then, expr| { 446 | compile_expr_continuation(context, &expr.expr, expr.name.as_ref(), then) 447 | }) 448 | } 449 | 450 | fn compile_expr_continuation( 451 | context: &Context, 452 | e: &SpannedExpr, 453 | result_name: Option<&Ident>, 454 | continuation: TokenStream, 455 | ) -> TokenStream { 456 | let span = e.span.resolved_at(Span::mixed_site()); 457 | 458 | let result_pat = name_or_ignore(result_name); 459 | match e.expr { 460 | LiteralExpr(ref s) => compile_literal_expr(s, continuation), 461 | 462 | PatternExpr(ref pattern) => { 463 | let result_name = result_name 464 | .cloned() 465 | .unwrap_or_else(|| Ident::new("__ch", span)); 466 | compile_pattern_expr( 467 | pattern, 468 | result_name, 469 | quote_spanned! { span => 470 | { let __pos = __next; { #continuation } } 471 | }, 472 | ) 473 | } 474 | 475 | _ => { 476 | let seq_res = compile_expr(context, e, result_name.is_some()); 477 | quote_spanned! { span => { 478 | let __seq_res = #seq_res; 479 | match __seq_res { 480 | ::peg::RuleResult::Matched(__pos, #result_pat) => { #continuation } 481 | ::peg::RuleResult::Failed => ::peg::RuleResult::Failed, 482 | } 483 | }} 484 | } 485 | } 486 | } 487 | 488 | fn compile_literal_expr(s: &Literal, continuation: TokenStream) -> TokenStream { 489 | let span = s.span().resolved_at(Span::mixed_site()); 490 | let escaped_str = s.to_string(); 491 | quote_spanned! 
{ span => 492 | match ::peg::ParseLiteral::parse_string_literal(__input, __pos, #s) { 493 | ::peg::RuleResult::Matched(__pos, __val) => { #continuation } 494 | ::peg::RuleResult::Failed => { __err_state.mark_failure(__pos, #escaped_str); ::peg::RuleResult::Failed } 495 | } 496 | } 497 | } 498 | 499 | fn compile_pattern_expr( 500 | pattern_group: &Group, 501 | result_name: Ident, 502 | success_res: TokenStream, 503 | ) -> TokenStream { 504 | let span = pattern_group.span().resolved_at(Span::mixed_site()); 505 | let pat_str = pattern_group.to_string(); 506 | let failure_res = quote_spanned! { span => { __err_state.mark_failure(__pos, #pat_str); ::peg::RuleResult::Failed } }; 507 | 508 | let (pattern, in_set, not_in_set) = 509 | if let Some(pattern) = group_check_prefix(pattern_group, '^') { 510 | (pattern, failure_res, success_res) 511 | } else { 512 | (pattern_group.stream(), success_res, failure_res) 513 | }; 514 | 515 | let pattern = Group::new(Delimiter::None, pattern); 516 | 517 | quote_spanned! { span => 518 | match ::peg::ParseElem::parse_elem(__input, __pos) { 519 | ::peg::RuleResult::Matched(__next, #result_name) => match #result_name { 520 | #pattern => #in_set, 521 | _ => #not_in_set, 522 | } 523 | ::peg::RuleResult::Failed => { __err_state.mark_failure(__pos, #pat_str); ::peg::RuleResult::Failed } 524 | } 525 | } 526 | } 527 | 528 | fn compile_expr(context: &Context, e: &SpannedExpr, result_used: bool) -> TokenStream { 529 | let span = e.span.resolved_at(Span::mixed_site()); 530 | 531 | match e.expr { 532 | LiteralExpr(ref s) => compile_literal_expr( 533 | s, 534 | quote_spanned! { span => 535 | ::peg::RuleResult::Matched(__pos, __val) 536 | }, 537 | ), 538 | 539 | PatternExpr(ref pattern_group) => { 540 | let res_name = Ident::new("__ch", span); 541 | let res = if result_used { 542 | quote!(#res_name) 543 | } else { 544 | quote_spanned! { span => () } 545 | }; 546 | compile_pattern_expr( 547 | pattern_group, 548 | res_name, 549 | quote_spanned! 
{ span => 550 | ::peg::RuleResult::Matched(__next, #res) 551 | }, 552 | ) 553 | } 554 | 555 | RuleExpr(ref rule_name, ref generics, ref rule_args) 556 | if context.rules_from_args.contains(&rule_name.to_string()) => 557 | { 558 | if !rule_args.is_empty() { 559 | return report_error_expr( 560 | rule_name.span(), 561 | "rule closure does not accept arguments".to_string(), 562 | ); 563 | } 564 | 565 | if generics.is_some() { 566 | return report_error_expr( 567 | rule_name.span(), 568 | "rule closure cannot have generics".to_string() 569 | ); 570 | } 571 | 572 | quote_spanned! { span=> #rule_name(__input, __state, __err_state, __pos) } 573 | } 574 | 575 | RuleExpr(ref rule_name, ref generics, ref rule_args) => { 576 | let rule_name_str = rule_name.to_string(); 577 | 578 | let rule_def = if let Some(rule_def) = context.rules.get(&rule_name_str) { 579 | rule_def 580 | } else { 581 | return report_error_expr( 582 | rule_name.span(), 583 | format!("undefined rule `{}`", rule_name_str), 584 | ); 585 | }; 586 | 587 | if result_used && rule_def.ret_type.is_none() { 588 | let msg = format!( 589 | "using result of rule `{}`, which does not return a value", 590 | rule_name_str 591 | ); 592 | return report_error_expr(rule_name.span(), msg); 593 | } 594 | 595 | if rule_def.params.len() != rule_args.len() { 596 | return report_error_expr( 597 | rule_name.span(), 598 | format!( 599 | "this rule takes {} parameters but {} parameters were supplied", 600 | rule_def.params.len(), 601 | rule_args.len() 602 | ), 603 | ); 604 | } 605 | 606 | let func = format_ident!("__parse_{}", rule_name, span = rule_name.span()); 607 | let extra_args_call = &context.extra_args_call; 608 | 609 | let rule_args_call: Vec = rule_args 610 | .iter() 611 | .map(|arg| match arg { 612 | RuleArg::Peg(e) => { 613 | let expr = compile_expr(context, e, true); 614 | quote_spanned! 
{ span=> |__input, __state, __err_state, __pos| { #expr } } 615 | } 616 | RuleArg::Rust(e) => e.clone(), 617 | }) 618 | .collect(); 619 | 620 | if result_used { 621 | quote_spanned! { span=> #func #generics (__input, __state, __err_state, __pos #extra_args_call #(, #rule_args_call)*) } 622 | } else { 623 | quote_spanned! { span=> 624 | match #func #generics (__input, __state, __err_state, __pos #extra_args_call #(, #rule_args_call)*){ 625 | ::peg::RuleResult::Matched(pos, _) => ::peg::RuleResult::Matched(pos, ()), 626 | ::peg::RuleResult::Failed => ::peg::RuleResult::Failed, 627 | } 628 | } 629 | } 630 | } 631 | 632 | MethodExpr(ref method, ref args) => { 633 | quote_spanned! { span=> __input.#method(__pos, #args) } 634 | } 635 | 636 | CustomExpr(ref code) => { 637 | let code = code.stream(); 638 | quote_spanned! { span=> ::peg::call_custom_closure((#code), __input, __pos) } 639 | } 640 | 641 | ChoiceExpr(ref exprs) => ordered_choice( 642 | span, 643 | exprs 644 | .iter() 645 | .map(|expr| compile_expr(context, expr, result_used)), 646 | ), 647 | 648 | OptionalExpr(ref e) => { 649 | let optional_res = compile_expr(context, e, result_used); 650 | 651 | if result_used { 652 | quote_spanned! { span=> 653 | match #optional_res { 654 | ::peg::RuleResult::Matched(__newpos, __value) => { ::peg::RuleResult::Matched(__newpos, Some(__value)) }, 655 | ::peg::RuleResult::Failed => { ::peg::RuleResult::Matched(__pos, None) }, 656 | } 657 | } 658 | } else { 659 | quote_spanned! 
{ span=> 660 | match #optional_res { 661 | ::peg::RuleResult::Matched(__newpos, _) => { ::peg::RuleResult::Matched(__newpos, ()) }, 662 | ::peg::RuleResult::Failed => { ::peg::RuleResult::Matched(__pos, ()) }, 663 | } 664 | } 665 | } 666 | } 667 | 668 | Repeat { 669 | ref inner, 670 | ref bound, 671 | ref sep, 672 | } => { 673 | let inner = compile_expr(context, inner, result_used); 674 | 675 | let (min, max) = match bound { 676 | BoundedRepeat::None => (None, None), 677 | BoundedRepeat::Plus => (Some(quote!(1)), None), 678 | BoundedRepeat::Exact(ref code) => (Some(code.clone()), Some(code.clone())), 679 | BoundedRepeat::Both(ref min, ref max) => (min.clone(), max.clone()), 680 | }; 681 | 682 | let match_sep = if let Some(sep) = sep { 683 | let sep_inner = compile_expr(context, sep, false); 684 | quote_spanned! { span=> 685 | let __pos = if __repeat_value.is_empty() { __pos } else { 686 | let __sep_res = #sep_inner; 687 | match __sep_res { 688 | ::peg::RuleResult::Matched(__newpos, _) => { __newpos }, 689 | ::peg::RuleResult::Failed => break, 690 | } 691 | }; 692 | } 693 | } else { 694 | quote!() 695 | }; 696 | 697 | let result = if result_used { 698 | quote_spanned! { span=> __repeat_value } 699 | } else { 700 | quote!(()) 701 | }; 702 | 703 | let (repeat_vec, repeat_step) = 704 | if result_used || min.is_some() || max.is_some() || sep.is_some() { 705 | ( 706 | Some(quote_spanned! { span => let mut __repeat_value = vec!(); }), 707 | Some(quote_spanned! { span => __repeat_value.push(__value); }), 708 | ) 709 | } else { 710 | (None, None) 711 | }; 712 | 713 | let max_check = max.map(|max| { 714 | quote_spanned! { span=> if __repeat_value.len() >= #max { break } } 715 | }); 716 | 717 | let result_check = if let Some(min) = min { 718 | quote_spanned! { span=> 719 | if __repeat_value.len() >= #min { 720 | ::peg::RuleResult::Matched(__repeat_pos, #result) 721 | } else { 722 | ::peg::RuleResult::Failed 723 | } 724 | } 725 | } else { 726 | quote_spanned! 
{ span=> ::peg::RuleResult::Matched(__repeat_pos, #result) } 727 | }; 728 | 729 | quote_spanned! { span=> { 730 | let mut __repeat_pos = __pos; 731 | #repeat_vec 732 | 733 | loop { 734 | let __pos = __repeat_pos; 735 | 736 | #match_sep 737 | #max_check 738 | 739 | let __step_res = #inner; 740 | match __step_res { 741 | ::peg::RuleResult::Matched(__newpos, __value) => { 742 | __repeat_pos = __newpos; 743 | #repeat_step 744 | }, 745 | ::peg::RuleResult::Failed => { 746 | break; 747 | } 748 | } 749 | } 750 | 751 | #result_check 752 | }} 753 | } 754 | 755 | PosAssertExpr(ref e) => { 756 | let assert_res = compile_expr(context, e, result_used); 757 | quote_spanned! { span=> { 758 | __err_state.suppress_fail += 1; 759 | let __assert_res = #assert_res; 760 | __err_state.suppress_fail -= 1; 761 | match __assert_res { 762 | ::peg::RuleResult::Matched(_, __value) => ::peg::RuleResult::Matched(__pos, __value), 763 | ::peg::RuleResult::Failed => ::peg::RuleResult::Failed, 764 | } 765 | }} 766 | } 767 | 768 | NegAssertExpr(ref e) => { 769 | let assert_res = compile_expr(context, e, false); 770 | quote_spanned! { span=> { 771 | __err_state.suppress_fail += 1; 772 | let __assert_res = #assert_res; 773 | __err_state.suppress_fail -= 1; 774 | match __assert_res { 775 | ::peg::RuleResult::Failed => ::peg::RuleResult::Matched(__pos, ()), 776 | ::peg::RuleResult::Matched(..) => ::peg::RuleResult::Failed, 777 | } 778 | }} 779 | } 780 | 781 | ActionExpr(ref exprs, ref code) => labeled_seq(context, exprs, { 782 | if let Some(code) = code { 783 | let code_span = code.span().resolved_at(Span::mixed_site()); 784 | 785 | // Peek and see if the first token in the block is '?'. If so, it's a conditional block 786 | if let Some(body) = group_check_prefix(code, '?') { 787 | quote_spanned! 
{code_span => 788 | match (||{ #body })() { 789 | Ok(res) => ::peg::RuleResult::Matched(__pos, res), 790 | Err(expected) => { 791 | __err_state.mark_failure(__pos, expected); 792 | ::peg::RuleResult::Failed 793 | }, 794 | } 795 | } 796 | } else { 797 | quote_spanned! {code_span => ::peg::RuleResult::Matched(__pos, (||#code)()) } 798 | } 799 | } else { 800 | quote_spanned! { span => ::peg::RuleResult::Matched(__pos, ()) } 801 | } 802 | }), 803 | MatchStrExpr(ref expr) => { 804 | let inner = compile_expr(context, expr, false); 805 | quote_spanned! { span => { 806 | let str_start = __pos; 807 | match #inner { 808 | ::peg::RuleResult::Matched(__newpos, _) => { ::peg::RuleResult::Matched(__newpos, ::peg::ParseSlice::parse_slice(__input, str_start, __newpos)) }, 809 | ::peg::RuleResult::Failed => ::peg::RuleResult::Failed, 810 | } 811 | }} 812 | } 813 | PositionExpr => { 814 | quote_spanned! { span => ::peg::RuleResult::Matched(__pos, __pos) } 815 | } 816 | QuietExpr(ref expr) => { 817 | let inner = compile_expr(context, expr, result_used); 818 | quote_spanned! { span => { 819 | __err_state.suppress_fail += 1; 820 | let res = #inner; 821 | __err_state.suppress_fail -= 1; 822 | res 823 | }} 824 | } 825 | FailExpr(ref expected) => { 826 | quote_spanned! 
{ span => { __err_state.mark_failure(__pos, #expected); ::peg::RuleResult::Failed }} 827 | } 828 | 829 | PrecedenceExpr { ref levels } => { 830 | let mut pre_rules = Vec::new(); 831 | let mut level_code = Vec::new(); 832 | let mut span_capture: Option<(TokenStream, TokenStream, TokenStream, &Group)> = None; 833 | 834 | for (prec, level) in levels.iter().enumerate() { 835 | let prec = prec as i32; 836 | 837 | let mut post_rules = Vec::new(); 838 | 839 | for op in &level.operators { 840 | let op_span = op.span.resolved_at(Span::mixed_site()); 841 | 842 | if op.elements.is_empty() { 843 | return report_error(op_span, "incomplete rule".to_string()); 844 | } 845 | 846 | let left_arg = &op.elements[0]; 847 | let l_arg = name_or_ignore(left_arg.name.as_ref()); 848 | 849 | let right_arg = &op.elements[op.elements.len() - 1]; 850 | let r_arg = name_or_ignore(right_arg.name.as_ref()); 851 | 852 | let action = &op.action; 853 | let action = quote_spanned!(op.action.span()=>(||#action)()); 854 | 855 | let action = if let Some((lpos_name, val_name, rpos_name, wrap_action)) = 856 | &span_capture 857 | { 858 | let wrap_action_span = wrap_action.span().resolved_at(Span::mixed_site()); 859 | quote_spanned!(wrap_action_span => (|#lpos_name, #val_name, #rpos_name|#wrap_action)(__lpos, #action, __pos)) 860 | } else { 861 | action 862 | }; 863 | 864 | match (&left_arg.expr.expr, &right_arg.expr.expr) { 865 | (&PositionExpr, &PositionExpr) if op.elements.len() == 3 => { 866 | // wrapper rule to capture expression span 867 | match &op.elements[1].expr.expr { 868 | &MarkerExpr(..) 
=> (), 869 | _ => { 870 | return report_error(op_span, "span capture rule must be `l:position!() n:@ r:position!()".to_string()); 871 | } 872 | } 873 | 874 | span_capture = Some(( 875 | name_or_ignore(op.elements[0].name.as_ref()), 876 | name_or_ignore(op.elements[1].name.as_ref()), 877 | name_or_ignore(op.elements[2].name.as_ref()), 878 | &op.action, 879 | )); 880 | } 881 | (&MarkerExpr(la), &MarkerExpr(ra)) if op.elements.len() >= 3 => { 882 | //infix 883 | let new_prec = match (la, ra) { 884 | (true, false) => prec + 1, // left associative 885 | (false, true) => prec, // right associative 886 | _ => return report_error(op_span, "precedence rules must use `@` and `(@)` to indicate associativity".to_string()) 887 | }; 888 | 889 | post_rules.push( 890 | labeled_seq(context, &op.elements[1..op.elements.len()-1], { 891 | quote_spanned!{ op_span => 892 | if let ::peg::RuleResult::Matched(__pos, #r_arg) = __recurse(__pos, #new_prec, __state, __err_state) { 893 | let #l_arg = __infix_result; 894 | __infix_result = #action; 895 | ::peg::RuleResult::Matched(__pos, ()) 896 | } else { ::peg::RuleResult::Failed } 897 | } 898 | }) 899 | ); 900 | } 901 | (&MarkerExpr(_), _) if op.elements.len() >= 2 => { 902 | // postfix 903 | post_rules.push(labeled_seq( 904 | context, 905 | &op.elements[1..op.elements.len()], 906 | { 907 | quote_spanned! 
{ op_span => 908 | let #l_arg = __infix_result; 909 | __infix_result = #action; 910 | ::peg::RuleResult::Matched(__pos, ()) 911 | } 912 | }, 913 | )); 914 | } 915 | (_, &MarkerExpr(a)) if op.elements.len() >= 2 => { 916 | // prefix 917 | let new_prec = match a { 918 | true => prec, 919 | false => prec + 1, 920 | }; 921 | pre_rules.push( 922 | labeled_seq(context, &op.elements[..op.elements.len()-1], { 923 | quote_spanned!{ op_span => 924 | if let ::peg::RuleResult::Matched(__pos, #r_arg) = __recurse(__pos, #new_prec, __state, __err_state) { 925 | ::peg::RuleResult::Matched(__pos, #action) 926 | } else { ::peg::RuleResult::Failed } 927 | } 928 | }) 929 | ); 930 | } 931 | _ => { 932 | // atom 933 | pre_rules.push(labeled_seq(context, &op.elements, { 934 | quote_spanned! { op_span => ::peg::RuleResult::Matched(__pos, #action) } 935 | })); 936 | } 937 | }; 938 | } 939 | 940 | if !post_rules.is_empty() { 941 | level_code.push(quote_spanned! { span => 942 | if #prec >= __min_prec { 943 | #( 944 | if let ::peg::RuleResult::Matched(__pos, ()) = #post_rules { 945 | return (__infix_result, ::peg::RuleResult::Matched(__pos, ())); 946 | } 947 | )* 948 | } 949 | }); 950 | } 951 | } 952 | 953 | let (enter, leave) = if cfg!(feature = "trace") { 954 | ( 955 | quote_spanned! {span => println!("[PEG_TRACE] Entering level {}", min_prec);}, 956 | quote_spanned! {span => println!("[PEG_TRACE] Leaving level {}", min_prec);}, 957 | ) 958 | } else { 959 | (quote!(), quote!()) 960 | }; 961 | 962 | // The closures below must be defined within the function call to which they are passed 963 | // due to https://github.com/rust-lang/rust/issues/41078 964 | 965 | quote_spanned! 
{ span => { 966 | fn __infix_parse( 967 | state: &mut S, 968 | err_state: &mut ::peg::error::ErrorState, 969 | min_prec: i32, 970 | lpos: usize, 971 | prefix_atom: &dyn Fn(usize, &mut S, &mut ::peg::error::ErrorState, &dyn Fn(usize, i32, &mut S, &mut ::peg::error::ErrorState) -> ::peg::RuleResult) -> ::peg::RuleResult, 972 | level_code: &dyn Fn(usize, usize, i32, T, &mut S, &mut ::peg::error::ErrorState, &dyn Fn(usize, i32, &mut S, &mut ::peg::error::ErrorState) -> ::peg::RuleResult) -> (T, ::peg::RuleResult<()>), 973 | ) -> ::peg::RuleResult { 974 | let initial = { 975 | prefix_atom(lpos, state, err_state, &|pos, min_prec, state, err_state| { 976 | __infix_parse(state, err_state, min_prec, pos, prefix_atom, level_code) 977 | }) 978 | }; 979 | 980 | if let ::peg::RuleResult::Matched(pos, mut infix_result) = initial { 981 | #enter 982 | let mut repeat_pos = pos; 983 | loop { 984 | let (val, res) = level_code( 985 | repeat_pos, 986 | lpos, 987 | min_prec, 988 | infix_result, 989 | state, 990 | err_state, 991 | &|pos, min_prec, state, err_state| { 992 | __infix_parse(state, err_state, min_prec, pos, prefix_atom, level_code) 993 | } 994 | ); 995 | infix_result = val; 996 | 997 | if let ::peg::RuleResult::Matched(pos, ()) = res { 998 | repeat_pos = pos; 999 | continue; 1000 | } 1001 | 1002 | break; 1003 | } 1004 | #leave 1005 | ::peg::RuleResult::Matched(repeat_pos, infix_result) 1006 | } else { 1007 | ::peg::RuleResult::Failed 1008 | } 1009 | } 1010 | 1011 | __infix_parse(__state, __err_state, 0, __pos, 1012 | &|__pos, __state, __err_state, __recurse| { 1013 | let __lpos = __pos; 1014 | #( 1015 | if let ::peg::RuleResult::Matched(__pos, __v) = #pre_rules { 1016 | return ::peg::RuleResult::Matched(__pos, __v); 1017 | } 1018 | )* 1019 | 1020 | ::peg::RuleResult::Failed 1021 | }, 1022 | &|__pos, __lpos, __min_prec, mut __infix_result, __state, __err_state, __recurse| { 1023 | #(#level_code)* 1024 | (__infix_result, ::peg::RuleResult::Failed) 1025 | } 1026 | ) 1027 | }} 
1028 | } 1029 | MarkerExpr { .. } => { 1030 | report_error(span, "`@` is only allowed in `precedence!{}`".to_string()) 1031 | } 1032 | } 1033 | } 1034 | -------------------------------------------------------------------------------- /peg-runtime/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "peg-runtime" 3 | version = "0.8.5" 4 | authors = [ "Kevin Mehall " ] 5 | license = "MIT" 6 | repository = "https://github.com/kevinmehall/rust-peg" 7 | description = "Runtime support for rust-peg grammars. To use rust-peg, see the `peg` crate." 8 | edition = "2021" 9 | 10 | [lib] 11 | path = "lib.rs" 12 | 13 | [features] 14 | std = [] 15 | unstable = [] -------------------------------------------------------------------------------- /peg-runtime/LICENSE: -------------------------------------------------------------------------------- 1 | ../LICENSE -------------------------------------------------------------------------------- /peg-runtime/error.rs: -------------------------------------------------------------------------------- 1 | //! 
Parse error reporting 2 | 3 | use crate::{Parse, RuleResult}; 4 | use std::fmt::{self, Debug, Display}; 5 | 6 | #[cfg(feature = "std")] 7 | use std::collections::BTreeSet; 8 | 9 | #[cfg(not(feature = "std"))] 10 | use {alloc::collections::BTreeSet, alloc::vec::Vec}; 11 | 12 | /// A set of literals or names that failed to match 13 | #[derive(PartialEq, Eq, Debug, Clone)] 14 | pub struct ExpectedSet { 15 | expected: BTreeSet<&'static str>, 16 | } 17 | 18 | impl ExpectedSet { 19 | /// Iterator of expected literals 20 | pub fn tokens<'a>(&'a self) -> impl Iterator + 'a { 21 | self.expected.iter().map(|x| *x) 22 | } 23 | } 24 | 25 | impl Display for ExpectedSet { 26 | fn fmt(&self, fmt: &mut fmt::Formatter) -> Result<(), fmt::Error> { 27 | if self.expected.is_empty() { 28 | write!(fmt, "")?; 29 | } else if self.expected.len() == 1 { 30 | write!(fmt, "{}", self.expected.iter().next().unwrap())?; 31 | } else { 32 | let mut errors = self.tokens().collect::>(); 33 | errors.sort(); 34 | let mut iter = errors.into_iter(); 35 | 36 | write!(fmt, "one of {}", iter.next().unwrap())?; 37 | for elem in iter { 38 | write!(fmt, ", {}", elem)?; 39 | } 40 | } 41 | 42 | Ok(()) 43 | } 44 | } 45 | 46 | /// A parse failure. 47 | #[derive(PartialEq, Eq, Debug, Clone)] 48 | pub struct ParseError { 49 | /// The furthest position the parser reached in the input before failing. 50 | pub location: L, 51 | 52 | /// The set of literals that failed to match at that position. 
53 | pub expected: ExpectedSet, 54 | } 55 | 56 | impl Display for ParseError { 57 | fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::result::Result<(), ::std::fmt::Error> { 58 | write!( 59 | fmt, 60 | "error at {}: expected {}", 61 | self.location, self.expected 62 | ) 63 | } 64 | } 65 | 66 | #[cfg(any(feature = "std", feature = "unstable"))] 67 | impl ::std::error::Error for ParseError { 68 | fn description(&self) -> &str { 69 | "parse error" 70 | } 71 | } 72 | 73 | #[doc(hidden)] 74 | pub struct ErrorState { 75 | /// Furthest failure we've hit so far. 76 | pub max_err_pos: usize, 77 | 78 | /// Are we inside a lookahead/quiet block? If so, failure is disabled. 79 | /// Non-zero => yes, to support nested blocks. 80 | pub suppress_fail: usize, 81 | 82 | /// Are we reparsing after a failure? If so, compute and store expected set of all alternative expectations 83 | /// when we are at offset `max_err_pos`. 84 | pub reparsing_on_error: bool, 85 | 86 | /// The set of tokens we expected to find when we hit the failure. Updated when `reparsing_on_error`. 87 | pub expected: ExpectedSet, 88 | } 89 | 90 | impl ErrorState { 91 | pub fn new(initial_pos: usize) -> Self { 92 | ErrorState { 93 | max_err_pos: initial_pos, 94 | suppress_fail: 0, 95 | reparsing_on_error: false, 96 | expected: ExpectedSet { 97 | expected: BTreeSet::new(), 98 | }, 99 | } 100 | } 101 | 102 | /// Set up for reparsing to record the details of the furthest failure. 103 | pub fn reparse_for_error(&mut self) { 104 | self.suppress_fail = 0; 105 | self.reparsing_on_error = true; 106 | } 107 | 108 | #[inline(never)] 109 | pub fn mark_failure_slow_path(&mut self, pos: usize, expected: &'static str) { 110 | if pos == self.max_err_pos { 111 | self.expected.expected.insert(expected); 112 | } 113 | } 114 | 115 | /// Flag a failure. 
116 | #[inline(always)] 117 | pub fn mark_failure(&mut self, pos: usize, expected: &'static str) -> RuleResult<()> { 118 | if self.suppress_fail == 0 { 119 | if self.reparsing_on_error { 120 | self.mark_failure_slow_path(pos, expected); 121 | } else if pos > self.max_err_pos { 122 | self.max_err_pos = pos; 123 | } 124 | } 125 | RuleResult::Failed 126 | } 127 | 128 | pub fn into_parse_error(self, input: &I) -> ParseError { 129 | ParseError { 130 | location: Parse::position_repr(input, self.max_err_pos.into()), 131 | expected: self.expected, 132 | } 133 | } 134 | } 135 | -------------------------------------------------------------------------------- /peg-runtime/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(not(feature = "std"), no_std)] 2 | #![cfg_attr(feature = "unstable", feature(error_in_core))] 3 | 4 | use std::fmt::Display; 5 | 6 | pub mod error; 7 | mod slice; 8 | pub mod str; 9 | 10 | /// The result type used internally in the parser. 11 | /// 12 | /// You'll only need this if implementing the `Parse*` traits for a custom input 13 | /// type, or using the `#{}` syntax to embed a custom Rust snippet within the parser. 14 | /// 15 | /// The public API of a parser adapts errors to `std::result::Result` instead of using this type. 16 | #[derive(Clone, PartialEq, PartialOrd, Eq, Ord, Debug, Hash)] 17 | pub enum RuleResult { 18 | /// Success, with final location 19 | Matched(usize, T), 20 | 21 | /// Failure (furthest failure location is not yet known) 22 | Failed, 23 | } 24 | 25 | /// A type that can be used as input to a parser. 26 | #[allow(clippy::needless_lifetimes)] 27 | pub trait Parse { 28 | type PositionRepr: Display; 29 | fn start<'input>(&'input self) -> usize; 30 | fn is_eof<'input>(&'input self, p: usize) -> bool; 31 | fn position_repr<'input>(&'input self, p: usize) -> Self::PositionRepr; 32 | } 33 | 34 | /// A parser input type supporting the `[...]` syntax. 
35 | pub trait ParseElem<'input>: Parse { 36 | /// Type of a single atomic element of the input, for example a character or token 37 | type Element: Copy; 38 | 39 | /// Get the element at `pos`, or `Failed` if past end of input. 40 | fn parse_elem(&'input self, pos: usize) -> RuleResult; 41 | } 42 | 43 | /// A parser input type supporting the `"literal"` syntax. 44 | pub trait ParseLiteral: Parse { 45 | /// Attempt to match the `literal` string at `pos`, returning whether it 46 | /// matched or failed. 47 | fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()>; 48 | } 49 | 50 | /// A parser input type supporting the `$()` syntax. 51 | pub trait ParseSlice<'input>: Parse { 52 | /// Type of a slice of the input. 53 | type Slice; 54 | 55 | /// Get a slice of input. 56 | fn parse_slice(&'input self, p1: usize, p2: usize) -> Self::Slice; 57 | } 58 | 59 | #[cfg(not(feature = "std"))] 60 | extern crate alloc; 61 | #[cfg(not(feature = "std"))] 62 | extern crate core as std; 63 | 64 | // needed for type inference on the `#{|input, pos| ..}` closure, since there 65 | // are different type inference rules on closures in function args. 
66 | #[doc(hidden)] 67 | pub fn call_custom_closure(f: impl FnOnce(I, usize) -> RuleResult, input: I, pos: usize) -> RuleResult { 68 | f(input, pos) 69 | } -------------------------------------------------------------------------------- /peg-runtime/slice.rs: -------------------------------------------------------------------------------- 1 | use super::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult}; 2 | 3 | impl Parse for [T] { 4 | type PositionRepr = usize; 5 | #[inline] 6 | fn start(&self) -> usize { 7 | 0 8 | } 9 | 10 | #[inline] 11 | fn is_eof(&self, pos: usize) -> bool { 12 | pos >= self.len() 13 | } 14 | 15 | #[inline] 16 | fn position_repr(&self, pos: usize) -> usize { 17 | pos 18 | } 19 | } 20 | 21 | impl<'input, T: 'input + Copy> ParseElem<'input> for [T] { 22 | type Element = T; 23 | 24 | #[inline] 25 | fn parse_elem(&'input self, pos: usize) -> RuleResult { 26 | match self[pos..].first() { 27 | Some(c) => RuleResult::Matched(pos + 1, *c), 28 | None => RuleResult::Failed, 29 | } 30 | } 31 | } 32 | 33 | impl ParseLiteral for [u8] { 34 | #[inline] 35 | fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> { 36 | let l = literal.len(); 37 | if self.len() >= pos + l && &self[pos..pos + l] == literal.as_bytes() { 38 | RuleResult::Matched(pos + l, ()) 39 | } else { 40 | RuleResult::Failed 41 | } 42 | } 43 | } 44 | 45 | impl<'input, T: 'input> ParseSlice<'input> for [T] { 46 | type Slice = &'input [T]; 47 | #[inline] 48 | fn parse_slice(&'input self, p1: usize, p2: usize) -> &'input [T] { 49 | &self[p1..p2] 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /peg-runtime/str.rs: -------------------------------------------------------------------------------- 1 | //! 
Utilities for `str` input 2 | 3 | use super::{Parse, ParseElem, ParseLiteral, ParseSlice, RuleResult}; 4 | use std::fmt::Display; 5 | 6 | /// Line and column within a string 7 | #[derive(PartialEq, Eq, Debug, Clone, Copy)] 8 | pub struct LineCol { 9 | /// Line (1-indexed) 10 | pub line: usize, 11 | 12 | /// Column (1-indexed) 13 | pub column: usize, 14 | 15 | /// Byte offset from start of string (0-indexed) 16 | pub offset: usize, 17 | } 18 | 19 | impl Display for LineCol { 20 | fn fmt(&self, fmt: &mut ::std::fmt::Formatter) -> ::std::result::Result<(), ::std::fmt::Error> { 21 | write!(fmt, "{}:{}", self.line, self.column) 22 | } 23 | } 24 | 25 | impl Parse for str { 26 | type PositionRepr = LineCol; 27 | #[inline] 28 | fn start(&self) -> usize { 29 | 0 30 | } 31 | 32 | #[inline] 33 | fn is_eof(&self, pos: usize) -> bool { 34 | pos >= self.len() 35 | } 36 | 37 | fn position_repr(&self, pos: usize) -> LineCol { 38 | let before = &self[..pos]; 39 | let line = before.as_bytes().iter().filter(|&&c| c == b'\n').count() + 1; 40 | let column = before.chars().rev().take_while(|&c| c != '\n').count() + 1; 41 | LineCol { 42 | line, 43 | column, 44 | offset: pos, 45 | } 46 | } 47 | } 48 | 49 | impl<'input> ParseElem<'input> for str { 50 | type Element = char; 51 | 52 | #[inline] 53 | fn parse_elem(&'input self, pos: usize) -> RuleResult { 54 | match self[pos..].chars().next() { 55 | Some(c) => RuleResult::Matched(pos + c.len_utf8(), c), 56 | None => RuleResult::Failed, 57 | } 58 | } 59 | } 60 | 61 | impl ParseLiteral for str { 62 | #[inline] 63 | fn parse_string_literal(&self, pos: usize, literal: &str) -> RuleResult<()> { 64 | let l = literal.len(); 65 | if self.len() >= pos + l && &self.as_bytes()[pos..pos + l] == literal.as_bytes() { 66 | RuleResult::Matched(pos + l, ()) 67 | } else { 68 | RuleResult::Failed 69 | } 70 | } 71 | } 72 | 73 | impl<'input> ParseSlice<'input> for str { 74 | type Slice = &'input str; 75 | #[inline] 76 | fn parse_slice(&'input self, p1: usize, p2: 
usize) -> &'input str { 77 | &self[p1..p2] 78 | } 79 | } 80 | -------------------------------------------------------------------------------- /src/lib.rs: -------------------------------------------------------------------------------- 1 | #![cfg_attr(not(feature = "std"), no_std)] 2 | 3 | //! `rust-peg` is a simple yet flexible parser generator that makes it easy to 4 | //! write robust parsers. Based on the [Parsing Expression 5 | //! Grammar][wikipedia-peg] formalism, it provides a Rust macro that builds a 6 | //! recursive descent parser from a concise definition of the grammar. 7 | //! 8 | //! [wikipedia-peg]: https://en.wikipedia.org/wiki/Parsing_expression_grammar 9 | //! 10 | //! ## Features 11 | //! 12 | //! * Parse input from `&str`, `&[u8]`, `&[T]` or custom types implementing 13 | //! traits 14 | //! * Customizable reporting of parse errors 15 | //! * Rules can accept arguments to create reusable rule templates 16 | //! * Precedence climbing for prefix/postfix/infix expressions 17 | //! * Helpful `rustc` error messages for errors in the grammar definition or the 18 | //! Rust code embedded within it 19 | //! * Rule-level tracing to debug grammars 20 | //! 21 | //! ## Overview 22 | //! 23 | //! The `peg::parser!{}` macro encloses a `grammar NAME() for INPUT_TYPE { ... 24 | //! }` definition containing a set of rules which match components of your 25 | //! language. 26 | //! 27 | //! Rules are defined with `rule NAME(PARAMETERS) -> RETURN_TYPE = PEG_EXPR`. 28 | //! The body of the rule, following the `=`, is a PEG expression, definining how 29 | //! the input is matched to produce a value. 30 | //! 31 | //! PEG expressions are evaluated at a particular position of the input. When an 32 | //! expression matches, it advances the position and optionally returns a value. 33 | //! The expression syntax and behavior is [documented 34 | //! below](#expression-reference). 35 | //! 36 | //! 
The macro expands to a Rust `mod` containing a function for each rule marked 37 | //! `pub` in the grammar. To parse an input sequence, call one of these 38 | //! functions. The call returns a `Result` carrying either the 39 | //! successfully parsed value returned by the rule, or a `ParseError` containing 40 | //! the failure position and the set of tokens expected there. 41 | //! 42 | //! ## Example 43 | //! 44 | //! Parse a comma-separated list of numbers surrounded by brackets into a `Vec<u32>`: 45 | //! 46 | //! ```rust 47 | //! peg::parser!{ 48 | //! grammar list_parser() for str { 49 | //! rule number() -> u32 50 | //! = n:$(['0'..='9']+) {? n.parse().or(Err("u32")) } 51 | //! 52 | //! pub rule list() -> Vec<u32> 53 | //! = "[" l:(number() ** ",") "]" { l } 54 | //! } 55 | //! } 56 | //! 57 | //! pub fn main() { 58 | //! assert_eq!(list_parser::list("[1,1,2,3,5,8]"), Ok(vec![1, 1, 2, 3, 5, 8])); 59 | //! } 60 | //! ``` 61 | //! 62 | //! ## Expression Reference 63 | //! 64 | //! ### Atoms 65 | //! 66 | //! * `"keyword"` - _Literal:_ match a literal string. 67 | //! * `['0'..='9']` - _Pattern:_ match a single element that matches a Rust `match`-style 68 | //! pattern. [(details)](#pattern-expressions) 69 | //! * `[^ '0'..='9']` - _Inverted pattern:_ match a single element that does not match a Rust `match`-style 70 | //! pattern. [(details)](#pattern-expressions) 71 | //! * `some_rule()` - _Rule:_ match a rule defined elsewhere in the grammar and return its 72 | //! result. Arguments in the parentheses are Rust expressions. 73 | //! * `_` or `__` or `___` - _Rule (underscore):_ As a special case, rule names 74 | //! consisting of underscores can be defined and invoked without parentheses. These are 75 | //! conventionally used to match whitespace between tokens. 76 | //! * `(e)` - _Parentheses:_ wrap an expression into a group to override 77 | //! normal precedence. Returns the same value as the inner expression. (Use 78 | //! 
an _Action_ block to set the return value for a sequence). 79 | //! 80 | //! ### Combining 81 | //! 82 | //! * `e1 e2 e3` - _Sequence:_ match expressions in sequence (`e1` followed by `e2` followed by 83 | //! `e3`), ignoring the return values. 84 | //! * `a:e1 e2 b:e3 c:e4 { rust }` - _Action:_ match `e1`, `e2`, `e3`, `e4` in 85 | //! sequence, like above. If they match successfully, run the Rust code in 86 | //! the block and return its return value. The variable names before the 87 | //! colons in the sequence are bound to the results of the 88 | //! corresponding expressions. It is important that the Rust code embedded 89 | //! in the grammar is deterministic and free of side effects, as it may be 90 | //! called multiple times. 91 | //! * `a:e1 b:e2 c:e3 {? rust }` - _Conditional action:_ Like above, but the 92 | //! Rust block returns a `Result<T, &str>` instead of a value directly. On 93 | //! `Ok(v)`, it matches successfully and returns `v`. On `Err(e)`, the match 94 | //! of the entire expression fails and it tries alternatives or reports a 95 | //! parse failure with the `&str` `e`. 96 | //! * `e1 / e2 / e3` - _Ordered choice:_ try to match `e1`. If the match succeeds, return its 97 | //! result, otherwise try `e2`, and so on. 98 | //! 99 | //! ### Repetition 100 | //! * `expression?` - _Optional:_ match zero or one repetitions of `expression`. Returns an 101 | //! `Option`. 102 | //! * `expression*` - _Repeat:_ match zero or more repetitions of `expression` and return the 103 | //! results as a `Vec`. 104 | //! * `expression+` - _One-or-more:_ match one or more repetitions of `expression` and return the 105 | //! results as a `Vec`. 106 | //! * `expression*<n,m>` - _Range repeat:_ match between `n` and `m` repetitions of `expression` 107 | //! and return the results as a `Vec`. [(details)](#repeat-ranges) 108 | //! * `expression ** delim` - _Delimited repeat:_ match zero or more repetitions of `expression` 109 | //!
delimited with `delim` and return the results as a `Vec`. 110 | //! * `expression **<n,m> delim` - _Delimited repeat (range):_ match between `n` and `m` repetitions of `expression` 111 | //! delimited with `delim` and return the results as a `Vec`. [(details)](#repeat-ranges) 112 | //! * `expression ++ delim` - _Delimited repeat (one or more):_ match one or more repetitions of `expression` 113 | //! delimited with `delim` and return the results as a `Vec`. 114 | //! 115 | //! ### Special 116 | //! * `$(e)` - _Slice:_ match the expression `e`, and return the slice of the input 117 | //! corresponding to the match. 118 | //! * `&e` - _Positive lookahead:_ Match only if `e` matches at this position, 119 | //! without consuming any characters. 120 | //! * `!e` - _Negative lookahead:_ Match only if `e` does not match at this 121 | //! position, without consuming any characters. 122 | //! * `position!()` - return a `usize` representing the current offset into 123 | //! the input without consuming anything. 124 | //! * `quiet!{ e }` - match the expression `e`, but don't report literals within it as "expected" in 125 | //! error messages. 126 | //! * `expected!("something")` - fail to match, and report the specified string as expected 127 | //! at the current location. 128 | //! * `precedence!{ ... }` - Parse infix, prefix, or postfix expressions by precedence climbing. 129 | //! [(details)](#precedence-climbing) 130 | //! * `#{|input, pos| ... }` - _Custom:_ The provided closure is passed the full input and current 131 | //! parse position, and returns a [`RuleResult`]. 132 | //! 133 | //! ## Expression details 134 | //! 135 | //! ### Pattern expressions 136 | //! 137 | //! The `[pat]` syntax expands into a [Rust `match` 138 | //! pattern](https://doc.rust-lang.org/book/ch18-03-pattern-syntax.html) against the next character 139 | //! (or element) of the input. 140 | //! 141 | //! When the pattern begins with `^`, the matching behavior is inverted: 142 | //!
the expression succeeds only if the pattern does *not* match. 143 | //! `[^' ']` matches any character other than a space. 144 | //! 145 | //! To match sets of characters, use Rust's `..=` inclusive range pattern 146 | //! syntax and `|` to match multiple patterns. For example `['a'..='z' | 'A'..='Z']` matches an 147 | //! upper or lower case ASCII alphabet character. 148 | //! 149 | //! If your input type is a slice of an enum type, a pattern could match an enum variant like 150 | //! `[Token::Operator('+')]`. 151 | //! 152 | //! Variables captured by the pattern are accessible in a subsequent action 153 | //! block: `[Token::Integer(i)] { i }`. 154 | //! 155 | //! The pattern expression also evaluates to the matched element, which can be 156 | //! captured into a variable or used as the return value of a rule: `c:['+'|'-']`. 157 | //! 158 | //! Like Rust `match`, pattern expressions support guard expressions: 159 | //! `[c if c.is_ascii_digit()]`. 160 | //! 161 | //! `[_]` matches any single element. As this always matches except at end-of-file, combining it 162 | //! with negative lookahead as `![_]` is the idiom for matching EOF in PEG. 163 | //! 164 | //! ### Repeat ranges 165 | //! 166 | //! The repeat operators `*` and `**` can be followed by an optional range specification of the 167 | //! form `<n>` (exact), `<n,>` (min-inclusive), `<,m>` (max-inclusive) or `<n,m>` (range-inclusive), where `n` and `m` are either 168 | //! integers, or a Rust `usize` expression enclosed in `{}`. 169 | //! 170 | //! ### Precedence climbing 171 | //! 172 | //! `precedence!{ rules... }` provides a convenient way to parse infix, prefix, and postfix 173 | //! operators using the [precedence 174 | //! climbing](http://eli.thegreenplace.net/2012/08/02/parsing-expressions-by-precedence-climbing) 175 | //! algorithm. 176 | //! 177 | //! ```rust,no_run 178 | //! # peg::parser!{grammar doc() for str { 179 | //! # pub rule number() -> i64 = "..." { 0 } 180 | //!
pub rule arithmetic() -> i64 = precedence!{ 181 | //! x:(@) "+" y:@ { x + y } 182 | //! x:(@) "-" y:@ { x - y } 183 | //! -- 184 | //! x:(@) "*" y:@ { x * y } 185 | //! x:(@) "/" y:@ { x / y } 186 | //! -- 187 | //! x:@ "^" y:(@) { x.pow(y as u32) } 188 | //! -- 189 | //! n:number() { n } 190 | //! "(" e:arithmetic() ")" { e } 191 | //! } 192 | //! # }} 193 | //! # fn main() {} 194 | //! ``` 195 | //! 196 | //! Each `--` introduces a new precedence level that binds more tightly than previous precedence 197 | //! levels. The levels consist of one or more operator rules each followed by a Rust action 198 | //! expression. 199 | //! 200 | //! The `(@)` and `@` are the operands, and the parentheses indicate associativity. An operator 201 | //! rule beginning and ending with `@` is an infix expression. Prefix and postfix rules have one 202 | //! `@` at the beginning or end, and atoms do not include `@`. 203 | //! 204 | //! ## Input types 205 | //! 206 | //! The first line of the grammar declares an input type. This is normally 207 | //! `str`, but `rust-peg` handles input types through a series of traits. The 208 | //! library comes with implementations for `str`, `[u8]`, and `[T]`. Define the 209 | //! traits below to use your own types as input to `peg` grammars: 210 | //! 211 | //! * [`Parse`] is the base trait required for all inputs. The others are only required to use the 212 | //! corresponding expressions. 213 | //! * [`ParseElem`] implements the `[_]` pattern operator, with a method returning the next item of 214 | //! the input to match. 215 | //! * [`ParseLiteral`] implements matching against a `"string"` literal. 216 | //! * [`ParseSlice`] implements the `$()` operator, returning a slice from a span of indexes. 217 | //! 218 | //! As a more complex example, the body of the `peg::parser!{}` macro itself is 219 | //! parsed with `peg`, using a [definition of these traits][gh-flat-token-tree] 220 | //! for a type that wraps Rust's `TokenTree`. 221 | //! 
222 | //! [gh-flat-token-tree]: https://github.com/kevinmehall/rust-peg/blob/master/peg-macros/tokens.rs 223 | //! 224 | //! ## End-of-file handling 225 | //! 226 | //! Normally, parsers report an error if the top-level rule matches without consuming all the input. 227 | //! To allow matching a prefix of the input, add the `#[no_eof]` attribute before `pub rule`. 228 | //! Take care to not miss a malformed `x` at the last position if the rule ends with a `x()*` 229 | //! repeat expression. 230 | //! 231 | //! ## Rule parameters 232 | //! 233 | //! Rules can be parameterized with types, lifetimes, and values, just like Rust functions. 234 | //! 235 | //! In addition to Rust values, rules can also accept PEG expression fragments as arguments by using 236 | //! `rule<R>` as a parameter type. When calling such a rule, use `<>` around a PEG expression in the 237 | //! argument list to capture the expression and pass it to the rule. 238 | //! 239 | //! For example: 240 | //! 241 | //! ```rust,no_run 242 | //! # peg::parser!{grammar doc() for str { 243 | //! rule num_radix(radix: u32) -> u32 244 | //! = n:$(['0'..='9']+) {? u32::from_str_radix(n, radix).or(Err("number")) } 245 | //! 246 | //! rule list<R>(x: rule<R>) -> Vec<R> = "[" v:(x() ** ",") ","? "]" {v} 247 | //! 248 | //! pub rule octal_list() -> Vec<u32> = list(<num_radix(8)>) 249 | //! # }} 250 | //! # fn main() {} 251 | //! ``` 252 | //! 253 | //! ## Failure reporting 254 | //! 255 | //! When a match fails, position information is automatically recorded to report a set of 256 | //! "expected" tokens that would have allowed the parser to advance further. 257 | //! 258 | //! Some rules should never appear in error messages, and can be suppressed with `quiet!{e}`: 259 | //! ```rust,no_run 260 | //! # peg::parser!{grammar doc() for str { 261 | //! rule whitespace() = quiet!{[' ' | '\n' | '\t']+} 262 | //! # }} 263 | //! # fn main() {} 264 | //! ``` 265 | //! 266 | //!
If you want the "expected" set to contain a more helpful string instead of character sets, you 267 | //! can use `quiet!{}` and `expected!()` together: 268 | //! 269 | //! ```rust,no_run 270 | //! # peg::parser!{grammar doc() for str { 271 | //! rule identifier() 272 | //! = quiet!{[ 'a'..='z' | 'A'..='Z']['a'..='z' | 'A'..='Z' | '0'..='9' ]*} 273 | //! / expected!("identifier") 274 | //! # }} 275 | //! # fn main() {} 276 | //! ``` 277 | //! 278 | //! ## Imports 279 | //! 280 | //! ```rust,no_run 281 | //! mod ast { 282 | //! pub struct Expr; 283 | //! } 284 | //! 285 | //! peg::parser!{grammar doc() for str { 286 | //! use self::ast::Expr; 287 | //! }} 288 | //! # fn main() {} 289 | //! ``` 290 | //! 291 | //! The grammar may begin with a series of `use` declarations, just like in Rust, which are 292 | //! included in the generated module. Unlike normal `mod {}` blocks, `use super::*` is inserted by 293 | //! default, so you don't have to deal with this most of the time. 294 | //! 295 | //! ## Rustdoc comments 296 | //! 297 | //! `rustdoc` comments with `///` before a `grammar` or `pub rule` are propagated to the resulting 298 | //! module or function: 299 | //! 300 | //! ```rust,no_run 301 | //! # peg::parser!{grammar doc() for str { 302 | //! /// Parse an array expression. 303 | //! pub rule array() -> Vec = "[...]" { vec![] } 304 | //! # }} 305 | //! # fn main() {} 306 | //! ``` 307 | //! 308 | //! As with all procedural macros, non-doc comments are ignored by the lexer and can be used like 309 | //! in any other Rust code. 310 | //! 311 | //! ## Caching and left recursion 312 | //! 313 | //! A `rule` without parameters can be prefixed with `#[cache]` if it is likely 314 | //! to be checked repeatedly in the same position. This memoizes the rule result 315 | //! as a function of input position, in the style of a [packrat 316 | //! parser][wp-peg-packrat]. 317 | //! 318 | //! 
[wp-peg-packrat]: https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars 319 | //! 320 | //! However, idiomatic code avoids structures that parse the same input 321 | //! repeatedly, so the use of `#[cache]` is often not a performance win. Simple 322 | //! rules may also be faster to re-match than the additional cost of the hash 323 | //! table lookup and insert. 324 | //! 325 | //! For example, a complex rule called `expr` might benefit from caching if used 326 | //! like `expr() "x" / expr() "y" / expr() "z"`, but this could be rewritten to 327 | //! `expr() ("x" / "y" / "z")` which would be even faster. 328 | //! 329 | //! `#[cache_left_rec]` extends the `#[cache]` mechanism with the ability to resolve 330 | //! left-recursive rules, which are otherwise an error. 331 | //! 332 | //! The `precedence!{}` syntax is another way to handle nested operators and avoid 333 | //! repeatedly matching an expression rule. 334 | //! 335 | //! ## Tracing 336 | //! 337 | //! If you pass the `peg/trace` feature to Cargo when building your project, a 338 | //! trace of the rules attempted and matched will be printed to stdout when 339 | //! parsing. For example, 340 | //! ```sh 341 | //! $ cargo run --features peg/trace 342 | //! ... 343 | //! [PEG_TRACE] Matched rule type at 8:5 344 | //! [PEG_TRACE] Attempting to match rule ident at 8:12 345 | //! [PEG_TRACE] Attempting to match rule letter at 8:12 346 | //! [PEG_TRACE] Failed to match rule letter at 8:12 347 | //! ... 348 | //! 
``` 349 | 350 | extern crate peg_macros; 351 | extern crate peg_runtime as runtime; 352 | 353 | pub use peg_macros::parser; 354 | pub use runtime::*; 355 | -------------------------------------------------------------------------------- /tests/compile-fail/cache_with_args.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | #[cache] 5 | rule foo(x: u32) = "foo" //~ ERROR 6 | 7 | #[cache] 8 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR 9 | }); 10 | 11 | fn main() {} -------------------------------------------------------------------------------- /tests/compile-fail/cache_with_args.stderr: -------------------------------------------------------------------------------- 1 | error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec] 2 | --> $DIR/cache_with_args.rs:5:10 3 | | 4 | 5 | rule foo(x: u32) = "foo" //~ ERROR 5 | | ^^^ 6 | 7 | error: rules with generics or parameters cannot use #[cache] or #[cache_left_rec] 8 | --> $DIR/cache_with_args.rs:8:10 9 | | 10 | 8 | rule ltarg<'a>() -> &'a str = { "" } //~ ERROR 11 | | ^^^^^ 12 | -------------------------------------------------------------------------------- /tests/compile-fail/duplicate_rule.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { //~ ERROR the name `__parse_foo` is defined multiple times [E0428] 4 | rule foo() = "foo" 5 | 6 | rule foo() = "xyz" //~ ERROR duplicate rule `foo` 7 | }); 8 | 9 | fn main() {} -------------------------------------------------------------------------------- /tests/compile-fail/duplicate_rule.stderr: -------------------------------------------------------------------------------- 1 | error: duplicate rule `foo` 2 | --> $DIR/duplicate_rule.rs:6:10 3 | | 4 | 6 | rule foo() = "xyz" //~ ERROR duplicate rule `foo` 5 | | ^^^ 6 | 
-------------------------------------------------------------------------------- /tests/compile-fail/incomplete_grammar.rs: -------------------------------------------------------------------------------- 1 | 2 | peg::parser!(); 3 | 4 | peg::parser!( 5 | grammar parser() for str 6 | ); 7 | 8 | fn main() {} -------------------------------------------------------------------------------- /tests/compile-fail/incomplete_grammar.stderr: -------------------------------------------------------------------------------- 1 | error: expected one of "#", "grammar", "pub" at end of input 2 | --> tests/compile-fail/incomplete_grammar.rs:2:1 3 | | 4 | 2 | peg::parser!(); 5 | | ^^^^^^^^^^^^^^ 6 | | 7 | = note: this error originates in the macro `peg::parser` (in Nightly builds, run with -Z macro-backtrace for more info) 8 | 9 | error: expected one of "::", "<", "{" at end of input 10 | --> tests/compile-fail/incomplete_grammar.rs:4:1 11 | | 12 | 4 | / peg::parser!( 13 | 5 | | grammar parser() for str 14 | 6 | | ); 15 | | |_^ 16 | | 17 | = note: this error originates in the macro `peg::parser` (in Nightly builds, run with -Z macro-backtrace for more info) 18 | -------------------------------------------------------------------------------- /tests/compile-fail/left_recursion_without_cache.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | rule rec() = rec() //~ ERROR left recursive rules create an infinite loop: rec -> rec 5 | 6 | rule foo() 7 | = "foo" foo() 8 | / bar() //~ ERROR left recursive rules create an infinite loop: bar -> foo -> bar 9 | 10 | rule bar() 11 | = "bar" bar() 12 | / foo() //~ ERROR left recursive rules create an infinite loop: foo -> bar -> foo 13 | 14 | rule prec() = precedence! 
{ 15 | prec() { () } //~ ERROR 16 | } 17 | }); 18 | 19 | fn main() {} 20 | -------------------------------------------------------------------------------- /tests/compile-fail/left_recursion_without_cache.stderr: -------------------------------------------------------------------------------- 1 | error: left recursive rules create an infinite loop: rec -> rec 2 | --> $DIR/left_recursion_without_cache.rs:4:18 3 | | 4 | 4 | rule rec() = rec() //~ ERROR left recursive rules create an infinite loop: rec -> rec 5 | | ^^^ 6 | 7 | error: left recursive rules create an infinite loop: foo -> bar -> foo 8 | --> $DIR/left_recursion_without_cache.rs:12:11 9 | | 10 | 12 | / foo() //~ ERROR left recursive rules create an infinite loop: foo -> bar -> foo 11 | | ^^^ 12 | 13 | error: left recursive rules create an infinite loop: bar -> foo -> bar 14 | --> $DIR/left_recursion_without_cache.rs:8:11 15 | | 16 | 8 | / bar() //~ ERROR left recursive rules create an infinite loop: bar -> foo -> bar 17 | | ^^^ 18 | 19 | error: left recursive rules create an infinite loop: prec -> prec 20 | --> $DIR/left_recursion_without_cache.rs:15:9 21 | | 22 | 15 | prec() { () } //~ ERROR 23 | | ^^^^ 24 | -------------------------------------------------------------------------------- /tests/compile-fail/nullable_loop.rs: -------------------------------------------------------------------------------- 1 | peg::parser!(grammar e() for str { 2 | rule nested() = ("a"*)* //~ ERROR 3 | 4 | rule nested_ok() = ("a"+)* 5 | 6 | rule nullable() = "x"? 7 | 8 | rule call() = "foo" nullable()* //~ ERROR 9 | 10 | rule more_complex() = ("x" / "a"? 
"b"?)*<2,> //~ ERROR 11 | }); 12 | 13 | fn main() {} 14 | -------------------------------------------------------------------------------- /tests/compile-fail/nullable_loop.stderr: -------------------------------------------------------------------------------- 1 | error: loops infinitely because loop body can match without consuming input 2 | --> $DIR/nullable_loop.rs:2:27 3 | | 4 | 2 | rule nested() = ("a"*)* //~ ERROR 5 | | ^ 6 | 7 | error: loops infinitely because loop body can match without consuming input 8 | --> $DIR/nullable_loop.rs:8:35 9 | | 10 | 8 | rule call() = "foo" nullable()* //~ ERROR 11 | | ^ 12 | 13 | error: loops infinitely because loop body can match without consuming input 14 | --> $DIR/nullable_loop.rs:10:44 15 | | 16 | 10 | rule more_complex() = ("x" / "a"? "b"?)*<2,> //~ ERROR 17 | | ^ 18 | -------------------------------------------------------------------------------- /tests/compile-fail/rule_args_errors.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | rule foo(x: i32, y: rule<()>) = "foo" 5 | rule ok() = foo(1, <[_] {}>) 6 | 7 | rule too_few() = foo(1) //~ ERROR 8 | rule too_many() = foo(1, <[_] {}>, 2) //~ ERROR 9 | 10 | pub rule pub_rule_arg(x: rule<()>) = "foo" //~ ERROR 11 | }); 12 | 13 | fn main() {} -------------------------------------------------------------------------------- /tests/compile-fail/rule_args_errors.stderr: -------------------------------------------------------------------------------- 1 | error: this rule takes 2 parameters but 1 parameters were supplied 2 | --> $DIR/rule_args_errors.rs:7:22 3 | | 4 | 7 | rule too_few() = foo(1) //~ ERROR 5 | | ^^^ 6 | 7 | error: this rule takes 2 parameters but 3 parameters were supplied 8 | --> $DIR/rule_args_errors.rs:8:23 9 | | 10 | 8 | rule too_many() = foo(1, <[_] {}>, 2) //~ ERROR 11 | | ^^^ 12 | 13 | error: parameters on `pub rule` must be Rust types 14 | --> 
$DIR/rule_args_errors.rs:10:27 15 | | 16 | 10 | pub rule pub_rule_arg(x: rule<()>) = "foo" //~ ERROR 17 | | ^ 18 | -------------------------------------------------------------------------------- /tests/compile-fail/rust_action_syntax_error.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | rule foo() = { + } //~ ERROR expected expression, found `+` 5 | }); 6 | 7 | fn main() {} 8 | -------------------------------------------------------------------------------- /tests/compile-fail/rust_action_syntax_error.stderr: -------------------------------------------------------------------------------- 1 | error: expected expression, found `+` 2 | --> $DIR/rust_action_syntax_error.rs:4:20 3 | | 4 | 4 | rule foo() = { + } //~ ERROR expected expression, found `+` 5 | | ^ expected expression 6 | -------------------------------------------------------------------------------- /tests/compile-fail/rust_action_type_error.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | struct X; 4 | struct Y; 5 | 6 | peg::parser!(grammar foo() for str { 7 | rule foo() -> X = "a" { Y } //~ ERROR 8 | }); 9 | 10 | fn main() {} 11 | -------------------------------------------------------------------------------- /tests/compile-fail/rust_action_type_error.stderr: -------------------------------------------------------------------------------- 1 | error[E0308]: mismatched types 2 | --> tests/compile-fail/rust_action_type_error.rs:7:27 3 | | 4 | 7 | rule foo() -> X = "a" { Y } //~ ERROR 5 | | ^^^^^ 6 | | | 7 | | expected struct `X`, found struct `Y` 8 | | arguments to this enum variant are incorrect 9 | | 10 | help: the type constructed contains `Y` due to the type of the argument passed 11 | --> tests/compile-fail/rust_action_type_error.rs:7:27 12 | | 13 | 7 | rule foo() -> X = "a" { Y } //~ ERROR 14 | | ^^^^^ this argument influences 
the type of `{{root}}` 15 | note: tuple variant defined here 16 | --> peg-runtime/lib.rs 17 | | 18 | | Matched(usize, T), 19 | | ^^^^^^^ 20 | = note: this error originates in the macro `peg::parser` (in Nightly builds, run with -Z macro-backtrace for more info) 21 | -------------------------------------------------------------------------------- /tests/compile-fail/syntax_error.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | fn asdf() {} //~ ERROR expected one of "#", "crate", "pub", "rule", "use", "}" 5 | }); 6 | 7 | fn main() {} 8 | -------------------------------------------------------------------------------- /tests/compile-fail/syntax_error.stderr: -------------------------------------------------------------------------------- 1 | error: expected one of "#", "pub", "rule", "use", "}" 2 | --> tests/compile-fail/syntax_error.rs:4:5 3 | | 4 | 4 | fn asdf() {} //~ ERROR expected one of "#", "crate", "pub", "rule", "use", "}" 5 | | ^^ 6 | -------------------------------------------------------------------------------- /tests/compile-fail/use_undefined_result.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | rule foo() = "asdf" 5 | 6 | rule bar() -> u32 = x:foo() { 0 } //~ ERROR using result of rule `foo`, which does not return a value 7 | }); 8 | 9 | fn main() {} 10 | -------------------------------------------------------------------------------- /tests/compile-fail/use_undefined_result.stderr: -------------------------------------------------------------------------------- 1 | error: using result of rule `foo`, which does not return a value 2 | --> $DIR/use_undefined_result.rs:6:27 3 | | 4 | 6 | rule bar() -> u32 = x:foo() { 0 } //~ ERROR using result of rule `foo`, which does not return a value 5 | | ^^^ 6 | 
-------------------------------------------------------------------------------- /tests/compile-fail/use_undefined_rule.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar foo() for str { 4 | rule bar() = foo() //~ ERROR undefined rule `foo` 5 | }); 6 | 7 | fn main() {} 8 | -------------------------------------------------------------------------------- /tests/compile-fail/use_undefined_rule.stderr: -------------------------------------------------------------------------------- 1 | error: undefined rule `foo` 2 | --> $DIR/use_undefined_rule.rs:4:18 3 | | 4 | 4 | rule bar() = foo() //~ ERROR undefined rule `foo` 5 | | ^^^ 6 | -------------------------------------------------------------------------------- /tests/run-pass/arithmetic.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use arithmetic::expression; 3 | 4 | peg::parser!( grammar arithmetic() for str { 5 | pub rule expression() -> i64 6 | = sum() 7 | 8 | rule sum() -> i64 9 | = l:product() "+" r:product() { l+r } 10 | / product() 11 | 12 | rule product() -> i64 13 | = l:atom() "*" r:atom() { l*r } 14 | / atom() 15 | 16 | rule atom() -> i64 17 | = number() 18 | / "(" v:sum() ")" { v } 19 | 20 | rule number() -> i64 21 | = n:$(['0'..='9']+) { n.parse().unwrap() } 22 | }); 23 | 24 | fn main() { 25 | assert_eq!(expression("1+1"), Ok(2)); 26 | assert_eq!(expression("5*5"), Ok(25)); 27 | assert_eq!(expression("222+3333"), Ok(3555)); 28 | assert_eq!(expression("2+3*4"), Ok(14)); 29 | assert_eq!(expression("(2+2)*3"), Ok(12)); 30 | assert!(expression("(22+)+1").is_err()); 31 | assert!(expression("1++1").is_err()); 32 | assert!(expression("3)+1").is_err()); 33 | } 34 | -------------------------------------------------------------------------------- /tests/run-pass/arithmetic_ast.rs: -------------------------------------------------------------------------------- 1 | extern 
crate peg; 2 | use peg::parser; 3 | 4 | #[derive(Clone, PartialEq, Eq, Debug)] 5 | pub enum Expression { 6 | Number(i64), 7 | Sum(Box, Box), 8 | Product(Box, Box), 9 | } 10 | 11 | parser!{ 12 | /// Doc comment 13 | grammar arithmetic() for str { 14 | /// Top level parser rule 15 | /// This doc comment has multiple lines to test support for that as well 16 | pub rule expression() -> Expression 17 | = sum() 18 | 19 | rule _ = [' ' | '\n']* 20 | 21 | rule sum() -> Expression 22 | = l:product() _ "+" _ r:product() { Expression::Sum(Box::new(l), Box::new(r)) } 23 | / product() 24 | 25 | rule product() -> Expression 26 | = l:atom() _ "*" _ r:atom() { Expression::Product(Box::new(l), Box::new(r)) } 27 | / atom() 28 | 29 | rule atom() -> Expression 30 | = number() 31 | / "(" _ v:sum() _ ")" { v } 32 | 33 | rule number() -> Expression 34 | = n:$(['0'..='9']+) { Expression::Number(n.parse().unwrap()) } 35 | }} 36 | 37 | fn main() { 38 | assert_eq!(arithmetic::expression("1+1"), Ok(Expression::Sum( 39 | Box::new(Expression::Number(1)), 40 | Box::new(Expression::Number(1))) 41 | )); 42 | assert_eq!(arithmetic::expression("5*5"), Ok(Expression::Product( 43 | Box::new(Expression::Number(5)), 44 | Box::new(Expression::Number(5))) 45 | )); 46 | assert_eq!(arithmetic::expression("2+3*4"), Ok(Expression::Sum( 47 | Box::new(Expression::Number(2)), 48 | Box::new(Expression::Product( 49 | Box::new(Expression::Number(3)), 50 | Box::new(Expression::Number(4)) 51 | )), 52 | ))); 53 | assert_eq!(arithmetic::expression("(2+3) * 4"), Ok(Expression::Product( 54 | Box::new(Expression::Sum( 55 | Box::new(Expression::Number(2)), 56 | Box::new(Expression::Number(3)), 57 | )), 58 | Box::new(Expression::Number(4)) 59 | ))); 60 | assert!(arithmetic::expression("(22+)+1").is_err()); 61 | assert!(arithmetic::expression("1++1").is_err()); 62 | assert!(arithmetic::expression("3)+1").is_err()); 63 | } 64 | -------------------------------------------------------------------------------- 
/tests/run-pass/arithmetic_infix.rs: -------------------------------------------------------------------------------- 1 | 2 | extern crate peg; 3 | 4 | peg::parser!( grammar arithmetic() for str { 5 | rule number() -> i64 6 | = n:$(['0'..='9']+) { n.parse().unwrap() } 7 | 8 | pub(crate) rule calculate() -> i64 = precedence!{ 9 | x:(@) "+" y:@ { x + y } 10 | x:(@) "-" y:@ { x - y } 11 | "-" v:@ { - v } 12 | -- 13 | x:(@) "*" y:@ { x * y } 14 | x:(@) "/" y:@ { x / y } 15 | -- 16 | x:@ "^" y:(@) { x.pow(y as u32) } 17 | v:@ "!" { (1..v+1).product() } 18 | -- 19 | "(" v:calculate() ")" { v } 20 | n:number() {n} 21 | } 22 | }); 23 | 24 | fn main() { 25 | assert_eq!(arithmetic::calculate("3+3*3+3"), Ok(15)); 26 | assert_eq!(arithmetic::calculate("2+2^2^2^2/2+2"), Ok(32772)); 27 | assert_eq!(arithmetic::calculate("1024/2/2/2+1"), Ok(129)); 28 | assert_eq!(arithmetic::calculate("1024/(1+1)/2/2+1"), Ok(129)); 29 | assert_eq!(arithmetic::calculate("-1-2*-2"), Ok(3)); 30 | assert_eq!(arithmetic::calculate("1+3!+1"), Ok(8)); 31 | } -------------------------------------------------------------------------------- /tests/run-pass/arithmetic_infix_ast.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar arithmetic() for str { 4 | rule ident() -> &'input str = $(['a'..='z']+) 5 | rule haskell_op() -> String = "`" i:ident() "`" [' '|'\n']* { i.to_owned() } 6 | rule plus() = "+" [' '|'\n']* 7 | 8 | pub rule expression() -> InfixAst = precedence!{ 9 | x:(@) plus() y:@ { InfixAst::Add(Box::new(x), Box::new(y)) } 10 | -- 11 | x:(@) op:haskell_op() y:@ { InfixAst::Op(op, Box::new(x), Box::new(y)) } 12 | -- 13 | i:ident() [' '|'\n']* { InfixAst::Ident(i.to_owned()) } 14 | } 15 | }); 16 | 17 | #[derive(Debug, PartialEq, Eq, Clone)] 18 | pub enum InfixAst { 19 | Ident(String), 20 | Add(Box, Box), 21 | Op(String, Box, Box) 22 | } 23 | 24 | fn main(){ 25 | assert_eq!(arithmetic::expression("a + b `x` 
c").unwrap(), 26 | InfixAst::Add( 27 | Box::new(InfixAst::Ident("a".to_owned())), 28 | Box::new(InfixAst::Op("x".to_owned(), 29 | Box::new(InfixAst::Ident("b".to_owned())), 30 | Box::new(InfixAst::Ident("c".to_owned())) 31 | )) 32 | ) 33 | ) 34 | } -------------------------------------------------------------------------------- /tests/run-pass/arithmetic_infix_ast_span.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar arithmetic() for str { 4 | rule ident() -> &'input str = $(['a'..='z']+) 5 | 6 | pub rule expression() -> Node = precedence!{ 7 | start:position!() node:@ end:position!() { Node { start, node, end} } 8 | -- 9 | x:(@) "+" y:@ { Op::Add(Box::new(x), Box::new(y)) } 10 | -- 11 | x:(@) "*" y:@ { Op::Mul(Box::new(x), Box::new(y)) } 12 | -- 13 | i:ident() [' '|'\n']* { Op::Ident(i.to_owned()) } 14 | } 15 | }); 16 | 17 | #[derive(Debug, PartialEq, Eq, Clone)] 18 | pub struct Node { 19 | node: Op, 20 | start: usize, 21 | end: usize, 22 | } 23 | 24 | #[derive(Debug, PartialEq, Eq, Clone)] 25 | pub enum Op { 26 | Ident(String), 27 | Add(Box, Box), 28 | Mul(Box, Box), 29 | } 30 | 31 | fn main(){ 32 | assert_eq!(arithmetic::expression("a+b*c").unwrap(), 33 | Node { 34 | start: 0, 35 | end: 5, 36 | node: Op::Add( 37 | Box::new(Node { 38 | start: 0, 39 | end: 1, 40 | node: Op::Ident("a".into()) 41 | }), 42 | Box::new(Node { 43 | start: 2, 44 | end: 5, 45 | node: Op::Mul( 46 | Box::new(Node { 47 | start: 2, 48 | end: 3, 49 | node: Op::Ident("b".into()) 50 | }), 51 | Box::new(Node { 52 | start: 4, 53 | end: 5, 54 | node: Op::Ident("c".into()) 55 | }) 56 | ) 57 | }) 58 | ) 59 | } 60 | ); 61 | } -------------------------------------------------------------------------------- /tests/run-pass/arithmetic_with_left_recursion.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | use arithmetic::sum; 4 | 5 | peg::parser!( grammar 
arithmetic() for str { 6 | #[cache_left_rec] 7 | pub rule sum() -> i64 8 | = l:sum() "+" r:number() { l+r } 9 | / number() 10 | 11 | rule number() -> i64 12 | = n:$(['0'..='9']+) { n.parse().unwrap() } 13 | }); 14 | 15 | fn main() { 16 | assert_eq!(sum("1"), Ok(1)); 17 | assert_eq!(sum("1+1"), Ok(2)); 18 | assert_eq!(sum("1+1+1"), Ok(3)); 19 | assert_eq!(sum("1+2+3"), Ok(6)); 20 | } 21 | -------------------------------------------------------------------------------- /tests/run-pass/assembly_ast_dyn_type_param_bounds.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use peg::parser; 3 | 4 | // C++ in Rust 5 | trait Operation<'a>: std::fmt::Debug {} 6 | trait Operand<'a>: std::fmt::Debug + AsDynOperand<'a> {} 7 | trait Location<'a>: Operand<'a> {} 8 | impl<'a, T: ?Sized + Location<'a>> Operand<'a> for T {} 9 | 10 | // Thanks to quinedot for their comprehensive write-up on dyn Traits. 11 | // https://quinedot.github.io/rust-learning/dyn-trait-combining.html#manual-supertrait-upcasting 12 | trait AsDynOperand<'a> { 13 | fn as_dyn_operand(self: Box<Self>) -> Box<dyn Operand<'a> + 'a>; 14 | } 15 | 16 | impl<'a, T: /* Sized + */ Operand<'a> + 'a> AsDynOperand<'a> for T { 17 | fn as_dyn_operand(self: Box<Self>) -> Box<dyn Operand<'a> + 'a> { 18 | self 19 | } 20 | } 21 | 22 | 23 | 24 | #[derive(Debug)] 25 | pub struct Program<'a>(Vec<Box<dyn Operation<'a> + 'a>>); 26 | 27 | #[derive(Debug)] 28 | struct Add<'a> { 29 | result: Box<dyn Location<'a> + 'a>, 30 | lhs: Box<dyn Operand<'a> + 'a>, 31 | rhs: Box<dyn Operand<'a> + 'a>, 32 | } 33 | impl<'a> Operation<'a> for Add<'a> {} 34 | 35 | #[derive(Debug)] 36 | struct Sub<'a> { 37 | result: Box<dyn Location<'a> + 'a>, 38 | lhs: Box<dyn Operand<'a> + 'a>, 39 | rhs: Box<dyn Operand<'a> + 'a>, 40 | } 41 | impl<'a> Operation<'a> for Sub<'a> {} 42 | 43 | #[derive(Debug)] 44 | struct Register<'a>(&'a str); 45 | impl<'a> Location<'a> for Register<'a> {} 46 | 47 | #[derive(Debug)] 48 | struct Global<'a>(&'a str); 49 | impl<'a> Location<'a> for Global<'a> {} 50 | 51 | #[derive(Debug)] 52 | struct Literal(i32); 53 | impl<'a> Operand<'a>
for Literal {} 54 | 55 | parser!{ 56 | grammar assembly() for str { 57 | pub rule program() -> Program<'input> 58 | = op:operation() ** "\n" { Program(op) } 59 | 60 | rule _ = [' ']* 61 | 62 | rule operation() -> Box<dyn Operation<'input> + 'input> 63 | = a:add() {a} / s:sub() {s} 64 | 65 | rule add() -> Box<Add<'input>> 66 | = result:location() _ "=" _ "add" _ lhs:operand() _ rhs:operand() { Box::new(Add{ result, lhs, rhs }) } 67 | 68 | rule sub() -> Box<Sub<'input>> 69 | = result:location() _ "=" _ "sub" _ lhs:operand() _ rhs:operand() { Box::new(Sub{ result, lhs, rhs }) } 70 | 71 | rule location() -> Box<dyn Location<'input> + 'input> 72 | = r:register() {r} / g:global() {g} 73 | 74 | rule register() -> Box<Register<'input>> 75 | = "%" _ id:identifier() { Box::new(Register(id)) } 76 | 77 | rule global() -> Box<Global<'input>> 78 | = "@" _ id:identifier() { Box::new(Global(id)) } 79 | 80 | rule identifier() -> &'input str 81 | = $(['a'..='z' | 'A'..='Z']+) 82 | 83 | rule operand() -> Box<dyn Operand<'input> + 'input> 84 | = l:location() {l.as_dyn_operand()} / l:literal() {Box::new(l)} 85 | 86 | rule literal() -> Literal 87 | = n:$(['0'..='9']+) {?
88 | let n = n.parse::<i32>().map_err(|_| "invalid int literal")?; 89 | Ok(Literal(n)) 90 | } 91 | 92 | }} 93 | 94 | fn main() { 95 | let parsed = assembly::program("%apple = add 1 @g 96 | @b = add 2 %a 97 | %c = sub 82 @b 98 | @dog = sub @b 12").unwrap(); 99 | let expected = Program(vec![ 100 | Box::new(Add{ 101 | result: Box::new(Register("apple")), 102 | lhs: Box::new(Literal(1)), 103 | rhs: Box::new(Global("g")) 104 | }), 105 | Box::new(Add{ 106 | result: Box::new(Global("b")), 107 | lhs: Box::new(Literal(2)), 108 | rhs: Box::new(Register("a")) 109 | }), 110 | Box::new(Sub{ 111 | result: Box::new(Register("c")), 112 | lhs: Box::new(Literal(82)), 113 | rhs: Box::new(Global("b")) 114 | }), 115 | Box::new(Sub{ 116 | result: Box::new(Global("dog")), 117 | lhs: Box::new(Global("b")), 118 | rhs: Box::new(Literal(12)) 119 | }), 120 | ]); 121 | assert_eq!(format!("{parsed:?}"), format!("{expected:?}")); 122 | } 123 | -------------------------------------------------------------------------------- /tests/run-pass/borrow_from_input.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar borrows() for str { 4 | use std::borrow::{ToOwned, Cow}; 5 | 6 | pub rule borrowed() -> &'input str 7 | = $(['a'..='z']+) 8 | 9 | pub rule lifetime_parameter() -> Cow<'input, str> 10 | = x:$(['a'..='z']+) { x.into() } 11 | / "COW" { "cow".to_owned().into() } 12 | }); 13 | 14 | use self::borrows::*; 15 | 16 | fn main() { 17 | assert_eq!(borrowed("abcd"), Ok("abcd")); 18 | assert_eq!(&*lifetime_parameter("abcd").unwrap(), "abcd"); 19 | assert_eq!(&*lifetime_parameter("COW").unwrap(), "cow"); 20 | } -------------------------------------------------------------------------------- /tests/run-pass/bytes.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use peg::parser; 3 | 4 | parser!{ 5 | grammar byteparser() for [u8] { 6 | pub rule commands() ->
Vec<&'input[u8]> = command()* 7 | rule command() -> &'input [u8] = ">" val:$([b' ' ..= b'~']+) [0] { val } 8 | } 9 | } 10 | 11 | fn main() { 12 | assert_eq!(byteparser::commands(b">asdf\0>xyz\0"), Ok(vec![&b"asdf"[..], &b"xyz"[..]])); 13 | } 14 | -------------------------------------------------------------------------------- /tests/run-pass/conditional_block.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar parse() for str { 4 | 5 | pub rule dec_byte() -> u8 6 | = match_str:$(['0'..='9']*<,3>) {? 7 | let val: u64 = match_str.parse().map_err(|_| "decimal byte")?; 8 | 9 | // only let this rule match if the value is in range 0..255 10 | if val <= 255 { 11 | Ok(val as u8) 12 | } else { 13 | // the message explains what the rule expected and is used in the parse error 14 | Err("decimal byte") 15 | } 16 | } 17 | 18 | rule tag() -> &'input str 19 | = $(['a'..='z']+) 20 | 21 | pub rule xml() 22 | = "<" open:tag() ">" xml()* "</" close:tag() ">" {? 23 | if open == close { 24 | Ok(()) 25 | } else { 26 | // TODO this has to be a `&'static str`, so we can't use a dynamic string 27 | Err("matching close tag") 28 | } 29 | } 30 | 31 | pub rule return_early() -> i32 = vs:$([_]+) {?
32 | let v = vs.parse::<i32>().map_err(|_| "number")?; 33 | if v > 100 { 34 | return Err("smaller number"); 35 | } 36 | Ok(v) 37 | } 38 | }); 39 | 40 | fn main() { 41 | assert_eq!(parse::dec_byte("0"), Ok(0)); 42 | assert_eq!(parse::dec_byte("255"), Ok(255)); 43 | assert_eq!(parse::dec_byte("1"), Ok(1)); 44 | assert!(parse::dec_byte("256").is_err()); 45 | assert!(parse::dec_byte("1234").is_err()); 46 | 47 | assert!(parse::xml("<a></a>").is_ok()); 48 | assert!(parse::xml("<a><b></b><c></c></a>").is_ok()); 49 | assert!(parse::xml("<a><b><c></b></c></a>").is_err()); 50 | assert!(parse::xml("<a><b></c><c></b></a>").is_err()); 51 | 52 | assert!(parse::return_early("a").unwrap_err().expected.tokens().any(|e| e == "number")); 53 | assert!(parse::return_early("123").unwrap_err().expected.tokens().any(|e| e == "smaller number")); 54 | assert_eq!(parse::return_early("99").unwrap(), 99); 55 | } 56 | -------------------------------------------------------------------------------- /tests/run-pass/crate_import.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!{ 4 | pub grammar foo_parser() for str { 5 | use crate::types::Foo; 6 | 7 | pub rule foo() -> Foo 8 | = "foo" { Foo } 9 | } 10 | } 11 | 12 | mod types { 13 | #[derive(PartialEq, Debug)] 14 | pub struct Foo; 15 | } 16 | 17 | 18 | fn main() { 19 | assert_eq!(foo_parser::foo("foo"), Ok(crate::types::Foo)); 20 | } 21 | -------------------------------------------------------------------------------- /tests/run-pass/custom_expr.rs: -------------------------------------------------------------------------------- 1 | use peg::RuleResult; 2 | 3 | peg::parser!( grammar test() for str { 4 | rule position() -> usize = #{|input, pos| RuleResult::Matched(pos, pos)} 5 | pub rule test1() -> usize = ['a']* p1:position() ['b']* { p1 } 6 | 7 | pub rule fail() -> usize = #{|input, pos| RuleResult::Failed} 8 | 9 | rule custom_literal(literal: &str) = #{|input, pos| { 10 | let l = literal.len(); 11 | if input.len() >= pos + l &&
&input.as_bytes()[pos..pos + l] == literal.as_bytes() { 12 | RuleResult::Matched(pos + l, ()) 13 | } else { 14 | RuleResult::Failed 15 | } 16 | }} 17 | pub rule test2() = custom_literal("foo") "_" custom_literal("bar") 18 | }); 19 | 20 | fn main() { 21 | assert_eq!(test::test1("aaaabb"), Ok(4)); 22 | assert_eq!(test::fail("aaaabb").unwrap_err().location.offset, 0); 23 | 24 | assert_eq!(test::test2("foo_bar"), Ok(())); 25 | } 26 | -------------------------------------------------------------------------------- /tests/run-pass/errors.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!{ grammar parser() for str { 4 | pub rule one_letter() = ['a'..='z'] 5 | 6 | pub rule parse() -> usize 7 | = v:( "a" / "\n" )* { v.len() } 8 | 9 | pub rule error_pos() = ("a" / "\n" / "\r")* 10 | 11 | pub rule q() = (quiet!{ 12 | ("a" / "b" / "c") ("1" / "2") 13 | } / expected!("letter followed by number"))+ 14 | 15 | pub rule var(s: &'static str) = expected!(s) 16 | }} 17 | 18 | fn main() { 19 | // errors at eof 20 | assert_eq!(parser::one_letter("t"), Ok(())); 21 | 22 | let err = parser::one_letter("tt").unwrap_err(); 23 | assert_eq!(err.location.line, 1); 24 | assert_eq!(err.location.column, 2); 25 | assert_eq!(err.location.offset, 1); 26 | assert_eq!(format!("{}", err.expected), "EOF"); 27 | 28 | // expected character set 29 | let err = parser::parse(r#" 30 | aaaa 31 | aaaaaa 32 | aaaabaaaa 33 | "#).unwrap_err(); 34 | 35 | assert_eq!(err.location.line, 4); 36 | assert_eq!(err.location.column, 5); 37 | assert_eq!(err.location.offset, 17); 38 | assert_eq!(format!("{}", err.expected), r#"one of "\n", "a", EOF"#); 39 | 40 | // error position reporting 41 | let err = parser::error_pos("aab\n").unwrap_err(); 42 | assert_eq!(err.location.line, 1); 43 | assert_eq!(err.location.column, 3); 44 | assert_eq!(err.location.offset, 2); 45 | assert_eq!(err.expected.to_string(), r#"one of "\n", "\r", "a", EOF"#); 46 | 47 | 
let err = parser::error_pos("aa\naaaa\nbaaa\n").unwrap_err(); 48 | assert_eq!(err.location.line, 3); 49 | assert_eq!(err.location.column, 1); 50 | 51 | let err = parser::error_pos("aa\naaaa\naaab\naa").unwrap_err(); 52 | assert_eq!(err.location.line, 3); 53 | assert_eq!(err.location.column, 4); 54 | 55 | let err = parser::error_pos("aa\r\naaaa\r\naaab\r\naa").unwrap_err(); 56 | assert_eq!(err.location.line, 3); 57 | assert_eq!(err.location.column, 4); 58 | 59 | parser::q("a1").unwrap(); 60 | parser::q("a1b2").unwrap(); 61 | let err = parser::q("a1bb").unwrap_err(); 62 | assert_eq!(err.location.offset, 2); 63 | assert_eq!(err.expected.to_string(), "one of EOF, letter followed by number"); 64 | 65 | let err = parser::var("", "asdf").unwrap_err(); 66 | assert_eq!(err.expected.to_string(), "asdf"); 67 | 68 | } 69 | -------------------------------------------------------------------------------- /tests/run-pass/generic_fn_traits.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( 4 | grammar parser() for str { 5 | pub rule foo<F: Fn(&str) -> u32 + Copy>(f: F) -> u32 6 | = s:$(['0'..='9']+) { f(s) } 7 | pub rule bar(f: impl Fn(&str) -> u32 + Copy,) -> u32 8 | = s:$(['0'..='9']+) { f(s) } 9 | pub rule baz(f: fn(&str) -> u32) -> u32 10 | = s:$(['0'..='9']+) { f(s) } 11 | } 12 | ); 13 | 14 | fn main() { 15 | let n = parser::foo("123", |s| s.parse().unwrap()).unwrap(); 16 | assert_eq!(n, 123); 17 | let n = parser::bar("123", |s| s.parse().unwrap()).unwrap(); 18 | assert_eq!(n, 123); 19 | let n = parser::baz("123", |s| s.parse().unwrap()).unwrap(); 20 | assert_eq!(n, 123); 21 | } 22 | -------------------------------------------------------------------------------- /tests/run-pass/grammar_with_args_and_cache.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!
{ 4 | grammar lol(config: bool) for str { 5 | #[cache_left_rec] 6 | rule one() -> () 7 | = one() / "foo" 8 | } 9 | } 10 | 11 | fn main() {} 12 | -------------------------------------------------------------------------------- /tests/run-pass/keyval.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use std::collections::HashMap; 3 | 4 | peg::parser!( grammar keyval() for str { 5 | rule number() -> i64 6 | = n:$(['0'..='9']+) { n.parse().unwrap() } 7 | 8 | pub rule keyvals() -> HashMap<i64, i64> 9 | = kvs:keyval() ++ "\n" { 10 | kvs.iter().cloned().collect::<HashMap<i64, i64>>() 11 | } 12 | 13 | rule keyval() -> (i64, i64) 14 | = k:number() ":" + v:number() { (k, v) } 15 | }); 16 | 17 | fn main() { 18 | let mut expected = HashMap::new(); 19 | expected.insert(1, 3); 20 | expected.insert(2, 4); 21 | assert_eq!(keyval::keyvals("1:3\n2:4"), Ok(expected)); 22 | } 23 | -------------------------------------------------------------------------------- /tests/run-pass/lifetimes.rs: -------------------------------------------------------------------------------- 1 | #[derive(Copy, Clone)] 2 | pub struct Token<'text>(&'text str); 3 | 4 | peg::parser!{ 5 | grammar tokenparser<'t>() for [Token<'t>] { 6 | pub rule program() -> Vec<&'t str> = list() 7 | 8 | // add this indirection to ensure that rule args work with a global lifetime 9 | rule commasep<T>(x: rule<T>) -> Vec<T> = v:(x() ** [Token(",")]) [Token(",")]?
{ v } 10 | 11 | rule list() -> Vec<&'t str> = [Token("(")] l:commasep(<string()>) [Token(")")] { l } 12 | rule string() -> &'t str = [Token(inner)] { inner } 13 | 14 | #[cache] 15 | rule cached() -> Token<'t> = [a] { a } 16 | } 17 | } 18 | 19 | peg::parser!{ 20 | grammar unused_args<'a>() for () { } 21 | } 22 | 23 | fn main() { 24 | let input = "(one,two)"; 25 | assert_eq!( 26 | tokenparser::program( 27 | &[Token(&input[0..1]), Token(&input[1..4]), Token(&input[4..5]), Token(&input[5..8]), Token(&input[8..9])], 28 | ), 29 | Ok(vec!["one", "two"]) 30 | ); 31 | } 32 | -------------------------------------------------------------------------------- /tests/run-pass/memoization.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!{ grammar memo() for str { 4 | #[cache] 5 | rule r() -> &'input str 6 | = s:$(['a'..='z']+) { s } 7 | 8 | pub rule parse() 9 | = r() "+" r() { () } 10 | / r() " " r() { () } 11 | }} 12 | 13 | fn main() { 14 | assert_eq!(memo::parse("abc zzz"), Ok(())); 15 | } 16 | -------------------------------------------------------------------------------- /tests/run-pass/no_eof.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use peg::parser; 3 | 4 | parser!{ 5 | pub grammar g() for [u8] { 6 | #[no_eof] 7 | pub rule foo() = "foo" 8 | } 9 | } 10 | 11 | fn main() { 12 | assert_eq!(g::foo(b"foobar"), Ok(())); 13 | } 14 | -------------------------------------------------------------------------------- /tests/run-pass/optional.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar test_grammar() for str { 4 | pub rule options() -> Option<()> 5 | = "abc" v:"def"? {v} 6 | 7 | pub rule option_unused_result() = "a"?
/ "b" 8 | }); 9 | 10 | use self::test_grammar::*; 11 | 12 | fn main() { 13 | assert_eq!(options("abc"), Ok(None)); 14 | assert_eq!(options("abcdef"), Ok(Some(()))); 15 | assert!(options("def").is_err()); 16 | } -------------------------------------------------------------------------------- /tests/run-pass/pattern.rs: -------------------------------------------------------------------------------- 1 | peg::parser!( grammar test() for str { 2 | pub rule alphanumeric() = ['a'..='z' | 'A'..='Z' | '0'..='9']* 3 | pub rule inverted_pat() -> &'input str = "(" s:$([^')']*) ")" {s} 4 | 5 | pub rule capture() -> char = ['a'..='z'] 6 | pub rule capture2() -> (char, char) = a:['a'..='z'] b:['0'..='9'] { (a, b) } 7 | 8 | pub rule open_range() -> char = ['a'..] 9 | 10 | pub rule if_guard() -> char = [x if x.is_ascii_digit()] 11 | }); 12 | 13 | fn main() { 14 | assert!(test::alphanumeric("azAZ09").is_ok()); 15 | assert!(test::alphanumeric("@").is_err()); 16 | 17 | assert_eq!(test::inverted_pat("(asdf)"), Ok("asdf")); 18 | 19 | assert_eq!(test::capture("x"), Ok('x')); 20 | assert_eq!(test::capture2("a1"), Ok(('a', '1'))); 21 | 22 | assert_eq!(test::if_guard("1"), Ok('1')); 23 | assert!(test::if_guard("a").is_err()); 24 | 25 | } 26 | 27 | -------------------------------------------------------------------------------- /tests/run-pass/pos_neg_assert.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar lookahead() for str { 4 | pub rule consonants() 5 | = (!['a'|'e'|'i'|'o'|'u']['a'..='z'])+ 6 | 7 | pub rule neg_lookahead_err() = !(['a']['b']) ['a']['x'] 8 | 9 | pub rule lookahead_result() -> &'input str 10 | = v:&($(['a'..='c']*)) "abcd" { v } 11 | }); 12 | 13 | fn main() { 14 | // negative lookahead 15 | assert!(lookahead::consonants("qwrty").is_ok()); 16 | assert!(lookahead::consonants("rust").is_err()); 17 | 18 | // expected characters in negative lookahead should not be reported in parse error 
messages 19 | let err = lookahead::neg_lookahead_err("ac").err().unwrap(); 20 | assert_eq!(err.expected.tokens().count(), 1, "expected set includes: {}", err.expected); 21 | assert_eq!(err.location.offset, 1); 22 | 23 | // positive lookahead 24 | assert_eq!(lookahead::lookahead_result("abcd"), Ok("abc")); 25 | assert!(lookahead::lookahead_result("abc").is_err()); 26 | } 27 | -------------------------------------------------------------------------------- /tests/run-pass/position.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar test_grammar() for str { 4 | pub rule position() -> (usize, usize, usize) 5 | = start:position!() ['a']* middle:position!() ['b']* end:position!() { (start, middle, end) } 6 | }); 7 | 8 | use self::test_grammar::*; 9 | 10 | fn main() { 11 | assert_eq!(position("aaaabbb").unwrap(), (0, 4, 7)); 12 | } 13 | -------------------------------------------------------------------------------- /tests/run-pass/raw_ident.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use peg::parser; 3 | 4 | parser!{ 5 | pub grammar g() for str { 6 | pub rule r#break() = "foo" 7 | } 8 | } 9 | 10 | fn main() { 11 | assert_eq!(g::r#break("foo"), Ok(())); 12 | } 13 | -------------------------------------------------------------------------------- /tests/run-pass/renamed_imports.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | const FOO: i32 = 42; 4 | 5 | peg::parser!(grammar test_grammar() for str { 6 | use super::FOO as F1; 7 | use super::{FOO as F2}; 8 | pub rule renamed_imports() -> (i32, i32) = { (F1, F2) } 9 | }); 10 | 11 | use self::test_grammar::*; 12 | 13 | fn main() { 14 | assert_eq!(renamed_imports("").unwrap(), (42, 42)); 15 | } -------------------------------------------------------------------------------- /tests/run-pass/repeats.rs: 
-------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar repeats() for str { 4 | rule number() -> i64 5 | = n:$(['0'..='9']+) { n.parse().unwrap() } 6 | 7 | pub rule list() -> Vec<i64> 8 | = number() ** "," 9 | 10 | rule digit() -> i64 11 | = n:$(['0'..='9']) {n.parse().unwrap() } 12 | 13 | pub rule repeat_n() -> Vec<i64> 14 | = digit()*<4> 15 | 16 | pub rule repeat_min() -> Vec<i64> 17 | = digit()*<2,> 18 | 19 | pub rule repeat_max() -> Vec<i64> 20 | = digit()*<,2> 21 | 22 | pub rule repeat_min_max() -> Vec<i64> 23 | = digit()*<2,3> 24 | 25 | pub rule repeat_sep_3() -> Vec<i64> 26 | = digit() **<3> "," 27 | 28 | pub rule repeat_variable() -> Vec<&'input str> 29 | = (count:digit() s:$(['a'..='z'|'0'..='9']*<{count as usize}>) {s})* 30 | }); 31 | 32 | use repeats::*; 33 | 34 | fn main() { 35 | assert_eq!(list("5"), Ok(vec![5])); 36 | assert_eq!(list("1,2,3,4"), Ok(vec![1,2,3,4])); 37 | 38 | assert!(repeat_n("123").is_err()); 39 | assert_eq!(repeat_n("1234"), Ok(vec![1,2,3,4])); 40 | assert!(repeat_n("12345").is_err()); 41 | 42 | assert!(repeat_min("").is_err()); 43 | assert!(repeat_min("1").is_err()); 44 | assert_eq!(repeat_min("12"), Ok(vec![1,2])); 45 | assert_eq!(repeat_min("123"), Ok(vec![1,2,3])); 46 | 47 | assert_eq!(repeat_max(""), Ok(vec![])); 48 | assert_eq!(repeat_max("1"), Ok(vec![1])); 49 | assert_eq!(repeat_max("12"), Ok(vec![1,2])); 50 | assert!(repeat_max("123").is_err()); 51 | 52 | assert!(repeat_min_max("").is_err()); 53 | assert!(repeat_min_max("1").is_err()); 54 | assert_eq!(repeat_min_max("12"), Ok(vec![1,2])); 55 | assert_eq!(repeat_min_max("123"), Ok(vec![1,2,3])); 56 | assert!(repeat_min_max("1234").is_err()); 57 | 58 | assert!(repeat_sep_3("1,2").is_err()); 59 | assert!(repeat_sep_3("1,2,3,4").is_err()); 60 | assert_eq!(repeat_sep_3("1,2,3"), Ok(vec![1,2,3])); 61 | 62 | assert_eq!(repeat_variable("1a3abc222"), Ok(vec!["a", "abc", "22"])); 63 | }
-------------------------------------------------------------------------------- /tests/run-pass/return_type.rs: -------------------------------------------------------------------------------- 1 | use std::fmt::Debug; 2 | 3 | // `--features trace` code names the return type, so doesn't work with `impl Trait` 4 | #[cfg(not(feature = "trace"))] 5 | peg::parser!{ 6 | grammar g() for str { 7 | pub rule returns_impl_trait() -> impl Debug 8 | = "" { Box::new(5) } 9 | } 10 | } 11 | 12 | fn main() { 13 | #[cfg(not(feature = "trace"))] 14 | assert_eq!(format!("{:?}", g::returns_impl_trait("")), "Ok(5)"); 15 | } 16 | -------------------------------------------------------------------------------- /tests/run-pass/rule_args.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( grammar ra() for str { 4 | use peg::ParseLiteral; 5 | 6 | rule number() -> i64 7 | = n:$(['0'..='9']+) { n.parse().unwrap() } 8 | 9 | rule commasep<T>(x: rule<T>) -> Vec<T> = v:(x() ** ",") ","? {v} 10 | rule bracketed<T>(x: rule<T>) -> T = "[" v:x() "]" {v} 11 | 12 | pub rule list() -> Vec<i64> = commasep(<number()>) 13 | pub rule array() -> Vec<i64> = bracketed(<commasep(<number()>)>) 14 | 15 | rule keyword(id: &'static str) = ##parse_string_literal(id) !['0'..='9' | 'a'..='z' | 'A'..='Z' | '_'] 16 | rule ident() = ['a'..='z']+ 17 | rule _ = [' ']* 18 | pub rule ifelse() = keyword("if") _ ident() _ keyword("then") _ ident() _ keyword("else") _ ident() 19 | 20 | pub rule repeated_a(i: usize) = ['a']*<{i}> 21 | 22 | rule i(literal: &'static str) = input:$([_]*<{literal.len()}>) {?
if input.eq_ignore_ascii_case(literal) { Ok(()) } else { Err(literal) } } 23 | 24 | pub rule test_i() = i("foo") i("bar") 25 | 26 | rule recursive(r: rule<()>) = " " recursive(r) // Issue #226 27 | rule complex_args(val1: u32, val2: Option<u32>) = { assert_eq!(val1, 10); assert_eq!(val2, Some(8)) } 28 | pub rule use_complex_args() = complex_args(u32::max(5, 10), [1,1,3,5,8,13].iter().cloned().find(|x| { x % 2 == 0 })) 29 | 30 | pub rule lt_arg<'a>() = "" 31 | pub rule lt_arg_mut<'a>(x: &'a mut ()) = "" 32 | pub rule ty_arg<T>(x: &T) = "" 33 | pub rule ty_arg_bound<T: Copy>(x: T) = "" 34 | pub rule ty_arg_bound2<'a, T: std::marker::Copy + ?Sized + 'a>(x: T) = "" 35 | pub rule ty_arg_bound_ret<T: std::str::FromStr>() -> T = {? "".parse().or(Err("oops")) } 36 | }); 37 | 38 | use ra::*; 39 | 40 | fn main() { 41 | assert_eq!(list("1,2,3,4"), Ok(vec![1,2,3,4])); 42 | assert_eq!(array("[1,1,2,3,5,]"), Ok(vec![1,1,2,3,5])); 43 | 44 | assert!(ifelse("if foo then x else y").is_ok()); 45 | assert!(ifelse("iffoothenxelsey").is_err()); 46 | 47 | assert!(repeated_a("aa", 2).is_ok()); 48 | assert!(repeated_a("aaa", 2).is_err()); 49 | assert!(repeated_a("aaaaa", 5).is_ok()); 50 | 51 | assert!(test_i("fOoBaR").is_ok()); 52 | assert!(test_i("fOoBaZ").is_err()); 53 | assert!(test_i("fOoX").is_err()); 54 | 55 | use_complex_args("").ok(); 56 | } -------------------------------------------------------------------------------- /tests/run-pass/rule_generic.rs: -------------------------------------------------------------------------------- 1 | peg::parser!( grammar test() for str { 2 | rule number<T: std::str::FromStr>() -> T = s:$(['0'..='9']+) {?
s.parse().or(Err("number")) } 3 | 4 | pub rule numbers() -> (u8, i32) 5 | = n1:number::<u8>() "," n2:number::<i32>() { (n1, n2) } 6 | }); 7 | 8 | fn main() { 9 | assert_eq!(test::numbers("42,1234"), Ok((42, 1234))); 10 | } 11 | -------------------------------------------------------------------------------- /tests/run-pass/rule_where_clause.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | use std::{ 3 | str::FromStr, 4 | fmt::Debug, 5 | }; 6 | 7 | peg::parser!( 8 | grammar parser() for str { 9 | use std::cell::Cell; 10 | pub rule nums<C, T>() -> C 11 | where C: Default + Extend<T>, 12 | T: FromStr, 13 | <T as FromStr>::Err: Debug, 14 | = c:({ Cell::new(C::default()) }) 15 | (ch:$(['0'..='9']) { 16 | let mut mutc = c.take(); 17 | mutc.extend(Some(ch.parse::<T>().unwrap())); 18 | c.set(mutc); 19 | })+ 20 | { c.take() } 21 | } 22 | ); 23 | 24 | fn main() { 25 | assert_eq!(parser::nums::<Vec<u8>, u8>("3729"), Ok(vec![3, 7, 2, 9])); 26 | } 27 | 28 | -------------------------------------------------------------------------------- /tests/run-pass/rust_use_tree.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!( 4 | grammar parser() for str { 5 | #[allow(unused_imports)] 6 | use ::std::{ 7 | borrow::{ 8 | Borrow as _, 9 | Cow, 10 | }, 11 | collections::*, 12 | }; 13 | 14 | rule val() = ['a'..='z']+ 15 | 16 | pub rule foo() -> HashMap<&'input str, Cow<'input, str>> 17 | = kvs:(k:$(val()) ":" v:$(val()) {(k, v.into())})++"," 18 | { HashMap::from_iter(kvs) } 19 | } 20 | ); 21 | 22 | fn main() { 23 | assert_eq!(parser::foo("a:b,c:d"), Ok(std::collections::HashMap::from_iter([ 24 | ("a", std::borrow::Cow::from("b")), 25 | ("c", std::borrow::Cow::from("d")), 26 | ]))); 27 | } 28 | 29 | -------------------------------------------------------------------------------- /tests/run-pass/test-hygiene.rs: -------------------------------------------------------------------------------- 1 | use
::peg as realpeg; 2 | struct Result; 3 | struct ParseResult; 4 | struct Parse; 5 | struct Input; 6 | struct ParseState; 7 | struct ErrorState; 8 | struct Vec; 9 | struct HashMap; 10 | mod peg {} 11 | 12 | realpeg::parser!{ 13 | grammar p() for str { 14 | pub rule number() -> f64 = n:$(['0'..='9']+) { n.parse().unwrap() } 15 | 16 | #[cache] 17 | pub rule cached() = "x" 18 | 19 | pub rule prec() -> () = precedence!{ 20 | "x" { () } 21 | } 22 | } 23 | } 24 | 25 | fn main() { 26 | assert_eq!(p::number("12345"), Ok(12345.0)) 27 | } 28 | 29 | -------------------------------------------------------------------------------- /tests/run-pass/tokens.rs: -------------------------------------------------------------------------------- 1 | #[derive(Copy, Clone)] 2 | pub enum Token { 3 | Open, 4 | Number(i32), 5 | Comma, 6 | Close, 7 | } 8 | 9 | peg::parser!{ 10 | grammar tokenparser() for [Token] { 11 | pub rule list() -> (i32, i32) = [Token::Open] [Token::Number(a)] [Token::Comma] [Token::Number(b)] [Token::Close] { (a, b) } 12 | } 13 | } 14 | 15 | fn main() { 16 | assert_eq!(tokenparser::list(&[Token::Open, Token::Number(5), Token::Comma, Token::Number(7), Token::Close]), Ok((5, 7))); 17 | } 18 | -------------------------------------------------------------------------------- /tests/run-pass/tokens_struct.rs: -------------------------------------------------------------------------------- 1 | use peg::{Parse, ParseElem, RuleResult}; 2 | 3 | /// The default implementation of the parsing traits for `[T]` expects `T` to be 4 | /// `Copy`, as in the `[u8]` or simple enum cases. This wrapper exposes the 5 | /// elements by `&T` reference, which is `Copy`. 
6 | pub struct SliceByRef<'a, T>(pub &'a [T]); 7 | 8 | impl<'a , T> Parse for SliceByRef<'a, T> { 9 | type PositionRepr = usize; 10 | fn start(&self) -> usize { 11 | 0 12 | } 13 | 14 | fn is_eof(&self, pos: usize) -> bool { 15 | pos >= self.0.len() 16 | } 17 | 18 | fn position_repr(&self, pos: usize) -> usize { 19 | pos 20 | } 21 | } 22 | 23 | impl<'a, T: 'a> ParseElem<'a> for SliceByRef<'a, T> { 24 | type Element = &'a T; 25 | 26 | fn parse_elem(&'a self, pos: usize) -> RuleResult<&'a T> { 27 | match self.0.get(pos) { 28 | Some(c) => RuleResult::Matched(pos + 1, c), 29 | None => RuleResult::Failed, 30 | } 31 | } 32 | } 33 | 34 | #[derive(PartialEq)] 35 | pub enum TokenType { 36 | Word, 37 | Number, 38 | } 39 | 40 | pub struct Token { 41 | pub token_type: TokenType, 42 | pub term: String, 43 | } 44 | 45 | peg::parser!{ 46 | grammar tokenparser<'a>() for SliceByRef<'a, Token> { 47 | // The [] syntax works just like (and expands into) an arm of a match 48 | // in regular Rust, so you can use a pattern that matches one field 49 | // and ignores the rest 50 | pub rule word_by_field() = [ Token { token_type: TokenType::Word, .. } ] 51 | 52 | // Or capture the token as a variable and then test it with an if guard.
53 | pub rule word_by_eq() = [t if t.token_type == TokenType::Word] 54 | 55 | // You could wrap this in a rule that accepts the TokenType as an argument 56 | rule tok(ty: TokenType) -> &'input Token = [t if t.token_type == ty] 57 | pub rule number() = tok(TokenType::Number) 58 | } 59 | } 60 | 61 | fn main() { 62 | let word_tok = vec![ 63 | Token { token_type: TokenType::Word, term: "foo".into() } 64 | ]; 65 | 66 | let number_tok = vec![ 67 | Token { token_type: TokenType::Number, term: "123".into() } 68 | ]; 69 | 70 | assert!(tokenparser::word_by_field(&SliceByRef(&word_tok[..])).is_ok()); 71 | assert!(tokenparser::word_by_eq(&SliceByRef(&word_tok[..])).is_ok()); 72 | assert!(tokenparser::number(&SliceByRef(&number_tok[..])).is_ok()); 73 | } 74 | -------------------------------------------------------------------------------- /tests/run-pass/utf8.rs: -------------------------------------------------------------------------------- 1 | extern crate peg; 2 | 3 | peg::parser!(grammar test_grammar() for str { 4 | pub rule boundaries() -> String 5 | = n:$("foo") { n.to_string() } 6 | }); 7 | 8 | use self::test_grammar::*; 9 | 10 | // before we were testing string matches using .slice(), which 11 | // threw an ugly panic!() when we compared unequal character 12 | // boundaries.. 
this popped up while parsing unicode 13 | fn main() { 14 | assert!(boundaries("f↙↙↙↙").is_err()); 15 | } 16 | -------------------------------------------------------------------------------- /tests/trybuild.rs: -------------------------------------------------------------------------------- 1 | fn main() { 2 | let args: Vec<_> = std::env::args().collect(); 3 | let t = trybuild::TestCases::new(); 4 | 5 | t.pass("tests/run-pass/*.rs"); 6 | 7 | let expected_rust_ver = env!("CARGO_PKG_RUST_VERSION"); 8 | let run_anyway = args.iter().any(|a| a == "--compile-fail"); 9 | 10 | let run_compile_fail = run_anyway || version_check::is_exact_version(expected_rust_ver).unwrap_or(true); 11 | if run_compile_fail { 12 | t.compile_fail("tests/compile-fail/*.rs"); 13 | } 14 | 15 | // Trybuild runs the configured tests on drop 16 | drop(t); 17 | 18 | if !run_compile_fail { 19 | eprintln!("!!! Skipped compile-fail tests !!!"); 20 | eprintln!("These tests are only checked on rust version {expected_rust_ver} because"); 21 | eprintln!("the error message text may change between compiler versions."); 22 | eprintln!(""); 23 | eprintln!("Run `cargo +{expected_rust_ver} test` to run these tests."); 24 | eprintln!(""); 25 | } 26 | } 27 | --------------------------------------------------------------------------------