├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── action_test.go ├── check.go ├── check_test.go ├── error.go ├── example ├── calc │ ├── calc.go │ └── calc.peggy └── label_names │ ├── label_names.go │ └── label_names.peggy ├── gen.go ├── gen_test.go ├── go.go ├── go.mod ├── go.sum ├── gok.sh ├── grammar.go ├── grammar.y ├── lex.go ├── main.go ├── parse_test.go ├── peg ├── fail.go ├── fail_test.go ├── loc.go ├── loc_test.go ├── peg.go └── pretty.go ├── rule.go └── string.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 1.12 4 | 5 | notifications: 6 | email: false 7 | 8 | env: 9 | - PATH=$HOME/gopath/bin:$PATH 10 | 11 | install: 12 | - go get golang.org/x/tools/cmd/goyacc 13 | - go get golang.org/x/lint/golint 14 | - go get -t -v ./... && go build -v ./... 15 | 16 | script: 17 | - ./gok.sh 18 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of Peggy authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | Google Inc. 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017, The Peggy Authors 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google Inc. 
nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/eaburns/peggy.svg?branch=master)](https://travis-ci.org/eaburns/peggy)

# Introduction

Peggy is a Parsing Expression Grammar
([PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar))
parser generator.

The generated parser is a
[packrat parser](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars).
However, the implementation is somewhat novel (I believe).

# Background

Packrat parsers work by doing a recursive descent on the grammar rules,
backtracking when a rule fails to accept.
To prevent exponential backtracking, a memo table remembers
the parse result for each rule at each point in the input.
This way, when backtracking encounters a subtree of the grammar that was already tried,
it can compute the result in constant time by looking it up in the memo table
instead of computing the parse again.

Because of the memo table, packrat parsers for PEG grammars
parse in time linear in the size of the input
and use memory linear in the size of the input too.
(Note that other common parser generators,
such as yacc for LALR(1) grammars,
are linear time in the size of the input
and linear space in the _depth of the parse_,
which can be smaller than the input size.)

A common way to implement the memo table is to use a hash table.
The key is a pair of the grammar rule and the input position,
and the value is the result of parsing that rule at that position
(the result of any parser actions) and the number of runes consumed,
or an indication that the parse failed.

A problem that I've found with this approach is that
for grammars that tend to do a lot of backtracking,
a significant amount of time is spent modifying and accessing the memo table.
Hash table lookups and inserts are expected constant time,
but in the face of much backtracking, that constant time adds up.

In addition, hash tables tend to be implemented with linked structures,
which take up additional memory for pointers and allocation overhead.
Finally, as they grow large, linked structures take more time
for the garbage collector to scan.
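
To make the conventional design concrete, here is a minimal sketch of such a
hash-table memo table, keyed by rule and position. This is illustrative only,
not Peggy's implementation; the `memoKey`, `memoEntry`, and `memoTable` names
are made up for the example.

```
package memo

// memoKey identifies one parse attempt: a grammar rule tried at an input position.
type memoKey struct {
	rule string
	pos  int
}

// memoEntry records the outcome of that attempt.
type memoEntry struct {
	failed   bool
	consumed int         // runes consumed on success
	result   interface{} // result of any parser actions
}

// memoTable memoizes parse attempts so backtracking never repeats work.
type memoTable map[memoKey]memoEntry

// lookup returns the memoized outcome of trying rule at pos, if there is one.
func (m memoTable) lookup(rule string, pos int) (memoEntry, bool) {
	e, ok := m[memoKey{rule, pos}]
	return e, ok
}

// store records the outcome of trying rule at pos.
func (m memoTable) store(rule string, pos int, e memoEntry) {
	m[memoKey{rule, pos}] = e
}
```

Every access hashes a key, and the entries are heap-allocated and hold
pointers, which is exactly the overhead described above.
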

I originally implemented Peggy to parse the constructed language
[Lojban](https://mw.lojban.org/papri/Lojban)
(see [johaus](http://github.com/eaburns/johaus)).
My initial hash-table-based implementation performed very poorly on large texts
because of the issues described above:
profiling showed a significant amount of time spent
on map accesses and garbage collection scanning,
and memory use was too high to parse some texts (4KB)
on my laptop (8GB RAM).

I noticed similar issues with the JavaScript- and Java-based PEG parsers for Lojban.

Peggy takes a different approach that was tuned for this use case.

## Peggy's approach

Peggy computes the result of a parse in two passes instead of one.
The first pass determines whether the grammar accepts,
and builds a table that tracks, for each rule tried at each position,
whether the rule accepted and, if so, how much input was consumed,
or, if it failed, how much input was consumed to reach the deepest failure.
These values can be stored in an array using only integers.

If the first pass accepts the input, a second pass can quickly follow the table,
trying only rules that accept and computing the results of the rules' actions.

If the first pass fails to accept, another pass can follow the table
and compute a tree tracking which rules failed at the deepest point of the parse.
This can be used to build precise syntax error messages.

The advantage of Peggy's approach is that
the first pass performs only a single allocation: the table, an array of integers.
Accessing the table is just indexing into an array of integers,
which is cheaper than most hash table lookups.
Since the array contains only integers and no pointers,
it needn't be scanned by the garbage collector.
And finally, wherever a hash table would be relatively densely populated,
an array can be more memory efficient.

For the Lojban grammar, this made the difference
between being able to parse full texts or not
(a 4KB text that needed >8GB of memory was reduced to needing only 2GB,
and multiple minutes were reduced to mere seconds).

## Disadvantages

There are disadvantages to the Peggy approach:

1) The interface is not as simple to use.
However, I hope that you will not find it too difficult.
See the example in the next section for a fairly short wrapper
that wraps the Peggy calls into a single, more typical Go function call.

2) For grammars that do not rely as heavily on the memo table,
a hash table could be much more memory efficient.

I would like to expand this list, so please send pull requests
if you have other disadvantages of this approach that should be here.

Now, let's see how to use it.

# Input file format

A Peggy input file is UTF-8 encoded.

A Peggy grammar file consists of a _prelude_ followed by a set of _rules_.
The prelude is valid Go code enclosed between { and }.
This code is emitted at the beginning of the generated parser .go file.
It should begin with a package statement, then any imports used by the parser.
Any other valid Go code is also permitted.

After the prelude is a set of _rules_ that define the grammar.
Each rule begins with an _identifier_ that is the name of the rule.
After the name is an optional string giving the rule a human-readable name
and marking it as a _leaf_ rule for error reporting (more below).
After the optional string is the token <-.
Next is the expression that defines the rule.

**Example:**
```
A <- "Hello," _ ( "World!" / "世界" )
_ <- ( p:. &{ isUnicodeSpace(p) } )+
```

# Expressions

Expressions define the grammar.
The input to each expression is a sequence of runes.
The expression either accepts or rejects the input.
If the expression accepts, it consumes zero or more runes of input
and evaluates to a result (a Go value).

The types of expressions, in order of precedence, are:
* Choice
* Action
* Sequence
* Label
* Predicate
* Repetition
* Literal, Code Predicate, Identifier, and Subexpression

## Choice

A choice is a sequence of expressions separated by `/`.
Unlike in context-free grammars, choices in PEG are ordered.

It is an error if the result types of the subexpressions are not all the same.

**Accepts:**
A choice accepts if any of its subexpressions accepts.

**Consumes:**
A choice consumes the runes consumed by its first accepting subexpression,
going left to right.

**Result:**
The result of a choice has the type and value of its first accepting subexpression,
going left to right.

**Example:**
```
A / "Hello" / foo:Bar { return string(foo) }
```

## Sequences

A sequence is two or more expressions separated by whitespace.

**Accepts:**
A sequence accepts if each of its subexpressions accepts
on the input remaining after each preceding subexpression consumes.

**Consumes:**
The sequence consumes from the input
the sum of the runes consumed by all of its subexpressions.

**Result:**
It is an error if the type of the result of the first expression
is not the same as the type of the result of the second.

If the first expression is a `string`, the type of the sequence is `string`,
and the result is the concatenation of the results of the expressions.

If the first expression is any non-`string` type, T,
the type of the result of the sequence is `[]T`,
and the result itself is the slice from
`append()`ing the results of the subexpressions.

**Example:**
```
"Hello," Space "World" Punctuation
```

## Labels

A label is an identifier, followed by :, followed by an expression.

Labels are used to create new identifiers used by actions and code predicates.

The scope of a label is its branch in the nearest containing choice expression,
or the entire rule if there is no choice expression.

For example, in

	R <- a:A / a:A / a:A / a:A

all `a`s refer to different labels, as they are all scoped to different branches of the choice, `/`.

Similarly, in this expression,

	R <- a:A / (a:A / a:A)

all `a`s are different labels.
However,

	R <- a:A / a:A a:A

is an error, as `a` is re-defined in the right-hand branch of the choice, `/`.

**Accepts:**
A label accepts if its subexpression accepts.

**Consumes:**
A label consumes the runes of its subexpression.

**Result:**
The result type and value of a label are those of its subexpression.

**Example:**
```
hello:"Hello" "," Space world:( "World" / "世界" )
```

## Predicates

A predicate is a & or ! operator followed by an expression.

**Accepts:**
A predicate with the operator & accepts if its subexpression accepts.

A predicate with the operator ! accepts if its subexpression does not accept.

**Consumes:**
Predicates consume no runes.

**Result:**
The result of a predicate is the empty string.

**Example:**
```
!Keyword [a-zA-Z_] [a-zA-Z0-9_]*
```

## Repetition

A repetition is an expression followed by either a *, +, or ? operator.

**Accepts:**
A repetition with the operator * or ? always accepts.

A repetition with the operator + accepts if its subexpression accepts.

**Consumes:**
A repetition with the operator * or + consumes all matches of its subexpression.

A repetition with the operator ? consumes at most one match of its subexpression.

**Result:**
If the type of the subexpression is `string`, the result of a repetition is `string`,
and the value is the consumed runes.

Otherwise, if the type of the subexpression is a type `T`:
* if the operator is * or +, the type of the result is `[]T`,
and the value is a slice containing all `append`ed subexpression results.
* if the operator is ?, the type of the result is `*T`,
and the value is a pointer to the subexpression result if it accepted,
or `nil`.

**Example:**
```
[a-zA-Z0-9_]* ":"?
```

## Literals

Literals are String Literals, Character Classes, and Dot.

### String Literals

String literals are lexically the same as
[Go String Literals](https://golang.org/ref/spec#String_literals).

**Accepts:**
A string literal accepts if the next runes of input are exactly those of the string.

**Consumes:**
A string literal consumes the matching runes of input.

**Result:**
The result is the `string` of consumed runes.

**Example:**
```
"Hello\nWorld!"
```

### Character Classes

A character class is a sequence of characters
between [ and the next unescaped occurrence of ].
Escapes are treated as in string literals.

Character classes are much like those of common regular expression libraries.

**Accepts:**
A character class accepts if the next rune of input is within the class.

If the first character after the opening [ is a ^,
then the character class's acceptance is negated.

A pair of characters on either side of a - defines a _span_:
the character class accepts any rune with a codepoint
between (and including) the two characters.
It is an error if the first is not smaller than the second.

All other characters in the class are treated as a list of accepted runes.

**Consumes:**
A character class consumes one rune of input.

**Result:**
The result is the `string` of the consumed rune.

**Example:**
```
[a-zA-Z0-9_]
```

### Dot

The character . is an expression.

**Accepts:**
A dot expression accepts if the input is not empty and the next rune is valid.

**Consumes:**
A dot expression consumes a single rune.

**Result:**
The result is the `string` of the consumed rune.

**Example:**
```
.
```

## Code predicates

A code predicate is an operator & or ! followed by a Go expression enclosed in { and }.
The expression must result in a boolean value,
and must be syntactically valid as the condition of an
[if statement](https://golang.org/ref/spec#If_statements).

Label expressions in scope of the code predicate define identifiers accessible in the Go code.
The value of the identifier is a `string` of the input consumed by the labeled expression.
If the labeled expression has yet to accept at the time the code predicate is evaluated, the string is empty.

**Accepts:**

A code predicate with the operator & accepts if the expression evaluates to `true`.

A code predicate with the operator ! accepts if the expression evaluates to `false`.

**Consumes:**
A code predicate consumes no runes of input.

**Result:**
The result of a code predicate is the empty string.

**Example:**
```
p:. &{ isUnicodeSpace(p) }
```

## Identifiers

Identifiers begin with any Unicode letter or _,
followed by a sequence of zero or more letters, numbers, or _.
Identifiers name a rule of the grammar.
It is an error if the identifier is not the name of a rule of the grammar.

**Accepts:**
An identifier accepts if its named rule accepts.

**Consumes:**
An identifier consumes the runes of its named rule.

**Result:**
The result of an identifier has the type and value of the result of its named rule.

**Example:**
```
HelloWorld <- Hello "," Space World
Hello <- "Hello" / "こんにちは"
World <- "World" / "世界"
Space <- ( p:. &{ isUnicodeSpace(p) } )+
```

## Subexpressions

A subexpression is an expression enclosed between ( and ).
It is primarily used for grouping.

**Accepts:**
A subexpression accepts if its inner expression accepts.

**Consumes:**
A subexpression consumes the runes of its inner expression.

**Result:**
The result type and value of a subexpression are those of its inner expression.

**Example:**
```
"Hello, " ( "World" / "世界" )
```

## Actions

An action is an expression followed by Go code between { and }.
The Go code must be valid as the
[body of a function](https://golang.org/ref/spec#Block).
The Go code must end in a
[return statement](https://golang.org/ref/spec#Return_statements),
and the returned value must be one of:
* [a type conversion](https://golang.org/ref/spec#Conversions)
* [a type assertion](https://golang.org/ref/spec#Type_assertions)
* [a function literal](https://golang.org/ref/spec#Function_literals)
* [a composite literal](https://golang.org/ref/spec#Composite_literals)
* [an &-composite literal](https://golang.org/ref/spec#Address_operators)
* [an int literal](https://golang.org/ref/spec#Integer_literals)
* [a float literal](https://golang.org/ref/spec#Floating-point_literals)
* [a rune literal](https://golang.org/ref/spec#Rune_literals)
* [a string literal](https://golang.org/ref/spec#String_literals)

Label expressions in scope of the action define identifiers accessible in the Go code.
The value of the identifier is the value of the labeled expression if it accepted.
If the labeled expression has yet to accept at the time the action is evaluated,
the value is the zero value of the corresponding type.

In addition, several other special identifiers are accessible to the code:
* `parser` is a pointer to the Peggy `Parser`.
* `start` is the byte offset in the input at which this expression first accepted.
* `end` is the byte offset in the input just after this expression last accepted.

**Accepts:**
An action accepts if its subexpression accepts.

**Consumes:**
An action consumes the runes of its subexpression.

**Result:**
The result of an action has the type returned by the return statement
at the end of the block of Go code.
The value is the value returned by the Go code.

**Example:**
```
hello:("Hello" / "こんにちは") ", " world:("World" / "世界") {
	return HelloWorld{
		Hello: hello,
		World: world,
	}
}
```

# Generated code

The output file path is specified by the `-o` command-line option.

All package-level definitions in the generated file begin with a prefix, defaulting to `_`.
This default makes the definitions unexported.
The prefix can be overridden with the `-p` command-line option.

The generated file has a `Parser` type that is passed to the various parser functions,
and it contains between two and four functions for each rule, defining
several parser _passes_. The passes are:
1. the _accepts_ pass,
2. the _fail_ pass,
3. optionally the _action_ pass, and
4. optionally the _node_ pass.

A typical flow for using a Peggy-generated parser is to:
* Create a new instance of the `Parser` type on a given input.
* Call the accepts function for the root-level grammar rule.
  * If the rule did not accept, there was a syntax error:
    call the fail function of the rule to get a `*peg.Fail` tree,
    and pass that to `peg.SimpleError` to get an `error`
    describing the syntax error.
  * If the rule accepted, call the action function of the rule
    to get the result of the parse (an AST, an evaluation, whatever),
    or call the node function to get a `*peg.Node` of the syntax tree.

Here is an example:

```
// Parse returns the AST generated by the grammar rule actions.
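// It runs the accepts pass first; on failure it runs the fail pass to build
// a *peg.Fail tree and turns that into an error with peg.SimpleError.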
func Parse(input string) (AstNode, error) {
	parser := _NewParser(input)
	if pos, perr := _RuleAccepts(parser, 0); pos < 0 {
		_, failTree := _RuleFail(parser, 0, perr)
		return nil, peg.SimpleError(input, failTree)
	}
	// Or, instead call _RuleNode(parser, 0)
	// and return a *peg.Node with the syntax tree.
	_, astRoot := _RuleAction(parser, 0)
	return astRoot, nil
}
```

There are a lot of steps.
This allows advanced uses not described here ☺.
(But see, for example,
[this file](https://github.com/eaburns/johaus/blob/master/parser/error.go),
which showcases how to use the `*peg.Fail` tree to construct more precise error messages.)

Now let's see what the generated code for each of the passes looks like in more detail.

## The Parser type

The `Parser` type is mostly intended to be treated as opaque.
It maintains information about the parse to communicate between the multiple passes.

The `Parser` type will have a field named `data` of type `interface{}`,
which is ignored by the generated code.
This field may be used in code predicates or actions to store auxiliary information.
Such a use is considered advanced, and is not recommended
unless you have a thorough understanding of the generated parser.

## Accepts pass

The accepts pass generates a function for each rule of the grammar with a signature of the form:
```
func Accepts(parser *Parser, start int) (deltaPos, deltaErr int)
```

The function determines whether the rule accepts the input
beginning from the byte offset `start`.
If it accepts, `deltaPos` is the non-negative number of bytes accepted.
If it does not accept, `deltaErr` is the number of bytes from `start`
to the last rune of input that could not be consumed.

The primary purpose of the accepts pass is to determine
whether the language defined by the grammar accepts the input.
The `Parser` maintains state from the accepts pass that enables a subsequent
fail, action, or node pass to compute its result without backtracking on rules.

## Fail pass

The fail pass generates a function for each rule of the grammar with a signature of the form:
```
func Fail(parser *Parser, start, errPos int) (int, *peg.Fail)
```

The functions of the fail pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass failed to accept.

Each function returns the `*peg.Fail` tree of all attempted rules
that failed to accept the input beginning from `start`
and that failed no earlier than `errPos` bytes into the input.

The description is somewhat advanced.
Suffice it to say, this computes a data structure used by the `peg` package
to compute a parse error string with the `peg.SimpleError` function.
More advanced users can inspect the `*peg.Fail` tree
to create more precise or informative parse errors.
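
For example, here is a small helper in the spirit of the `Parse` function above.
It is only a sketch: it assumes a grammar whose root rule is named `Expr` and the
default `_` prefix (as in example/calc/calc.peggy), so the generated functions are
`_NewParser`, `_ExprAccepts`, and `_ExprFail`.

```
// syntaxError returns nil if Expr accepts input,
// and otherwise returns an error describing the syntax error.
func syntaxError(input string) error {
	p, err := _NewParser(input)
	if err != nil {
		return err
	}
	pos, perr := _ExprAccepts(p, 0)
	if pos >= 0 {
		return nil // accepted; the fail pass is not needed
	}
	// The fail pass reuses the state built by the accepts pass,
	// revisiting only rules that failed no earlier than perr.
	_, fail := _ExprFail(p, 0, perr)
	return peg.SimpleError(input, fail)
}
```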

## Action pass

The action pass generates a function for each rule of the grammar with a signature of the form:
```
func Action(parser *Parser, start int) (int, *T)
```

The functions of the action pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass accepted the rule at this position.

Each function returns the number of consumed runes
and a pointer to a value of the rule expression's result type
(the `T` in the signature above).

## Node pass

The node pass generates a function for each rule of the grammar with a signature of the form:
```
func Node(parser *Parser, start int) (int, *peg.Node)
```

The functions of the node pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass accepted the rule at this position.

Each function returns the number of consumed runes
and a `*peg.Node` that is the root of the syntax tree of the parse.

(Peggy is not an official Google product.)
--------------------------------------------------------------------------------
/action_test.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"encoding/json"
5 | 	"io"
6 | 	"os"
7 | 	"os/exec"
8 | 	"reflect"
9 | 	"testing"
10 | 
11 | 	"github.com/eaburns/pretty"
12 | )
13 | 
14 | type actionTest struct {
15 | 	name    string
16 | 	grammar string
17 | 	cases   []actionTestCase
18 | }
19 | 
20 | type actionTestCase struct {
21 | 	input string
22 | 	want  interface{}
23 | }
24 | 
25 | var actionTests = []actionTest{
26 | 	{
27 | 		name:    "literal",
28 | 		grammar: `A <- "abc☺XYZ"`,
29 | 		cases: []actionTestCase{
30 | 			{"abc☺XYZ", "abc☺XYZ"},
31 | 		},
32 | 	},
33 | 	{
34 | 		name:    "char class",
35 | 		grammar: `A <- [a-zA-Z0-9☺]`,
36 | 		cases: []actionTestCase{
37 | 			{"a", "a"},
38 | 			{"☺", "☺"},
39 | 			{"Z", "Z"},
40 | 			{"5", "5"},
41 | 		},
42 | 	},
43 | 	{
44 | 		name:    "any char",
45 | 		grammar: `A <- .`,
46 | 		cases: []actionTestCase{
47 | 			{"a", "a"},
48 | 			{"☺", "☺"},
49 | 			{"Z", "Z"},
50 | 			{"5", "5"},
51 | 		},
52 | 	},
53 | 	{
54 | 		name:    "star",
55 | 		grammar: `A <- "abc"*`,
56 | 		cases: []actionTestCase{
57 | 			{"", ""},
58 | 			{"abc", "abc"},
59 | 			{"abcabc", "abcabc"},
60 | 			{"abcabcabcabc", "abcabcabcabc"},
61 | 		},
62 | 	},
63 | 	{
64 | 		name:    "plus",
65 | 		grammar: `A <- "abc"+`,
66 | 		cases: []actionTestCase{
67 | 			{"abc", "abc"},
68 | 			{"abcabc", "abcabc"},
69 | 			{"abcabcabcabc", "abcabcabcabc"},
70 | 		},
71 | 	},
72 | 	{
73 | 		name:    "question",
74 | 		grammar: `A <- "abc"?`,
75 | 		cases: []actionTestCase{
76 | 			{"", ""},
77 | 			{"abc", "abc"},
78 | 		},
79 | 	},
80 | 	{
81 | 		name:    "single type sequence",
82 | 		grammar: `A <- "a" "b" "c"`,
83 | 		cases: []actionTestCase{
84 | 			{"abc", "abc"},
85 | 		},
86 | 	},
87 | 	{
88 | 		name:    "single type choice",
89 | 		grammar: `A <- "abc" / "☺☹" / .`,
90 | 		cases: []actionTestCase{
91 | 			{"abc", "abc"},
92 | 			{"☺☹", "☺☹"},
93 | 			{"z", "z"},
94 | 		},
95 | 	},
96 | 	{
97 | 		name:    "multi-type choice",
98 | 		grammar: `A <- "abc" / "x" "y" "z"`,
99 | 		cases: []actionTestCase{
100 | 			{"abc", "abc"},
101 | 			{"xyz", "xyz"},
102 | 		},
103 | 	},
104 | 	{
105 | 		name:    "choice branch fails after submatch",
106 | 		grammar: `A <- "xyz"?
( "a" "b" "c" / "a" "c" "b" )`, 107 | cases: []actionTestCase{ 108 | {"acb", "acb"}, 109 | {"xyzacb", "xyzacb"}, 110 | }, 111 | }, 112 | { 113 | name: "multi-type sequence", 114 | grammar: `A <- ("a" "b" "c") "xyz"`, 115 | cases: []actionTestCase{ 116 | {"abcxyz", "abcxyz"}, 117 | }, 118 | }, 119 | { 120 | name: "identifier", 121 | grammar: ` 122 | A <- Abc "xyz" 123 | Abc <- "a" "b" "c"`, 124 | cases: []actionTestCase{ 125 | {"abcxyz", "abcxyz"}, 126 | }, 127 | }, 128 | { 129 | name: "true predicate", 130 | grammar: ` 131 | A <- "123"? &Abc "abc" 132 | Abc <- "a" "b" "c"`, 133 | cases: []actionTestCase{ 134 | {"abc", "abc"}, 135 | {"123abc", "123abc"}, 136 | }, 137 | }, 138 | { 139 | name: "false predicate", 140 | grammar: ` 141 | A <- "123"? !Abc "xyz" 142 | Abc <- "a" "b" "c"`, 143 | cases: []actionTestCase{ 144 | {"xyz", "xyz"}, 145 | {"123xyz", "123xyz"}, 146 | }, 147 | }, 148 | { 149 | name: "true pred code", 150 | grammar: ` 151 | A <- "abc"? &{ true } "xyz"`, 152 | cases: []actionTestCase{ 153 | {"xyz", "xyz"}, 154 | {"abcxyz", "abcxyz"}, 155 | }, 156 | }, 157 | { 158 | name: "false pred code", 159 | grammar: ` 160 | A <- "abc"? !{ false } "xyz"`, 161 | cases: []actionTestCase{ 162 | {"xyz", "xyz"}, 163 | {"abcxyz", "abcxyz"}, 164 | }, 165 | }, 166 | { 167 | name: "subexpr", 168 | grammar: `A <- ("a" "b" "c")`, 169 | cases: []actionTestCase{ 170 | {"abc", "abc"}, 171 | }, 172 | }, 173 | { 174 | name: "label", 175 | grammar: `A <- l1:"a" l2:"b" l3:"c"`, 176 | cases: []actionTestCase{ 177 | {"abc", "abc"}, 178 | }, 179 | }, 180 | { 181 | name: "action", 182 | grammar: ` 183 | A <- l1:. l2:. l3:. { 184 | return map[string]string{ 185 | "1": l1, 186 | "2": l2, 187 | "3": l3, 188 | } 189 | }`, 190 | cases: []actionTestCase{ 191 | {"abc", map[string]interface{}{ 192 | "1": "a", 193 | "2": "b", 194 | "3": "c", 195 | }}, 196 | {"xyz", map[string]interface{}{ 197 | "1": "x", 198 | "2": "y", 199 | "3": "z", 200 | }}, 201 | }, 202 | }, 203 | { 204 | name: "start and end", 205 | grammar: ` 206 | A <- smiley? 
as v:bs cs { return [2]int(v) } 207 | smiley <- '☺' 208 | as <- 'a'* 209 | bs <- 'b'* { return [2]int{start, end} } 210 | cs <- 'c'* 211 | `, 212 | cases: []actionTestCase{ 213 | {"", []interface{}{0.0, 0.0}}, 214 | {"aaaccc", []interface{}{3.0, 3.0}}, 215 | {"aaabccc", []interface{}{3.0, 4.0}}, 216 | {"bbb", []interface{}{0.0, 3.0}}, 217 | {"aaabbbccc", []interface{}{3.0, 6.0}}, 218 | {"☺aaabbbccc", []interface{}{float64(len("☺") + 3), float64(len("☺") + 6)}}, 219 | }, 220 | }, 221 | { 222 | name: "type inference", 223 | grammar: ` 224 | A <- convert / ptr_convert / assert / func / struct / ptr_struct / map / array / slice / int / float / rune / string 225 | convert <- x:("convert" { return int32(1) }) { return string(fmt.Sprintf("%T", x)) } 226 | ptr_convert <- x:("ptr_convert" { return (*string)(nil) }) { return string(fmt.Sprintf("%T", x)) } 227 | assert <- x:("assert" { var c interface{} = peg.Node{}; return c.(peg.Node) }) { return string(fmt.Sprintf("%T", x)) } 228 | func <- x:("func" { return func(){} }) { return string(fmt.Sprintf("%T", x)) } 229 | struct <- x:("struct" { return peg.Node{} }) { return string(fmt.Sprintf("%T", x)) } 230 | ptr_struct <- x:("ptr_struct" { return &peg.Node{} }) { return string(fmt.Sprintf("%T", x)) } 231 | map <- x:("map" { return map[string]int{} }) { return string(fmt.Sprintf("%T", x)) } 232 | array <- x:("array" { return [5]int{} }) { return string(fmt.Sprintf("%T", x)) } 233 | slice <- x:("slice" { return []int{} }) { return string(fmt.Sprintf("%T", x)) } 234 | int <- x:("int" { return 0 }) { return string(fmt.Sprintf("%T", x)) } 235 | float <- x:("float" { return 0.0 }) { return string(fmt.Sprintf("%T", x)) } 236 | rune <- x:("rune" { return 'a' }) { return string(fmt.Sprintf("%T", x)) } 237 | string <- x:("string" { return "" }) { return string(fmt.Sprintf("%T", x)) } 238 | `, 239 | cases: []actionTestCase{ 240 | {"convert", "int32"}, 241 | {"ptr_convert", "*string"}, 242 | {"assert", "peg.Node"}, 243 | {"func", "func()"}, 244 | {"struct", "peg.Node"}, 245 | {"ptr_struct", "*peg.Node"}, 246 | {"array", "[5]int"}, 247 | {"slice", "[]int"}, 248 | {"int", "int"}, 249 | {"float", "float64"}, 250 | {"rune", "int32"}, 251 | {"string", "string"}, 252 | }, 253 | }, 254 | 255 | // A simple calculator. 256 | // BUG: The test grammar has reverse the normal associativity — oops. 
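// Because Expr and Term recurse on the right (l:Term op:(Plus / Minus) r:Expr),
// chained subtraction or division groups to the right: "10-3-2" would parse as
// 10-(3-2). The cases below avoid chaining those operators, so they still pass.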
257 | { 258 | name: "calculator", 259 | grammar: ` 260 | A <- Expr 261 | Expr <- l:Term op:(Plus / Minus) r:Expr { return int(op(l, r)) } / x:Term { return int(x) } 262 | Plus <- "+" { return func(a, b int) int { return a + b } } 263 | Minus <- "-" { return func(a, b int) int { return a - b } } 264 | Term <- l:Factor op:(Times / Divide) r:Term { return int(op(l, r)) } / x:Factor { return int(x) } 265 | Times <- "*" { return func(a, b int) int { return a * b } } 266 | Divide <- "/"{ return func(a, b int) int { return a / b } } 267 | Factor <- Number / '(' x:Expr ')' { return int(x) } 268 | Number <- x:[0-9]+ { var i int; for _, r := range x { i = i * 10 + (int(r) - '0') }; return int(i) } 269 | `, 270 | cases: []actionTestCase{ 271 | {"1", 1.0}, 272 | {"(5)", 5.0}, 273 | {"2*3", 6.0}, 274 | {"2+3", 5.0}, 275 | {"10-3*2", 4.0}, 276 | {"10-(6/2)*5", -5.0}, 277 | }, 278 | }, 279 | } 280 | 281 | func TestActionGen(t *testing.T) { 282 | for _, test := range actionTests { 283 | test := test 284 | t.Run(test.name, func(t *testing.T) { 285 | t.Parallel() 286 | source := generateTest(actionPrelude, test.grammar) 287 | binary := build(source) 288 | defer rm(binary) 289 | go rm(source) 290 | 291 | for _, c := range test.cases { 292 | var got struct { 293 | T interface{} 294 | } 295 | parseJSON(binary, c.input, &got) 296 | if !reflect.DeepEqual(got.T, c.want) { 297 | t.Errorf("parse(%q)=%s (%#v), want %s", 298 | c.input, pretty.String(got.T), got.T, 299 | pretty.String(c.want)) 300 | } 301 | } 302 | 303 | }) 304 | } 305 | } 306 | 307 | // parseJSON parses an input using the given binary 308 | // and returns the position of either the parse or error 309 | // along with whether the parse succeeded. 310 | // The format for transmitting the result 311 | // from the parser binary to the test harness 312 | // is JSON. 313 | func parseJSON(binary, input string, result interface{}) { 314 | cmd := exec.Command(binary) 315 | cmd.Stderr = os.Stderr 316 | stdin, err := cmd.StdinPipe() 317 | if err != nil { 318 | panic(err.Error()) 319 | } 320 | stdout, err := cmd.StdoutPipe() 321 | if err != nil { 322 | panic(err.Error()) 323 | } 324 | if err := cmd.Start(); err != nil { 325 | panic(err.Error()) 326 | } 327 | go func() { 328 | if _, err := io.WriteString(stdin, input); err != nil { 329 | panic(err.Error()) 330 | } 331 | if err := stdin.Close(); err != nil { 332 | panic(err.Error()) 333 | } 334 | }() 335 | if err := json.NewDecoder(stdout).Decode(result); err != nil { 336 | panic(err.Error()) 337 | } 338 | if err := cmd.Wait(); err != nil { 339 | panic(err.Error()) 340 | } 341 | } 342 | 343 | var actionPrelude = `{ 344 | package main 345 | 346 | import ( 347 | "encoding/json" 348 | "fmt" 349 | "io/ioutil" 350 | "os" 351 | 352 | "github.com/eaburns/peggy/peg" 353 | ) 354 | 355 | func main() { 356 | data, err := ioutil.ReadAll(os.Stdin) 357 | if err != nil { 358 | os.Stderr.WriteString(err.Error() + "\n") 359 | os.Exit(1) 360 | } 361 | p, err := _NewParser(string(data)) 362 | if err != nil { 363 | os.Stderr.WriteString(err.Error() + "\n") 364 | os.Exit(1) 365 | } 366 | if pos, _ := _AAccepts(p, 0); pos < 0 { 367 | os.Stderr.WriteString("parse failed") 368 | os.Exit(1) 369 | } 370 | var result struct { 371 | T interface{} 372 | } 373 | _, result.T = _AAction(p, 0) 374 | if err := json.NewEncoder(os.Stdout).Encode(&result); err != nil { 375 | // Hack — we need fmt imported for the type inference test. 376 | // However, if imported, it must be used. 377 | // Here we use it at least once. 
378 | fmt.Fprintf(os.Stderr, err.Error() + "\n") 379 | os.Exit(1) 380 | } 381 | } 382 | } 383 | ` 384 | -------------------------------------------------------------------------------- /check.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "sort" 11 | ) 12 | 13 | // Check does semantic analysis of the rules, 14 | // setting bookkeeping needed to later generate the parser, 15 | // returning any errors encountered in order of their begin location. 16 | func Check(grammar *Grammar) error { 17 | var errs Errors 18 | rules := expandTemplates(grammar.Rules, &errs) 19 | ruleMap := make(map[string]*Rule, len(rules)) 20 | for i, r := range rules { 21 | r.N = i 22 | name := r.Name.String() 23 | if other := ruleMap[name]; other != nil { 24 | errs.add(r, "rule %s redefined", name) 25 | } 26 | ruleMap[name] = r 27 | } 28 | 29 | var p path 30 | for _, r := range rules { 31 | r.checkLeft(ruleMap, p, &errs) 32 | } 33 | for _, r := range rules { 34 | check(r, ruleMap, &errs) 35 | } 36 | if err := errs.ret(); err != nil { 37 | return err 38 | } 39 | grammar.CheckedRules = rules 40 | return nil 41 | } 42 | 43 | func expandTemplates(ruleDefs []Rule, errs *Errors) []*Rule { 44 | var expanded, todo []*Rule 45 | tmplNames := make(map[string]*Rule) 46 | for i := range ruleDefs { 47 | r := &ruleDefs[i] 48 | if len(r.Name.Args) > 0 { 49 | seenParams := make(map[string]bool) 50 | for _, param := range r.Name.Args { 51 | n := param.String() 52 | if seenParams[n] { 53 | errs.add(param, "parameter %s redefined", n) 54 | } 55 | seenParams[n] = true 56 | } 57 | tmplNames[r.Name.Name.String()] = r 58 | } else { 59 | expanded = append(expanded, r) 60 | todo = append(todo, r) 61 | } 62 | } 63 | 64 | seen := make(map[string]bool) 65 | for i := 0; i < len(todo); i++ { 66 | for _, invok := range invokedTemplates(todo[i]) { 67 | if seen[invok.Name.String()] { 68 | continue 69 | } 70 | seen[invok.Name.String()] = true 71 | tmpl := tmplNames[invok.Name.Name.String()] 72 | if tmpl == nil { 73 | continue // undefined template, error reported elsewhere 74 | } 75 | exp := expand1(tmpl, invok, errs) 76 | if exp == nil { 77 | continue // error expanding, error reported elsewhere 78 | } 79 | todo = append(todo, exp) 80 | expanded = append(expanded, exp) 81 | } 82 | } 83 | return expanded 84 | } 85 | 86 | func expand1(tmpl *Rule, invok *Ident, errs *Errors) *Rule { 87 | if len(invok.Args) != len(tmpl.Args) { 88 | errs.add(invok, "template %s argument count mismatch: got %d, expected %d", 89 | tmpl.Name, len(invok.Args), len(tmpl.Args)) 90 | return nil 91 | } 92 | copy := *tmpl 93 | sub := make(map[string]string, len(tmpl.Args)) 94 | for i, arg := range invok.Args { 95 | sub[tmpl.Args[i].String()] = arg.String() 96 | } 97 | copy.Args = invok.Args 98 | copy.Expr = tmpl.Expr.substitute(sub) 99 | return © 100 | } 101 | 102 | func invokedTemplates(r *Rule) []*Ident { 103 | var tmpls []*Ident 104 | r.Expr.Walk(func(e Expr) bool { 105 | if id, ok := e.(*Ident); ok { 106 | if len(id.Args) > 0 { 107 | tmpls = append(tmpls, id) 108 | } 109 | } 110 | return true 111 | }) 112 | return tmpls 113 | } 114 | 115 | type path struct { 116 | stack []*Rule 117 | seen map[*Rule]bool 118 | } 119 | 120 | func (p *path) push(r *Rule) bool { 121 | if p.seen == 
nil { 122 | p.seen = make(map[*Rule]bool) 123 | } 124 | if p.seen[r] { 125 | return false 126 | } 127 | p.stack = append(p.stack, r) 128 | p.seen[r] = true 129 | return true 130 | } 131 | 132 | func (p *path) pop() { 133 | p.stack = p.stack[:len(p.stack)] 134 | } 135 | 136 | func (p *path) cycle(r *Rule) []*Rule { 137 | for i := len(p.stack) - 1; i >= 0; i-- { 138 | if p.stack[i] == r { 139 | return append(p.stack[i:], r) 140 | } 141 | } 142 | panic("no cycle") 143 | } 144 | 145 | func cycleString(rules []*Rule) string { 146 | var s string 147 | for _, r := range rules { 148 | if s != "" { 149 | s += ", " 150 | } 151 | s += r.Name.String() 152 | } 153 | return s 154 | } 155 | 156 | func (r *Rule) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 157 | if r.typ != nil { 158 | return 159 | } 160 | if !p.push(r) { 161 | cycle := p.cycle(r) 162 | errs.add(cycle[0], "left-recursion: %s", cycleString(cycle)) 163 | for _, r := range cycle { 164 | r.typ = new(string) 165 | } 166 | return 167 | } 168 | r.Expr.checkLeft(rules, p, errs) 169 | t := r.Expr.Type() 170 | r.typ = &t 171 | r.epsilon = r.Expr.epsilon() 172 | p.pop() 173 | } 174 | 175 | func (e *Choice) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 176 | for _, sub := range e.Exprs { 177 | sub.checkLeft(rules, p, errs) 178 | } 179 | } 180 | 181 | func (e *Action) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 182 | e.Expr.checkLeft(rules, p, errs) 183 | } 184 | 185 | func (e *Sequence) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 186 | for _, sub := range e.Exprs { 187 | sub.checkLeft(rules, p, errs) 188 | if !sub.epsilon() { 189 | break 190 | } 191 | } 192 | } 193 | 194 | func (e *LabelExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 195 | e.Expr.checkLeft(rules, p, errs) 196 | } 197 | 198 | func (e *PredExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 199 | e.Expr.checkLeft(rules, p, errs) 200 | } 201 | 202 | func (e *RepExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 203 | e.Expr.checkLeft(rules, p, errs) 204 | } 205 | 206 | func (e *OptExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 207 | e.Expr.checkLeft(rules, p, errs) 208 | } 209 | 210 | func (e *Ident) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 211 | if e.rule = rules[e.Name.String()]; e.rule != nil { 212 | e.rule.checkLeft(rules, p, errs) 213 | } 214 | } 215 | 216 | func (e *SubExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 217 | e.Expr.checkLeft(rules, p, errs) 218 | } 219 | 220 | func (e *PredCode) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 221 | 222 | func (e *Literal) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 223 | 224 | func (e *CharClass) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 225 | 226 | func (e *Any) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 227 | 228 | type ctx struct { 229 | rules map[string]*Rule 230 | allLabels *[]*LabelExpr 231 | curLabels map[string]*LabelExpr 232 | } 233 | 234 | func check(rule *Rule, rules map[string]*Rule, errs *Errors) { 235 | ctx := ctx{ 236 | rules: rules, 237 | allLabels: &rule.Labels, 238 | curLabels: make(map[string]*LabelExpr), 239 | } 240 | rule.Expr.check(ctx, true, errs) 241 | sort.Slice(rule.Labels, func(i, j int) bool { 242 | return rule.Labels[i].N < rule.Labels[j].N 243 | }) 244 | } 245 | 246 | func (e *Choice) check(ctx ctx, valueUsed bool, errs *Errors) { 247 | for _, sub := range e.Exprs { 248 | subCtx := ctx 249 | 
subCtx.curLabels = make(map[string]*LabelExpr) 250 | for n, l := range ctx.curLabels { 251 | subCtx.curLabels[n] = l 252 | } 253 | sub.check(subCtx, valueUsed, errs) 254 | } 255 | t := e.Exprs[0].Type() 256 | for _, sub := range e.Exprs { 257 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" { 258 | errs.add(sub, "type mismatch: got %s, expected %s", got, t) 259 | } 260 | } 261 | } 262 | 263 | func (e *Action) check(ctx ctx, valueUsed bool, errs *Errors) { 264 | e.Expr.check(ctx, false, errs) 265 | for _, l := range ctx.curLabels { 266 | e.Labels = append(e.Labels, l) 267 | } 268 | sort.Slice(e.Labels, func(i, j int) bool { 269 | return e.Labels[i].Label.String() < e.Labels[j].Label.String() 270 | }) 271 | } 272 | 273 | // BUG: figure out what to do about sequence types. 274 | func (e *Sequence) check(ctx ctx, valueUsed bool, errs *Errors) { 275 | for _, sub := range e.Exprs { 276 | sub.check(ctx, valueUsed, errs) 277 | } 278 | t := e.Exprs[0].Type() 279 | for _, sub := range e.Exprs { 280 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" { 281 | errs.add(sub, "type mismatch: got %s, expected %s", got, t) 282 | } 283 | } 284 | } 285 | 286 | func (e *LabelExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 287 | e.Expr.check(ctx, true, errs) 288 | if _, ok := ctx.curLabels[e.Label.String()]; ok { 289 | errs.add(e.Label, "label %s redefined", e.Label.String()) 290 | } 291 | e.N = len(*ctx.allLabels) 292 | *ctx.allLabels = append(*ctx.allLabels, e) 293 | ctx.curLabels[e.Label.String()] = e 294 | } 295 | 296 | func (e *PredExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 297 | e.Expr.check(ctx, false, errs) 298 | } 299 | 300 | func (e *RepExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 301 | e.Expr.check(ctx, valueUsed, errs) 302 | } 303 | 304 | func (e *OptExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 305 | e.Expr.check(ctx, valueUsed, errs) 306 | } 307 | 308 | func (e *SubExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 309 | e.Expr.check(ctx, valueUsed, errs) 310 | } 311 | 312 | func (e *Ident) check(ctx ctx, _ bool, errs *Errors) { 313 | r, ok := ctx.rules[e.Name.String()] 314 | if !ok { 315 | errs.add(e, "rule %s undefined", e.Name.String()) 316 | } else { 317 | e.rule = r 318 | } 319 | } 320 | 321 | func (e *PredCode) check(ctx ctx, _ bool, _ *Errors) { 322 | for _, l := range ctx.curLabels { 323 | e.Labels = append(e.Labels, l) 324 | } 325 | sort.Slice(e.Labels, func(i, j int) bool { 326 | return e.Labels[i].Label.String() < e.Labels[j].Label.String() 327 | }) 328 | } 329 | 330 | func (e *Literal) check(ctx, bool, *Errors) {} 331 | 332 | func (e *CharClass) check(ctx, bool, *Errors) {} 333 | 334 | func (e *Any) check(ctx, bool, *Errors) {} 335 | -------------------------------------------------------------------------------- /check_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
6 | 7 | package main 8 | 9 | import ( 10 | "regexp" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | type checkTest struct { 16 | name string 17 | in string 18 | err string 19 | skipActions bool 20 | } 21 | 22 | func (test checkTest) Run(t *testing.T) { 23 | in := strings.NewReader(test.in) 24 | g, err := Parse(in, "test.file") 25 | if err != nil { 26 | t.Errorf("Parse(%q, _)=_, %v, want _,nil", test.in, err) 27 | return 28 | } 29 | err = Check(g) 30 | if test.err == "" { 31 | if err != nil { 32 | t.Errorf("Check(%q)=%v, want nil", test.in, err) 33 | } 34 | return 35 | } 36 | re := regexp.MustCompile(test.err) 37 | if err == nil || !re.MatchString(err.Error()) { 38 | var e string 39 | if err != nil { 40 | e = err.Error() 41 | } 42 | t.Errorf("Check(%q)=%v, but expected to match %q", 43 | test.in, e, test.err) 44 | return 45 | } 46 | } 47 | 48 | func TestCheck(t *testing.T) { 49 | tests := []checkTest{ 50 | { 51 | name: "empty OK", 52 | in: "", 53 | err: "", 54 | }, 55 | { 56 | name: "various OK", 57 | in: `A <- (G/B C)* 58 | B <- &{pred}* 59 | C <- !{pred}* { return string(act) } 60 | D <- .* !B 61 | E <- C* 62 | F <- "cde"* 63 | G <- [fgh]*`, 64 | err: "", 65 | }, 66 | { 67 | name: "redefined rule", 68 | in: "A <- [x]\nA <- [y]", 69 | err: "^test.file:2.1,2.9: rule A redefined", 70 | }, 71 | { 72 | name: "undefined rule", 73 | in: "A <- B", 74 | err: "^test.file:1.6,1.7: rule B undefined", 75 | }, 76 | { 77 | name: "redefined label", 78 | in: "A <- a:[a] a:[a]", 79 | err: "^test.file:1.12,1.13: label a redefined", 80 | }, 81 | { 82 | name: "non-redefined label with same name in different branch", 83 | in: "A <- a:[a] / (a:[a] / a:[a]) / a:[a]", 84 | err: "", 85 | }, 86 | { 87 | name: "redefined label in same choice branch", 88 | in: "A <- a:[a] / a:[a] a:[a]", 89 | err: "^test.file:1.20,1.21: label a redefined", 90 | }, 91 | { 92 | name: "choice first error", 93 | in: "A <- Undefined / A", 94 | err: ".+", 95 | }, 96 | { 97 | name: "choice second error", 98 | in: "A <- B / Undefined\nB <- [x]", 99 | err: ".+", 100 | }, 101 | { 102 | name: "seq first error", 103 | in: "A <- Undefined A", 104 | err: ".+", 105 | }, 106 | { 107 | name: "sequence second error", 108 | in: "A <- B Undefined\nB <- [x]", 109 | err: ".+", 110 | }, 111 | { 112 | name: "template parameter OK", 113 | in: `A <- x 114 | B <- A 115 | C <- "c"`, 116 | err: "", 117 | }, 118 | { 119 | name: "template parameter redef", 120 | in: `A <- x 121 | B <- A 122 | C <- "c"`, 123 | err: "^test.file:1.6,1.7: parameter x redefined$", 124 | }, 125 | { 126 | name: "template arg count mismatch", 127 | in: `A <- x 128 | B <- A 129 | C <- "c"`, 130 | err: "test.file:2.10,2.16: template A argument count mismatch: got 2, expected 1", 131 | }, 132 | { 133 | name: "multiple errors", 134 | in: "A <- U1 U2\nA <- u:[x] u:[x]", 135 | err: "test.file:1.6,1.8: rule U1 undefined\n" + 136 | "test.file:1.9,1.11: rule U2 undefined\n" + 137 | "test.file:2.1,2.17: rule A redefined\n" + 138 | "test.file:2.12,2.13: label u redefined", 139 | }, 140 | { 141 | name: "right recursion is OK", 142 | in: `A <- "b" B 143 | B <- A`, 144 | }, 145 | { 146 | name: "direct left-recursion", 147 | in: `A <- A`, 148 | err: "^test.file:1.1,1.7: left-recursion: A, A$", 149 | }, 150 | { 151 | name: "indirect left-recursion", 152 | in: `A <- C0 153 | C0 <- C1 154 | C1 <- C2 155 | C2 <- C0`, 156 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$", 157 | }, 158 | { 159 | name: "choice left-recursion", 160 | in: `A <- B / C / D 161 | B <- "b" 162 | C <- "c" 163 | D <- A`, 
164 | err: "^test.file:1.1,1.15: left-recursion: A, D, A$", 165 | }, 166 | { 167 | name: "sequence left-recursion", 168 | in: `A <- !B C D E 169 | B <- "b" 170 | C <- !"c" 171 | D <- C # non-consuming through C 172 | E <- A`, 173 | err: "^test.file:1.1,1.14: left-recursion: A, E, A$", 174 | }, 175 | { 176 | name: "various expr left-recursion", 177 | in: `Choice <- "a" / Sequence 178 | Sequence <- SubExpr "b" 179 | SubExpr <- ( PredExpr ) 180 | PredExpr <- &RepExpr 181 | RepExpr <- OptExpr+ 182 | OptExpr <- Action? 183 | Action <- Choice { return "" }`, 184 | err: "^test.file:1.1,1.25: left-recursion: Choice, Sequence, SubExpr, PredExpr, RepExpr, OptExpr, Action, Choice$", 185 | }, 186 | { 187 | name: "templates calling templates", 188 | in: `A <- B 189 | B <- C 190 | C <- "a" D C? 191 | D <- X 192 | X <- "x"`, 193 | err: "", // this should work fine. 194 | }, 195 | { 196 | name: "template left-recursion", 197 | in: `A <- C0 198 | C0 <- C1 199 | C1 <- C2 200 | C2 <- X`, 201 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$", 202 | }, 203 | { 204 | name: "multiple left-recursion errors", 205 | in: `A <- A 206 | B <- C 207 | C <- B`, 208 | err: "^test.file:1.1,1.7: left-recursion: A, A\n" + 209 | "test.file:2.5,2.11: left-recursion: B, C, B$", 210 | }, 211 | { 212 | name: "right-recursion is OK", 213 | in: `A <- B C A? 214 | B <- "b" B / C 215 | C <- "c"`, 216 | err: "", 217 | }, 218 | 219 | { 220 | name: "choice type mismatch", 221 | in: `A <- "a" / "b" { return 5 }`, 222 | err: "^test.file:1.12,1.28: type mismatch: got int, expected string", 223 | }, 224 | { 225 | name: "sequence type mismatch", 226 | in: `A <- "a" ( "b" { return 5 } )`, 227 | err: "^test.file:1.10,1.29: type mismatch: got int, expected string", 228 | }, 229 | { 230 | name: "unused choice, no mismatch", 231 | in: `A <- ( "a" / "b" { return 5 } ) { return 6 }`, 232 | err: "", 233 | }, 234 | { 235 | name: "unused sequence, no mismatch", 236 | in: `A <- "a" ( "b" { return 5 } ) { return 6 }`, 237 | err: "", 238 | }, 239 | { 240 | name: "&-pred subexpression is unused", 241 | in: `A <- "a" !( "b" { return 5 } )`, 242 | err: "", 243 | }, 244 | { 245 | name: "!-pred subexpression is unused", 246 | in: `A <- "a" !( "b" { return 5 } )`, 247 | err: "", 248 | }, 249 | { 250 | name: "multiple type errors", 251 | in: `A <- B ( "c" { return 0 } ) 252 | B <- "b" / ( "c" { return 0 } )`, 253 | err: "^test.file:1.8,1.27: type mismatch: got int, expected string\n" + 254 | "test.file:2.16,2.35: type mismatch: got int, expected string$", 255 | }, 256 | } 257 | for _, test := range tests { 258 | test := test 259 | t.Run(test.name, func(t *testing.T) { 260 | t.Parallel() 261 | test.Run(t) 262 | }) 263 | } 264 | } 265 | 266 | func TestGenActionsFalse(t *testing.T) { 267 | // This set of tests cannot be run in parallel. 
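// They mutate the package-level *genActions flag, so the subtests below do not call t.Parallel.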
268 | *genActions = false 269 | defer func() { *genActions = true }() 270 | 271 | tests := []checkTest{ 272 | { 273 | name: "choice type mismatch: no error", 274 | in: `A <- "a" / "b" { return 5 }`, 275 | }, 276 | { 277 | name: "sequence type mismatch: no error", 278 | in: `A <- "a" ( "b" { return 5 } )`, 279 | }, 280 | } 281 | for _, test := range tests { 282 | t.Run(test.name, test.Run) 283 | } 284 | } 285 | -------------------------------------------------------------------------------- /error.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "sort" 12 | ) 13 | 14 | // Located is an interface representing anything located within the input stream. 15 | type Located interface { 16 | Begin() Loc 17 | End() Loc 18 | } 19 | 20 | // Errors implements error, containing multiple errors. 21 | type Errors struct { 22 | Errs []Error 23 | } 24 | 25 | func (err *Errors) ret() error { 26 | if len(err.Errs) == 0 { 27 | return nil 28 | } 29 | sort.Slice(err.Errs, func(i, j int) bool { 30 | return err.Errs[i].Begin().Less(err.Errs[j].Begin()) 31 | }) 32 | return err 33 | } 34 | 35 | func (err *Errors) add(loc Located, format string, args ...interface{}) { 36 | err.Errs = append(err.Errs, Err(loc, format, args...)) 37 | } 38 | 39 | // Error returns the string representation of the Errors, 40 | // which is the string of each Error, one per-line. 41 | func (err Errors) Error() string { 42 | var s string 43 | for i, e := range err.Errs { 44 | if i > 0 { 45 | s += "\n" 46 | } 47 | s += e.Error() 48 | } 49 | return s 50 | } 51 | 52 | // Error is an error tied to an element of the Peggy input file. 53 | type Error struct { 54 | Located 55 | Msg string 56 | } 57 | 58 | func (err Error) Error() string { 59 | b, e := err.Begin(), err.End() 60 | l0, c0 := b.Line, b.Col 61 | l1, c1 := e.Line, e.Col 62 | switch { 63 | case l0 == l1 && c0 == c1: 64 | return fmt.Sprintf("%s:%d.%d: %s", b.File, l0, c0, err.Msg) 65 | default: 66 | return fmt.Sprintf("%s:%d.%d,%d.%d: %s", b.File, l0, c0, l1, c1, err.Msg) 67 | } 68 | } 69 | 70 | // Err returns an error containing the location and formatted message. 71 | func Err(loc Located, format string, args ...interface{}) Error { 72 | return Error{Located: loc, Msg: fmt.Sprintf(format, args...)} 73 | } 74 | -------------------------------------------------------------------------------- /example/calc/calc.peggy: -------------------------------------------------------------------------------- 1 | { 2 | // Calc is an example calculator program. 
3 | // You can build it from calc.peggy with 4 | // peggy -o calc.go calc.peggy 5 | package main 6 | 7 | import ( 8 | "bufio" 9 | "fmt" 10 | "math/big" 11 | "os" 12 | "unicode" 13 | "unicode/utf8" 14 | 15 | "github.com/eaburns/peggy/peg" 16 | ) 17 | 18 | func main() { 19 | scanner := bufio.NewScanner(os.Stdin) 20 | for scanner.Scan() { 21 | line := scanner.Text() 22 | p, err := _NewParser(line) 23 | if err != nil { 24 | fmt.Println(err) 25 | os.Exit(1) 26 | } 27 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 28 | _, fail := _ExprFail(p, 0 ,perr) 29 | fmt.Println(peg.SimpleError(line, fail)) 30 | continue 31 | } 32 | _, result := _ExprAction(p, 0) 33 | fmt.Println((*result).String()) 34 | } 35 | if err := scanner.Err(); err != nil { 36 | fmt.Println(err) 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | type op func(*big.Float, *big.Float, *big.Float) *big.Float 42 | 43 | type tail struct { 44 | op op 45 | r *big.Float 46 | } 47 | 48 | func evalTail(l big.Float, tail []tail) big.Float { 49 | for _, t := range tail { 50 | t.op(&l, &l, t.r) 51 | } 52 | return l 53 | } 54 | 55 | func isSpace(s string) bool { 56 | r, _ := utf8.DecodeRuneInString(s) 57 | return unicode.IsSpace(r) 58 | } 59 | } 60 | 61 | Expr <- s:Sum EOF { return (*big.Float)(&s) } 62 | 63 | Sum <- l:Product tail:SumTail* { return (big.Float)(evalTail(l, tail)) } 64 | 65 | SumTail <- op:AddOp r:Product { return tail{op, &r} } 66 | 67 | AddOp "operator" <- 68 | _ "+" { return op((*big.Float).Add) } / 69 | _ "-" { return op((*big.Float).Sub) } 70 | 71 | Product <- l:Value tail:ProductTail* { return (big.Float)(evalTail(l, tail)) } 72 | 73 | ProductTail <- op:MulOp r:Value { return tail{op, &r} } 74 | 75 | MulOp "operator" <- 76 | _ "*" { return op((*big.Float).Mul) } / 77 | _ "/" { return op((*big.Float).Quo) } 78 | 79 | Value <- Num / _ "(" e:Sum _ ")" { return (big.Float)(e) } 80 | 81 | Num "number" <- _ n:( [0-9]+ ("." [0-9]+)? ) { 82 | var f big.Float 83 | f.Parse(n, 10) 84 | return (big.Float)(f) 85 | } 86 | 87 | _ "space" <- ( s:. &{ isSpace(s) } )* 88 | 89 | EOF "end of file" <- !. -------------------------------------------------------------------------------- /example/label_names/label_names.go: -------------------------------------------------------------------------------- 1 | // Test labels with the same name but in different choice branches. 
2 | // peggy -o label_names.go label_names.peggy 3 | package main 4 | 5 | import ( 6 | "bufio" 7 | "fmt" 8 | "os" 9 | 10 | "github.com/eaburns/peggy/peg" 11 | ) 12 | 13 | func main() { 14 | scanner := bufio.NewScanner(os.Stdin) 15 | for scanner.Scan() { 16 | line := scanner.Text() 17 | p, err := _NewParser(line) 18 | if err != nil { 19 | fmt.Println(err) 20 | os.Exit(1) 21 | } 22 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 23 | _, fail := _ExprFail(p, 0, perr) 24 | fmt.Println(peg.SimpleError(line, fail)) 25 | continue 26 | } 27 | _, result := _ExprAction(p, 0) 28 | fmt.Println(*result) 29 | } 30 | if err := scanner.Err(); err != nil { 31 | fmt.Println(err) 32 | os.Exit(1) 33 | } 34 | } 35 | 36 | const ( 37 | _Expr int = 0 38 | 39 | _N int = 1 40 | ) 41 | 42 | type _Parser struct { 43 | text string 44 | deltaPos [][_N]int32 45 | deltaErr [][_N]int32 46 | node map[_key]*peg.Node 47 | fail map[_key]*peg.Fail 48 | act map[_key]interface{} 49 | lastFail int 50 | data interface{} 51 | } 52 | 53 | type _key struct { 54 | start int 55 | rule int 56 | } 57 | 58 | type tooBigError struct{} 59 | 60 | func (tooBigError) Error() string { return "input is too big" } 61 | 62 | func _NewParser(text string) (*_Parser, error) { 63 | n := len(text) + 1 64 | if n < 0 { 65 | return nil, tooBigError{} 66 | } 67 | p := &_Parser{ 68 | text: text, 69 | deltaPos: make([][_N]int32, n), 70 | deltaErr: make([][_N]int32, n), 71 | node: make(map[_key]*peg.Node), 72 | fail: make(map[_key]*peg.Fail), 73 | act: make(map[_key]interface{}), 74 | } 75 | return p, nil 76 | } 77 | 78 | func _max(a, b int) int { 79 | if a > b { 80 | return a 81 | } 82 | return b 83 | } 84 | 85 | func _memoize(parser *_Parser, rule, start, pos, perr int) (int, int) { 86 | parser.lastFail = perr 87 | derr := perr - start 88 | parser.deltaErr[start][rule] = int32(derr + 1) 89 | if pos >= 0 { 90 | dpos := pos - start 91 | parser.deltaPos[start][rule] = int32(dpos + 1) 92 | return dpos, derr 93 | } 94 | parser.deltaPos[start][rule] = -1 95 | return -1, derr 96 | } 97 | 98 | func _memo(parser *_Parser, rule, start int) (int, int, bool) { 99 | dp := parser.deltaPos[start][rule] 100 | if dp == 0 { 101 | return 0, 0, false 102 | } 103 | if dp > 0 { 104 | dp-- 105 | } 106 | de := parser.deltaErr[start][rule] - 1 107 | return int(dp), int(de), true 108 | } 109 | 110 | func _failMemo(parser *_Parser, rule, start, errPos int) (int, *peg.Fail) { 111 | if start > parser.lastFail { 112 | return -1, &peg.Fail{} 113 | } 114 | dp := parser.deltaPos[start][rule] 115 | de := parser.deltaErr[start][rule] 116 | if start+int(de-1) < errPos { 117 | if dp > 0 { 118 | return start + int(dp-1), &peg.Fail{} 119 | } 120 | return -1, &peg.Fail{} 121 | } 122 | f := parser.fail[_key{start: start, rule: rule}] 123 | if dp < 0 && f != nil { 124 | return -1, f 125 | } 126 | if dp > 0 && f != nil { 127 | return start + int(dp-1), f 128 | } 129 | return start, nil 130 | } 131 | 132 | func _accept(parser *_Parser, f func(*_Parser, int) (int, int), pos, perr *int) bool { 133 | dp, de := f(parser, *pos) 134 | *perr = _max(*perr, *pos+de) 135 | if dp < 0 { 136 | return false 137 | } 138 | *pos += dp 139 | return true 140 | } 141 | 142 | func _node(parser *_Parser, f func(*_Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool { 143 | p, kid := f(parser, *pos) 144 | if kid == nil { 145 | return false 146 | } 147 | node.Kids = append(node.Kids, kid) 148 | *pos = p 149 | return true 150 | } 151 | 152 | func _fail(parser *_Parser, f func(*_Parser, int, int) (int, *peg.Fail), 
errPos int, node *peg.Fail, pos *int) bool { 153 | p, kid := f(parser, *pos, errPos) 154 | if kid.Want != "" || len(kid.Kids) > 0 { 155 | node.Kids = append(node.Kids, kid) 156 | } 157 | if p < 0 { 158 | return false 159 | } 160 | *pos = p 161 | return true 162 | } 163 | 164 | func _next(parser *_Parser, pos int) (rune, int) { 165 | r, w := peg.DecodeRuneInString(parser.text[pos:]) 166 | return r, w 167 | } 168 | 169 | func _sub(parser *_Parser, start, end int, kids []*peg.Node) *peg.Node { 170 | node := &peg.Node{ 171 | Text: parser.text[start:end], 172 | Kids: make([]*peg.Node, len(kids)), 173 | } 174 | copy(node.Kids, kids) 175 | return node 176 | } 177 | 178 | func _leaf(parser *_Parser, start, end int) *peg.Node { 179 | return &peg.Node{Text: parser.text[start:end]} 180 | } 181 | 182 | // A no-op function to mark a variable as used. 183 | func use(interface{}) {} 184 | 185 | func _ExprAccepts(parser *_Parser, start int) (deltaPos, deltaErr int) { 186 | var labels [2]string 187 | use(labels) 188 | if dp, de, ok := _memo(parser, _Expr, start); ok { 189 | return dp, de 190 | } 191 | pos, perr := start, -1 192 | // letter:[a] {…}/letter:[b] {…} 193 | { 194 | pos3 := pos 195 | // action 196 | // letter:[a] 197 | { 198 | pos5 := pos 199 | // [a] 200 | if r, w := _next(parser, pos); r != 'a' { 201 | perr = _max(perr, pos) 202 | goto fail4 203 | } else { 204 | pos += w 205 | } 206 | labels[0] = parser.text[pos5:pos] 207 | } 208 | goto ok0 209 | fail4: 210 | pos = pos3 211 | // action 212 | // letter:[b] 213 | { 214 | pos7 := pos 215 | // [b] 216 | if r, w := _next(parser, pos); r != 'b' { 217 | perr = _max(perr, pos) 218 | goto fail6 219 | } else { 220 | pos += w 221 | } 222 | labels[1] = parser.text[pos7:pos] 223 | } 224 | goto ok0 225 | fail6: 226 | pos = pos3 227 | goto fail 228 | ok0: 229 | } 230 | return _memoize(parser, _Expr, start, pos, perr) 231 | fail: 232 | return _memoize(parser, _Expr, start, -1, perr) 233 | } 234 | 235 | func _ExprNode(parser *_Parser, start int) (int, *peg.Node) { 236 | var labels [2]string 237 | use(labels) 238 | dp := parser.deltaPos[start][_Expr] 239 | if dp < 0 { 240 | return -1, nil 241 | } 242 | key := _key{start: start, rule: _Expr} 243 | node := parser.node[key] 244 | if node != nil { 245 | return start + int(dp-1), node 246 | } 247 | pos := start 248 | node = &peg.Node{Name: "Expr"} 249 | // letter:[a] {…}/letter:[b] {…} 250 | { 251 | pos3 := pos 252 | nkids1 := len(node.Kids) 253 | // action 254 | // letter:[a] 255 | { 256 | pos5 := pos 257 | // [a] 258 | if r, w := _next(parser, pos); r != 'a' { 259 | goto fail4 260 | } else { 261 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w)) 262 | pos += w 263 | } 264 | labels[0] = parser.text[pos5:pos] 265 | } 266 | goto ok0 267 | fail4: 268 | node.Kids = node.Kids[:nkids1] 269 | pos = pos3 270 | // action 271 | // letter:[b] 272 | { 273 | pos7 := pos 274 | // [b] 275 | if r, w := _next(parser, pos); r != 'b' { 276 | goto fail6 277 | } else { 278 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w)) 279 | pos += w 280 | } 281 | labels[1] = parser.text[pos7:pos] 282 | } 283 | goto ok0 284 | fail6: 285 | node.Kids = node.Kids[:nkids1] 286 | pos = pos3 287 | goto fail 288 | ok0: 289 | } 290 | node.Text = parser.text[start:pos] 291 | parser.node[key] = node 292 | return pos, node 293 | fail: 294 | return -1, nil 295 | } 296 | 297 | func _ExprFail(parser *_Parser, start, errPos int) (int, *peg.Fail) { 298 | var labels [2]string 299 | use(labels) 300 | pos, failure := _failMemo(parser, _Expr, start, 
errPos) 301 | if failure != nil { 302 | return pos, failure 303 | } 304 | failure = &peg.Fail{ 305 | Name: "Expr", 306 | Pos: int(start), 307 | } 308 | key := _key{start: start, rule: _Expr} 309 | // letter:[a] {…}/letter:[b] {…} 310 | { 311 | pos3 := pos 312 | // action 313 | // letter:[a] 314 | { 315 | pos5 := pos 316 | // [a] 317 | if r, w := _next(parser, pos); r != 'a' { 318 | if pos >= errPos { 319 | failure.Kids = append(failure.Kids, &peg.Fail{ 320 | Pos: int(pos), 321 | Want: "[a]", 322 | }) 323 | } 324 | goto fail4 325 | } else { 326 | pos += w 327 | } 328 | labels[0] = parser.text[pos5:pos] 329 | } 330 | goto ok0 331 | fail4: 332 | pos = pos3 333 | // action 334 | // letter:[b] 335 | { 336 | pos7 := pos 337 | // [b] 338 | if r, w := _next(parser, pos); r != 'b' { 339 | if pos >= errPos { 340 | failure.Kids = append(failure.Kids, &peg.Fail{ 341 | Pos: int(pos), 342 | Want: "[b]", 343 | }) 344 | } 345 | goto fail6 346 | } else { 347 | pos += w 348 | } 349 | labels[1] = parser.text[pos7:pos] 350 | } 351 | goto ok0 352 | fail6: 353 | pos = pos3 354 | goto fail 355 | ok0: 356 | } 357 | parser.fail[key] = failure 358 | return pos, failure 359 | fail: 360 | parser.fail[key] = failure 361 | return -1, failure 362 | } 363 | 364 | func _ExprAction(parser *_Parser, start int) (int, *string) { 365 | var labels [2]string 366 | use(labels) 367 | var label0 string 368 | var label1 string 369 | dp := parser.deltaPos[start][_Expr] 370 | if dp < 0 { 371 | return -1, nil 372 | } 373 | key := _key{start: start, rule: _Expr} 374 | n := parser.act[key] 375 | if n != nil { 376 | n := n.(string) 377 | return start + int(dp-1), &n 378 | } 379 | var node string 380 | pos := start 381 | // letter:[a] {…}/letter:[b] {…} 382 | { 383 | pos3 := pos 384 | var node2 string 385 | // action 386 | { 387 | start5 := pos 388 | // letter:[a] 389 | { 390 | pos6 := pos 391 | // [a] 392 | if r, w := _next(parser, pos); r != 'a' { 393 | goto fail4 394 | } else { 395 | label0 = parser.text[pos : pos+w] 396 | pos += w 397 | } 398 | labels[0] = parser.text[pos6:pos] 399 | } 400 | node = func( 401 | start, end int, letter string) string { 402 | fmt.Printf("a=[%s]\n", letter) 403 | return string(letter) 404 | }( 405 | start5, pos, label0) 406 | } 407 | goto ok0 408 | fail4: 409 | node = node2 410 | pos = pos3 411 | // action 412 | { 413 | start8 := pos 414 | // letter:[b] 415 | { 416 | pos9 := pos 417 | // [b] 418 | if r, w := _next(parser, pos); r != 'b' { 419 | goto fail7 420 | } else { 421 | label1 = parser.text[pos : pos+w] 422 | pos += w 423 | } 424 | labels[1] = parser.text[pos9:pos] 425 | } 426 | node = func( 427 | start, end int, letter string) string { 428 | fmt.Printf("b=[%s]\n", letter) 429 | return string(letter) 430 | }( 431 | start8, pos, label1) 432 | } 433 | goto ok0 434 | fail7: 435 | node = node2 436 | pos = pos3 437 | goto fail 438 | ok0: 439 | } 440 | parser.act[key] = node 441 | return pos, &node 442 | fail: 443 | return -1, nil 444 | } 445 | -------------------------------------------------------------------------------- /example/label_names/label_names.peggy: -------------------------------------------------------------------------------- 1 | { 2 | // Test labels with the same name but in different choice branches. 
3 | // peggy -o label_names.go label_names.peggy 4 | package main 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | 11 | "github.com/eaburns/peggy/peg" 12 | ) 13 | 14 | func main() { 15 | scanner := bufio.NewScanner(os.Stdin) 16 | for scanner.Scan() { 17 | line := scanner.Text() 18 | p, err := _NewParser(line) 19 | if err != nil { 20 | fmt.Println(err) 21 | os.Exit(1) 22 | } 23 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 24 | _, fail := _ExprFail(p, 0 ,perr) 25 | fmt.Println(peg.SimpleError(line, fail)) 26 | continue 27 | } 28 | _, result := _ExprAction(p, 0) 29 | fmt.Println(*result) 30 | } 31 | if err := scanner.Err(); err != nil { 32 | fmt.Println(err) 33 | os.Exit(1) 34 | } 35 | } 36 | } 37 | 38 | Expr <- 39 | letter:[a] { fmt.Printf("a=[%s]\n", letter); return string(letter) } / 40 | letter:[b] { fmt.Printf("b=[%s]\n", letter); return string(letter) } 41 | -------------------------------------------------------------------------------- /gen.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "bytes" 11 | "errors" 12 | "go/format" 13 | "go/parser" 14 | "go/token" 15 | "io" 16 | "os" 17 | "reflect" 18 | "strconv" 19 | "text/template" 20 | ) 21 | 22 | // Generate generates a parser for the rules, 23 | // using a default Config: 24 | // Config{Prefix: "_"} 25 | func Generate(w io.Writer, file string, grammar *Grammar) error { 26 | return Config{Prefix: "_"}.Generate(w, file, grammar) 27 | } 28 | 29 | // A Config specifies code generation options. 30 | type Config struct { 31 | Prefix string 32 | } 33 | 34 | // Generate generates a parser for the rules. 
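// For illustration only (a hedged sketch, not from the original source; buf, cfg, and gr are hypothetical names, and gr is assumed to be a *Grammar that has already been parsed and checked), a caller might use it like this:
//
//	var buf bytes.Buffer
//	cfg := Config{Prefix: "_"}
//	if err := cfg.Generate(&buf, "grammar.peggy", gr); err != nil {
//		log.Fatal(err)
//	}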
35 | func (c Config) Generate(w io.Writer, file string, gr *Grammar) error { 36 | b := bytes.NewBuffer(nil) 37 | if err := writePrelude(b, file, gr); err != nil { 38 | return err 39 | } 40 | if err := writeDecls(b, c, gr); err != nil { 41 | return err 42 | } 43 | for _, r := range gr.CheckedRules { 44 | if err := writeRule(b, c, r); err != nil { 45 | return err 46 | } 47 | } 48 | return gofmt(w, b.String()) 49 | } 50 | 51 | func gofmt(w io.Writer, s string) error { 52 | fset := token.NewFileSet() 53 | root, err := parser.ParseFile(fset, "", s, parser.ParseComments) 54 | if err != nil { 55 | io.WriteString(os.Stderr, s) 56 | io.WriteString(w, s) 57 | return err 58 | } 59 | if err := format.Node(w, fset, root); err != nil { 60 | io.WriteString(w, s) 61 | return err 62 | } 63 | return nil 64 | } 65 | 66 | func writePrelude(w io.Writer, file string, gr *Grammar) error { 67 | if gr.Prelude == nil { 68 | return nil 69 | } 70 | _, err := io.WriteString(w, gr.Prelude.String()) 71 | return err 72 | } 73 | 74 | func writeDecls(w io.Writer, c Config, gr *Grammar) error { 75 | tmp, err := template.New("Decls").Parse(declsTemplate) 76 | if err != nil { 77 | return err 78 | } 79 | return tmp.Execute(w, map[string]interface{}{ 80 | "Config": c, 81 | "Grammar": gr, 82 | }) 83 | } 84 | 85 | func writeRule(w io.Writer, c Config, r *Rule) error { 86 | funcs := map[string]interface{}{ 87 | "gen": gen, 88 | "quote": strconv.Quote, 89 | "makeAcceptState": func(r *Rule) state { 90 | return state{ 91 | Config: c, 92 | Rule: r, 93 | n: new(int), 94 | AcceptsPass: true, 95 | } 96 | }, 97 | "makeNodeState": func(r *Rule) state { 98 | return state{ 99 | Config: c, 100 | Rule: r, 101 | n: new(int), 102 | NodePass: true, 103 | } 104 | }, 105 | "makeFailState": func(r *Rule) state { 106 | return state{ 107 | Config: c, 108 | Rule: r, 109 | n: new(int), 110 | FailPass: true, 111 | } 112 | }, 113 | "makeActionState": func(r *Rule) state { 114 | return state{ 115 | Config: c, 116 | Rule: r, 117 | n: new(int), 118 | ActionPass: true, 119 | } 120 | }, 121 | } 122 | data := map[string]interface{}{ 123 | "Config": c, 124 | "Rule": r, 125 | "GenActions": *genActions, 126 | "GenParseTree": *genParseTree, 127 | } 128 | tmp, err := template.New("rule").Parse(ruleTemplate) 129 | if err != nil { 130 | return err 131 | } 132 | for _, ts := range [][2]string{ 133 | {"ruleAccepts", ruleAccepts}, 134 | {"ruleNode", ruleNode}, 135 | {"ruleFail", ruleFail}, 136 | {"stringLabels", stringLabels}, 137 | {"ruleAction", ruleAction}, 138 | } { 139 | name, text := ts[0], ts[1] 140 | tmp, err = tmp.New(name).Funcs(funcs).Parse(text) 141 | if err != nil { 142 | return err 143 | } 144 | } 145 | return tmp.ExecuteTemplate(w, "rule", data) 146 | } 147 | 148 | type state struct { 149 | Config 150 | Rule *Rule 151 | Expr Expr 152 | Fail string 153 | // Node is the ident into which to assign action-pass value, or "". 154 | Node string 155 | n *int 156 | // AcceptsPass indicates whether to generate the accepts pass. 157 | AcceptsPass bool 158 | // NodePass indicates whether to generate the node pass. 159 | NodePass bool 160 | // FailPass indicates whether to generate the error pass. 161 | FailPass bool 162 | // ActionPass indicates whether to generate the action pass. 
163 | ActionPass bool 164 | } 165 | 166 | func (s state) id(str string) string { 167 | (*s.n)++ 168 | return str + strconv.Itoa(*s.n-1) 169 | } 170 | 171 | func gen(parentState state, expr Expr, node, fail string) (string, error) { 172 | t := reflect.TypeOf(expr) 173 | tmpString, ok := templates[reflect.TypeOf(expr)] 174 | if !ok { 175 | return "", errors.New("gen not found: " + t.String()) 176 | } 177 | funcs := map[string]interface{}{ 178 | "quote": strconv.Quote, 179 | "quoteRune": strconv.QuoteRune, 180 | "id": parentState.id, 181 | "gen": gen, 182 | "last": func(i int, exprs []Expr) bool { return i == len(exprs)-1 }, 183 | } 184 | tmp, err := template.New(t.String()).Funcs(funcs).Parse(tmpString) 185 | if err != nil { 186 | return "", err 187 | } 188 | if err := addGlobalTemplates(tmp); err != nil { 189 | return "", err 190 | } 191 | b := bytes.NewBuffer(nil) 192 | state := parentState 193 | state.Expr = expr 194 | state.Fail = fail 195 | state.Node = node 196 | err = tmp.Execute(b, state) 197 | return b.String(), err 198 | } 199 | 200 | var globalTemplates = [][2]string{ 201 | {"charClassCondition", charClassCondition}, 202 | } 203 | 204 | func addGlobalTemplates(tmp *template.Template) error { 205 | for _, p := range globalTemplates { 206 | var err error 207 | if tmp, err = tmp.New(p[0]).Parse(p[1]); err != nil { 208 | return err 209 | } 210 | } 211 | return nil 212 | } 213 | 214 | // A note on formatting in Expr templates 215 | // 216 | // gofmt properly fixes any horizontal spacing issues. 217 | // However, while it eliminates duplicate empty lines, 218 | // it does not eliminate empty lines altogether. 219 | // For example, it will convert a sequence of 2 or more empty lines 220 | // into a single empty line, but it will not remove the empty line. 221 | // So it's important to handle newlines properly 222 | // to maintain nice, consistent formatting. 223 | // 224 | // There are two rules: 225 | // 1) Templates must end with a newline, or the codegen will be invalid. 226 | // 2) Templates should not begin with a newline, or the codegen will be ugly.
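// For example (an illustrative sketch, not from the original source), an Expr template obeying both rules looks like:
//
//	var exampleTemplate = `// generated comment
//	pos++
//	`
//
// That is, the text begins immediately after the opening backquote and ends with a newline just before the closing backquote.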
227 | 228 | var declsTemplate = ` 229 | {{$pre := $.Config.Prefix -}} 230 | 231 | const ( 232 | {{range $r := $.Grammar.CheckedRules -}} 233 | {{$pre}}{{$r.Name.Ident}} int = {{$r.N}} 234 | {{end}} 235 | {{$pre}}N int = {{len $.Grammar.CheckedRules}} 236 | ) 237 | 238 | type {{$pre}}Parser struct { 239 | text string 240 | deltaPos [][{{$pre}}N]int32 241 | deltaErr [][{{$pre}}N]int32 242 | node map[{{$pre}}key]*peg.Node 243 | fail map[{{$pre}}key]*peg.Fail 244 | act map[{{$pre}}key]interface{} 245 | lastFail int 246 | data interface{} 247 | } 248 | 249 | type {{$pre}}key struct { 250 | start int 251 | rule int 252 | } 253 | 254 | type tooBigError struct{} 255 | func (tooBigError) Error() string { return "input is too big" } 256 | 257 | func {{$pre}}NewParser(text string) (*{{$pre}}Parser, error) { 258 | n := len(text)+1 259 | if n < 0 { 260 | return nil, tooBigError{} 261 | } 262 | p := &{{$pre}}Parser{ 263 | text: text, 264 | deltaPos: make([][{{$pre}}N]int32, n), 265 | deltaErr: make([][{{$pre}}N]int32, n), 266 | node: make(map[{{$pre}}key]*peg.Node), 267 | fail: make(map[{{$pre}}key]*peg.Fail), 268 | act: make(map[{{$pre}}key]interface{}), 269 | } 270 | return p, nil 271 | } 272 | 273 | func {{$pre}}max(a, b int) int { 274 | if a > b { 275 | return a 276 | } 277 | return b 278 | } 279 | 280 | func {{$pre}}memoize(parser *{{$pre}}Parser, rule, start, pos, perr int) (int, int) { 281 | parser.lastFail = perr 282 | derr := perr - start 283 | parser.deltaErr[start][rule] = int32(derr+1) 284 | if pos >= 0 { 285 | dpos := pos - start 286 | parser.deltaPos[start][rule] = int32(dpos + 1) 287 | return dpos, derr 288 | } 289 | parser.deltaPos[start][rule] = -1 290 | return -1, derr 291 | } 292 | 293 | func {{$pre}}memo(parser *{{$pre}}Parser, rule, start int) (int, int, bool) { 294 | dp := parser.deltaPos[start][rule] 295 | if dp == 0 { 296 | return 0, 0, false 297 | } 298 | if dp > 0 { 299 | dp-- 300 | } 301 | de := parser.deltaErr[start][rule] - 1 302 | return int(dp), int(de), true 303 | } 304 | 305 | func {{$pre}}failMemo(parser *{{$pre}}Parser, rule, start, errPos int) (int, *peg.Fail) { 306 | if start > parser.lastFail { 307 | return -1, &peg.Fail{} 308 | } 309 | dp := parser.deltaPos[start][rule] 310 | de := parser.deltaErr[start][rule] 311 | if start+int(de-1) < errPos { 312 | if dp > 0 { 313 | return start + int(dp-1), &peg.Fail{} 314 | } 315 | return -1, &peg.Fail{} 316 | } 317 | f := parser.fail[_key{start: start, rule: rule}] 318 | if dp < 0 && f != nil { 319 | return -1, f 320 | } 321 | if dp > 0 && f != nil { 322 | return start + int(dp-1), f 323 | } 324 | return start, nil 325 | } 326 | 327 | func {{$pre}}accept(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, int), pos, perr *int) bool { 328 | dp, de := f(parser, *pos) 329 | *perr = _max(*perr, *pos+de) 330 | if dp < 0 { 331 | return false 332 | } 333 | *pos += dp 334 | return true 335 | } 336 | 337 | func {{$pre}}node(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool { 338 | p, kid := f(parser, *pos) 339 | if kid == nil { 340 | return false 341 | } 342 | node.Kids = append(node.Kids, kid) 343 | *pos = p 344 | return true 345 | } 346 | 347 | func {{$pre}}fail(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int, int) (int, *peg.Fail), errPos int, node *peg.Fail, pos *int) bool { 348 | p, kid := f(parser, *pos, errPos) 349 | if kid.Want != "" || len(kid.Kids) > 0 { 350 | node.Kids = append(node.Kids, kid) 351 | } 352 | if p < 0 { 353 | return false 354 | } 355 | *pos = p 
356 | return true 357 | } 358 | 359 | func {{$pre}}next(parser *{{$pre}}Parser, pos int) (rune, int) { 360 | r, w := peg.DecodeRuneInString(parser.text[pos:]) 361 | return r, w 362 | } 363 | 364 | func {{$pre}}sub(parser *{{$pre}}Parser, start, end int, kids []*peg.Node) *peg.Node { 365 | node := &peg.Node{ 366 | Text: parser.text[start:end], 367 | Kids: make([]*peg.Node, len(kids)), 368 | } 369 | copy(node.Kids, kids) 370 | return node 371 | } 372 | 373 | func {{$pre}}leaf(parser *{{$pre}}Parser, start, end int) *peg.Node { 374 | return &peg.Node{Text: parser.text[start:end]} 375 | } 376 | 377 | // A no-op function to mark a variable as used. 378 | func use(interface{}) {} 379 | ` 380 | 381 | // templates contains a mapping from Expr types to their templates. 382 | // These templates parse the input text and compute 383 | // for each rule and input position pair encountered by the parse, 384 | // the position immediately following the text accepted by the rule, 385 | // or the position of the furthest error encountered by the rule. 386 | // 387 | // When generating the parse tree pass, 388 | // the templates also add peg.Nodes to the kids slice. 389 | // 390 | // Variables for use by the templates: 391 | // parser is the *Parser. 392 | // parser.text is the input text. 393 | // pos is the byte offset into parser.text of where to begin parsing. 394 | // If the Expr fails to parse, pos must be set to the position of the error. 395 | // If the Expr succeeds to parse, pos must be set 396 | // to the position just after the accepted text. 397 | // 398 | // On the accepts pass these variables are also defined: 399 | // perr is the maximum error position found so far. 400 | // It is only defined if Rule.Expr.CanFail. 401 | // It is initialized to -1 at the beginning of the parse. 402 | // It is updated by Choice nodes when branches fail, 403 | // and by rules when their entire parse fails. 404 | // ok is a scratch boolean variable. 405 | // It may be either true or false before and after each Expr template. 406 | // Each template that wants to use ok must set it before using it. 407 | // 408 | // On the node tree pass these variables are also defined: 409 | // node is the *peg.Node of the Rule being parsed. 410 | // 411 | // On the action tree pass these variables are also defined: 412 | // node is an interface{} containing the current action tree node value. 413 | // 414 | // On the fail tree pass these variables are also defined: 415 | // failure is the *peg.Fail of the Rule being parsed. 416 | // errPos is the position before which Fail nodes are not generated.
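// For example (an illustrative sketch, not from the original source), instantiating the Literal template below for the literal "ab" on the accepts pass, with the default "_" prefix and a fail label named fail, yields code roughly like:
//
//	// "ab"
//	if len(parser.text[pos:]) < 2 || parser.text[pos:pos+2] != "ab" {
//		perr = _max(perr, pos)
//		goto fail
//	}
//	pos += 2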
417 | var templates = map[reflect.Type]string{ 418 | reflect.TypeOf(&Choice{}): choiceTemplate, 419 | reflect.TypeOf(&Action{}): actionTemplate, 420 | reflect.TypeOf(&Sequence{}): sequenceTemplate, 421 | reflect.TypeOf(&LabelExpr{}): labelExprTemplate, 422 | reflect.TypeOf(&PredExpr{}): predExprTemplate, 423 | reflect.TypeOf(&RepExpr{}): repExprTemplate, 424 | reflect.TypeOf(&OptExpr{}): optExprTemplate, 425 | reflect.TypeOf(&SubExpr{}): subExprTemplate, 426 | reflect.TypeOf(&PredCode{}): predCodeTemplate, 427 | reflect.TypeOf(&Ident{}): identTemplate, 428 | reflect.TypeOf(&Literal{}): literalTemplate, 429 | reflect.TypeOf(&Any{}): anyTemplate, 430 | reflect.TypeOf(&CharClass{}): charClassTemplate, 431 | } 432 | 433 | var ruleTemplate = ` 434 | {{template "ruleAccepts" $}} 435 | {{if $.GenParseTree -}} 436 | {{template "ruleNode" $}} 437 | {{end -}} 438 | {{template "ruleFail" $}} 439 | {{if $.GenActions -}} 440 | {{template "ruleAction" $}} 441 | {{end -}} 442 | ` 443 | 444 | var stringLabels = ` 445 | {{- if $.Rule.Labels -}} 446 | var labels [{{len $.Rule.Labels}}]string 447 | use(labels) 448 | {{- end -}} 449 | ` 450 | 451 | var ruleAccepts = ` 452 | {{$pre := $.Config.Prefix -}} 453 | {{- $id := $.Rule.Name.Ident -}} 454 | func {{$pre}}{{$id}}Accepts(parser *{{$pre}}Parser, start int) (deltaPos, deltaErr int) { 455 | {{- template "stringLabels" $}} 456 | if dp, de, ok := {{$pre}}memo(parser, {{$pre}}{{$id}}, start); ok { 457 | return dp, de 458 | } 459 | pos, perr := start, -1 460 | {{gen (makeAcceptState $.Rule) $.Rule.Expr "" "fail" -}} 461 | 462 | {{if $.Rule.ErrorName -}} 463 | perr = start 464 | {{end -}} 465 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, pos, perr) 466 | {{if $.Rule.Expr.CanFail -}} 467 | fail: 468 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, -1, perr) 469 | {{end -}} 470 | } 471 | ` 472 | 473 | var ruleNode = ` 474 | {{$pre := $.Config.Prefix -}} 475 | {{- $id := $.Rule.Name.Ident -}} 476 | {{- $name := $.Rule.Name.String -}} 477 | func {{$pre}}{{$id}}Node(parser *{{$pre}}Parser, start int) (int, *peg.Node) { 478 | {{- template "stringLabels" $}} 479 | dp := parser.deltaPos[start][{{$pre}}{{$id}}] 480 | if dp < 0 { 481 | return -1, nil 482 | } 483 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 484 | node := parser.node[key] 485 | if node != nil { 486 | return start + int(dp - 1), node 487 | } 488 | pos := start 489 | node = &peg.Node{Name: {{quote $name}}} 490 | {{gen (makeNodeState $.Rule) $.Rule.Expr "" "fail" -}} 491 | 492 | node.Text = parser.text[start:pos] 493 | parser.node[key] = node 494 | return pos, node 495 | {{if $.Rule.Expr.CanFail -}} 496 | fail: 497 | return -1, nil 498 | {{end -}} 499 | } 500 | ` 501 | 502 | var ruleFail = ` 503 | {{$pre := $.Config.Prefix -}} 504 | {{- $id := $.Rule.Name.Ident -}} 505 | func {{$pre}}{{$id}}Fail(parser *{{$pre}}Parser, start, errPos int) (int, *peg.Fail) { 506 | {{- template "stringLabels" $}} 507 | pos, failure := {{$pre}}failMemo(parser, {{$pre}}{{$id}}, start, errPos) 508 | if failure != nil { 509 | return pos, failure 510 | } 511 | failure = &peg.Fail{ 512 | Name: {{quote $id}}, 513 | Pos: int(start), 514 | } 515 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 516 | {{gen (makeFailState $.Rule) $.Rule.Expr "" "fail" -}} 517 | 518 | {{if $.Rule.ErrorName -}} 519 | failure.Kids = nil 520 | {{end -}} 521 | parser.fail[key] = failure 522 | return pos, failure 523 | {{if $.Rule.Expr.CanFail -}} 524 | fail: 525 | {{if $.Rule.ErrorName -}} 526 | failure.Kids = nil 527 | 
failure.Want = {{quote $.Rule.ErrorName.String}} 528 | {{end -}} 529 | parser.fail[key] = failure 530 | return -1, failure 531 | {{end -}} 532 | } 533 | ` 534 | 535 | var ruleAction = ` 536 | {{$pre := $.Config.Prefix -}} 537 | {{- $id := $.Rule.Name.Ident -}} 538 | {{- $type := $.Rule.Expr.Type -}} 539 | func {{$pre}}{{$id}}Action(parser *{{$pre}}Parser, start int) (int, *{{$type}}) { 540 | {{- template "stringLabels" $}} 541 | {{if $.Rule.Labels -}} 542 | {{range $l := $.Rule.Labels -}} 543 | var label{{$l.N}} {{$l.Type}} 544 | {{end}} 545 | {{- end -}} 546 | dp := parser.deltaPos[start][{{$pre}}{{$id}}] 547 | if dp < 0 { 548 | return -1, nil 549 | } 550 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 551 | n := parser.act[key] 552 | if n != nil { 553 | n := n.({{$type}}) 554 | return start + int(dp - 1), &n 555 | } 556 | var node {{$type}} 557 | pos := start 558 | {{gen (makeActionState $.Rule) $.Rule.Expr "node" "fail" -}} 559 | 560 | parser.act[key] = node 561 | return pos, &node 562 | {{if $.Rule.Expr.CanFail -}} 563 | fail: 564 | return -1, nil 565 | {{end -}} 566 | } 567 | ` 568 | 569 | var choiceTemplate = `// {{$.Expr.String}} 570 | { 571 | {{- $ok := id "ok" -}} 572 | {{- $nkids := id "nkids" -}} 573 | {{- $node0 := id "node" -}} 574 | {{- $pos0 := id "pos" -}} 575 | {{$pos0}} := pos 576 | {{if $.NodePass -}} 577 | {{$nkids}} := len(node.Kids) 578 | {{else if (and $.Node $.ActionPass) -}} 579 | var {{$node0}} {{$.Expr.Type}} 580 | {{end -}} 581 | {{- range $i, $subExpr := $.Expr.Exprs -}} 582 | {{- $fail := id "fail" -}} 583 | {{gen $ $subExpr $.Node $fail -}} 584 | 585 | {{if $subExpr.CanFail -}} 586 | goto {{$ok}} 587 | {{$fail}}: 588 | {{if $.NodePass -}} 589 | node.Kids = node.Kids[:{{$nkids}}] 590 | {{else if (and $.Node $.ActionPass) -}} 591 | {{$.Node}} = {{$node0}} 592 | {{end -}} 593 | pos = {{$pos0}} 594 | {{if last $i $.Expr.Exprs -}} 595 | goto {{$.Fail}} 596 | {{end -}} 597 | {{end -}} 598 | {{end -}} 599 | {{$ok}}: 600 | } 601 | ` 602 | 603 | var actionTemplate = `// action 604 | {{if $.ActionPass -}} 605 | { 606 | {{$start := id "start" -}} 607 | {{$start}} := pos 608 | {{gen $ $.Expr.Expr "" $.Fail -}} 609 | {{/* TODO: don't put the func in the scope of the rule. 
*/ -}} 610 | {{if $.Node}}{{$.Node}} = {{end}} func( 611 | start, end int, 612 | {{- if $.Expr.Labels -}} 613 | {{range $lexpr := $.Expr.Labels -}} 614 | {{$lexpr.Label}} {{$lexpr.Type}}, 615 | {{- end -}} 616 | {{- end -}}) 617 | {{- $.Expr.Type}} { {{$.Expr.Code}} }( 618 | {{$start}}, pos, 619 | {{- if $.Expr.Labels -}} 620 | {{range $lexpr := $.Expr.Labels -}} 621 | label{{$lexpr.N}}, 622 | {{- end -}} 623 | {{- end -}} 624 | ) 625 | } 626 | {{else -}} 627 | {{gen $ $.Expr.Expr "" $.Fail -}} 628 | {{end -}} 629 | ` 630 | 631 | var sequenceTemplate = `// {{$.Expr.String}} 632 | {{$node := id "node" -}} 633 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 634 | { 635 | var {{$node}} string 636 | {{else if (and $.ActionPass $.Node) -}} 637 | {{$.Node}} = make({{$.Expr.Type}}, {{len $.Expr.Exprs}}) 638 | {{end -}} 639 | 640 | {{range $i, $subExpr := $.Expr.Exprs -}} 641 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 642 | {{gen $ $subExpr $node $.Fail -}} 643 | {{$.Node}}, {{$node}} = {{$.Node}}+{{$node}}, "" 644 | {{else if (and $.ActionPass $.Node) -}} 645 | {{gen $ $subExpr (printf "%s[%d]" $.Node $i) $.Fail -}} 646 | {{else -}} 647 | {{gen $ $subExpr "" $.Fail -}} 648 | {{end -}} 649 | {{end -}} 650 | 651 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 652 | } 653 | {{end -}} 654 | ` 655 | 656 | var labelExprTemplate = `// {{$.Expr.String}} 657 | {{$name := $.Expr.Label.String -}} 658 | {{- $pos0 := id "pos" -}} 659 | {{- $subExpr := $.Expr.Expr -}} 660 | { 661 | {{$pos0}} := pos 662 | {{if $.ActionPass -}} 663 | {{gen $ $subExpr (printf "label%d" $.Expr.N) $.Fail -}} 664 | {{if $.Node -}} 665 | {{$.Node}} = label{{$.Expr.N}} 666 | {{end -}} 667 | {{else -}} 668 | {{gen $ $subExpr "" $.Fail -}} 669 | {{end -}} 670 | labels[{{$.Expr.N}}] = parser.text[{{$pos0}}:pos] 671 | } 672 | ` 673 | 674 | var predExprTemplate = `// {{$.Expr.String}} 675 | { 676 | {{- $pre := $.Config.Prefix -}} 677 | {{- $ok := id "ok" -}} 678 | {{- $subExpr := $.Expr.Expr -}} 679 | {{- $pos0 := id "pos" -}} 680 | {{- $nkids := id "nkids" -}} 681 | {{- $perr0 := id "perr" -}} 682 | {{$pos0}} := pos 683 | {{if $.AcceptsPass -}} 684 | {{$perr0}} := perr 685 | {{else if $.NodePass -}} 686 | {{$nkids}} := len(node.Kids) 687 | {{else if $.FailPass -}} 688 | {{$nkids}} := len(failure.Kids) 689 | {{end -}} 690 | 691 | {{- if $.Expr.Neg -}} 692 | {{gen $ $subExpr "" $ok -}} 693 | pos = {{$pos0}} 694 | {{if $.NodePass -}} 695 | node.Kids = node.Kids[:{{$nkids}}] 696 | {{else if $.AcceptsPass -}} 697 | perr = {{$pre}}max({{$perr0}}, pos) 698 | {{else if $.FailPass -}} 699 | failure.Kids = failure.Kids[:{{$nkids}}] 700 | if pos >= errPos { 701 | failure.Kids = append(failure.Kids, &peg.Fail{ 702 | Pos: int(pos), 703 | Want: {{quote $.Expr.String}}, 704 | }) 705 | } 706 | {{end -}} 707 | goto {{$.Fail}} 708 | {{else -}} 709 | {{- $fail := id "fail" -}} 710 | {{gen $ $subExpr "" $fail -}} 711 | goto {{$ok}} 712 | {{$fail}}: 713 | pos = {{$pos0}} 714 | {{if $.AcceptsPass -}} 715 | perr = {{$pre}}max({{$perr0}}, pos) 716 | {{else if $.FailPass -}} 717 | failure.Kids = failure.Kids[:{{$nkids}}] 718 | if pos >= errPos { 719 | failure.Kids = append(failure.Kids, &peg.Fail{ 720 | Pos: int(pos), 721 | Want: {{quote $.Expr.String}}, 722 | }) 723 | } 724 | {{end -}} 725 | goto {{$.Fail}} 726 | {{end -}} 727 | 728 | {{$ok}}: 729 | pos = {{$pos0}} 730 | {{if $.AcceptsPass -}} 731 | perr = {{$perr0}} 732 | {{else if $.NodePass -}} 733 | node.Kids = node.Kids[:{{$nkids}}] 734 | {{else if 
$.FailPass -}} 735 | failure.Kids = failure.Kids[:{{$nkids}}] 736 | {{else if (and $.ActionPass $.Node) -}} 737 | {{$.Node}} = "" 738 | {{end -}} 739 | } 740 | ` 741 | 742 | var repExprTemplate = `// {{$.Expr.String}} 743 | {{$nkids := id "nkids" -}} 744 | {{$pos0 := id "pos" -}} 745 | {{$node := id "node" -}} 746 | {{- $fail := id "fail" -}} 747 | {{- $subExpr := $.Expr.Expr -}} 748 | {{if eq $.Expr.Op '+' -}} 749 | {{if (and $.ActionPass $.Node) -}} 750 | { 751 | var {{$node}} {{$subExpr.Type}} 752 | {{gen $ $subExpr $node $.Fail -}} 753 | {{if (eq $.Expr.Type "string") -}} 754 | {{$.Node}} += {{$node}} 755 | {{else -}} 756 | {{$.Node}} = append({{$.Node}}, {{$node}}) 757 | {{end -}} 758 | } 759 | {{else -}} 760 | {{gen $ $subExpr "" $.Fail -}} 761 | {{end -}} 762 | {{end -}} 763 | for { 764 | {{if $.NodePass -}} 765 | {{$nkids}} := len(node.Kids) 766 | {{end -}} 767 | {{$pos0}} := pos 768 | {{if (and $.ActionPass $.Node) -}} 769 | var {{$node}} {{$subExpr.Type}} 770 | {{gen $ $subExpr $node $fail -}} 771 | {{if (eq $.Expr.Type "string") -}} 772 | {{$.Node}} += {{$node}} 773 | {{else -}} 774 | {{$.Node}} = append({{$.Node}}, {{$node}}) 775 | {{end -}} 776 | {{else -}} 777 | {{gen $ $subExpr "" $fail -}} 778 | {{end -}} 779 | continue 780 | {{$fail}}: 781 | {{if $.NodePass -}} 782 | node.Kids = node.Kids[:{{$nkids}}] 783 | {{end -}} 784 | pos = {{$pos0}} 785 | break 786 | } 787 | ` 788 | 789 | var optExprTemplate = `// {{$.Expr.String}} 790 | {{$nkids := id "nkids" -}} 791 | {{$pos0 := id "pos" -}} 792 | {{- $fail := id "fail" -}} 793 | {{- $subExpr := $.Expr.Expr -}} 794 | {{- if $subExpr.CanFail -}} 795 | { 796 | {{if $.NodePass -}} 797 | {{$nkids}} := len(node.Kids) 798 | {{end -}} 799 | {{$pos0}} := pos 800 | {{if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}} 801 | {{gen $ $subExpr $.Node $fail -}} 802 | {{else if (and $.ActionPass $.Node) -}} 803 | {{$.Node}} = new({{$subExpr.Type}}) 804 | {{gen $ $subExpr (printf "*%s" $.Node) $fail -}} 805 | {{else -}} 806 | {{gen $ $subExpr "" $fail -}} 807 | {{end -}} 808 | {{- $ok := id "ok" -}} 809 | goto {{$ok}} 810 | {{$fail}}: 811 | {{if $.NodePass -}} 812 | node.Kids = node.Kids[:{{$nkids}}] 813 | {{else if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}} 814 | {{$.Node}} = "" 815 | {{else if (and $.ActionPass $.Node) -}} 816 | {{$.Node}} = nil 817 | {{end -}} 818 | pos = {{$pos0}} 819 | {{$ok}}: 820 | } 821 | {{else -}} 822 | {{- /* TODO: disallow this case in check */ -}} 823 | {{gen $ $subExpr $fail -}} 824 | {{- end -}} 825 | ` 826 | 827 | var subExprTemplate = `// {{$.Expr.String}} 828 | {{if $.NodePass -}} 829 | { 830 | {{- $pre := $.Config.Prefix -}} 831 | {{$nkids := id "nkids" -}} 832 | {{$nkids}} := len(node.Kids) 833 | {{$pos0 := id "pos0" -}} 834 | {{$pos0}} := pos 835 | {{gen $ $.Expr.Expr $.Node $.Fail -}} 836 | sub := {{$pre}}sub(parser, {{$pos0}}, pos, node.Kids[{{$nkids}}:]) 837 | node.Kids = append(node.Kids[:{{$nkids}}], sub) 838 | } 839 | {{else -}} 840 | {{gen $ $.Expr.Expr $.Node $.Fail -}} 841 | {{end -}} 842 | ` 843 | 844 | // TODO: instead, create a function for each predicate 845 | // with params that are the parser followed by 846 | // a string for each defined label. 847 | // Predicate code shouldn't have access to the label.Kids, 848 | // because it's undefined for the Accepts and Fail pass. 849 | // NOTE: kids are OK for actions, 850 | // because actions are only to be called by the Node pass 851 | // on a successful parse. 
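// For illustration (a hedged sketch, not from the original source), a non-negated predicate such as &{ isSpace(s) }, in a rule whose only label is s, currently expands on the accepts pass to roughly:
//
//	if ok := func(s string) bool { return isSpace(s) }(labels[0]); !ok {
//		perr = _max(perr, pos)
//		goto fail
//	}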
852 | var predCodeTemplate = `// pred code 853 | if ok := func( 854 | {{- if $.Expr.Labels -}} 855 | {{range $lexpr := $.Expr.Labels -}} 856 | {{$lexpr.Label}} string, 857 | {{- end -}} 858 | {{- end -}}) bool { return {{$.Expr.Code}} }( 859 | {{- if $.Expr.Labels -}} 860 | {{range $lexpr := $.Expr.Labels -}} 861 | labels[{{$lexpr.N}}], 862 | {{- end -}} 863 | {{- end -}} 864 | ); {{if not $.Expr.Neg}}!{{end}}ok { 865 | {{if $.AcceptsPass -}} 866 | {{- $pre := $.Config.Prefix -}} 867 | perr = {{$pre}}max(perr, pos) 868 | {{else if $.FailPass -}} 869 | if pos >= errPos { 870 | failure.Kids = append(failure.Kids, &peg.Fail{ 871 | Pos: int(pos), 872 | Want: 873 | {{- if $.Expr.Neg}}"!{"{{else}}"&{"{{end}}+ 874 | {{- quote $.Expr.Code.String}}+"}", 875 | }) 876 | } 877 | {{end -}} 878 | goto {{$.Fail}} 879 | } 880 | {{if (and $.ActionPass $.Node) -}} 881 | {{$.Node}} = "" 882 | {{end -}} 883 | ` 884 | 885 | var identTemplate = `// {{$.Expr.String}} 886 | {{$pre := $.Config.Prefix -}} 887 | {{- $name := $.Expr.Name.Ident -}} 888 | {{if $.AcceptsPass -}} 889 | if !{{$pre}}accept(parser, {{$pre}}{{$name}}Accepts, &pos, &perr) { 890 | goto {{$.Fail}} 891 | } 892 | {{else if $.NodePass -}} 893 | if !{{$pre}}node(parser, {{$pre}}{{$name}}Node, node, &pos) { 894 | goto {{$.Fail}} 895 | } 896 | {{else if $.FailPass -}} 897 | if !{{$pre}}fail(parser, {{$pre}}{{$name}}Fail, errPos, failure, &pos) { 898 | goto {{$.Fail}} 899 | } 900 | {{else if $.ActionPass -}} 901 | if p, n := {{$pre}}{{$name}}Action(parser, pos); n == nil { 902 | goto {{$.Fail}} 903 | } else { 904 | {{if (and $.ActionPass $.Node) -}} 905 | {{$.Node}} = *n 906 | {{end -}} 907 | pos = p 908 | } 909 | {{end -}} 910 | ` 911 | 912 | var literalTemplate = `// {{$.Expr.String}} 913 | {{$want := quote $.Expr.Text.String -}} 914 | {{- $n := len $.Expr.Text.String -}} 915 | if len(parser.text[pos:]) < {{$n}} || parser.text[pos:pos+{{$n}}] != {{$want}} { 916 | {{if $.AcceptsPass -}} 917 | {{- $pre := $.Config.Prefix -}} 918 | perr = {{$pre}}max(perr, pos) 919 | {{else if $.FailPass -}} 920 | if pos >= errPos { 921 | failure.Kids = append(failure.Kids, &peg.Fail{ 922 | Pos: int(pos), 923 | Want: {{quote $.Expr.String}}, 924 | }) 925 | } 926 | {{end -}} 927 | goto {{$.Fail}} 928 | } 929 | {{$pre := $.Config.Prefix -}} 930 | {{if $.NodePass -}} 931 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + {{$n}})) 932 | {{else if (and $.ActionPass $.Node) -}} 933 | {{$.Node}} = parser.text[pos:pos+{{$n}}] 934 | {{end -}} 935 | {{if eq $n 1 -}} 936 | pos++ 937 | {{- else -}} 938 | pos += {{$n}} 939 | {{- end}} 940 | ` 941 | 942 | var anyTemplate = `// {{$.Expr.String}} 943 | {{$pre := $.Config.Prefix -}} 944 | {{- /* \uFFFD is utf8.RuneError */ -}} 945 | if r, w := {{$pre}}next(parser, pos); w == 0 || r == '\uFFFD' { 946 | {{if $.AcceptsPass -}} 947 | {{- $pre := $.Config.Prefix -}} 948 | perr = {{$pre}}max(perr, pos) 949 | {{else if $.FailPass -}} 950 | if pos >= errPos { 951 | failure.Kids = append(failure.Kids, &peg.Fail{ 952 | Pos: int(pos), 953 | Want: ".", 954 | }) 955 | } 956 | {{end -}} 957 | goto {{$.Fail}} 958 | } else { 959 | {{$pre := $.Config.Prefix -}} 960 | {{if $.NodePass -}} 961 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w)) 962 | {{else if (and $.ActionPass $.Node) -}} 963 | {{$.Node}} = parser.text[pos:pos+w] 964 | {{end -}} 965 | pos += w 966 | } 967 | ` 968 | 969 | // charClassCondition emits the if-condition for a character class, 970 | // assuming that r and w are the rune and its width respectively. 
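// For example (illustrative), for a class such as [a-zA-Z] the emitted condition is roughly
//
//	(r < 'a' || r > 'z') && (r < 'A' || r > 'Z')
//
// and for a negated class such as [^0-9] it is roughly
//
//	w == 0 || r == '\uFFFD' || (r >= '0' && r <= '9')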
971 | var charClassCondition = ` 972 | {{- /* \uFFFD is utf8.RuneError */ -}} 973 | {{- if $.Expr.Neg -}}w == 0 || r == '\uFFFD' ||{{end}} 974 | {{- range $i, $span := $.Expr.Spans -}} 975 | {{- $first := index $span 0 -}} 976 | {{- $second := index $span 1 -}} 977 | {{- if $.Expr.Neg -}} 978 | {{- if gt $i 0 -}} || {{- end -}} 979 | {{- if eq $first $second -}} 980 | r == {{quoteRune $first}} 981 | {{- else -}} 982 | (r >= {{quoteRune $first}} && r <= {{quoteRune $second}}) 983 | {{- end -}} 984 | {{- else -}} 985 | {{- if gt $i 0}} && {{end -}} 986 | {{- if eq $first $second -}} 987 | r != {{quoteRune $first}} 988 | {{- else -}} 989 | (r < {{quoteRune $first}} || r > {{quoteRune $second}}) 990 | {{- end -}} 991 | {{- end -}} 992 | {{- end -}} 993 | ` 994 | 995 | var charClassTemplate = `// {{$.Expr.String}} 996 | {{$pre := $.Config.Prefix -}} 997 | if r, w := {{$pre}}next(parser, pos); 998 | {{template "charClassCondition" $}} { 999 | {{if $.AcceptsPass -}} 1000 | {{- $pre := $.Config.Prefix -}} 1001 | perr = {{$pre}}max(perr, pos) 1002 | {{else if $.FailPass -}} 1003 | if pos >= errPos { 1004 | failure.Kids = append(failure.Kids, &peg.Fail{ 1005 | Pos: int(pos), 1006 | Want: {{quote $.Expr.String}}, 1007 | }) 1008 | } 1009 | {{end -}} 1010 | goto {{$.Fail}} 1011 | } else { 1012 | {{$pre := $.Config.Prefix -}} 1013 | {{if $.NodePass -}} 1014 | {{$pre := $.Config.Prefix -}} 1015 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w)) 1016 | {{else if (and $.ActionPass $.Node) -}} 1017 | {{$.Node}} = parser.text[pos:pos+w] 1018 | {{end -}} 1019 | pos += w 1020 | } 1021 | ` 1022 | -------------------------------------------------------------------------------- /go.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "go/ast" 11 | "go/parser" 12 | "go/printer" 13 | "go/scanner" 14 | "go/token" 15 | "strings" 16 | ) 17 | 18 | // ParseGoFile parses go function body statements, returning any syntax errors. 19 | // The errors contain location information starting from the given Loc. 20 | func ParseGoFile(loc Loc, code string) error { 21 | _, err := parser.ParseFile(token.NewFileSet(), loc.File, code, 0) 22 | if err == nil { 23 | return nil 24 | } 25 | 26 | el, ok := err.(scanner.ErrorList) 27 | if !ok { 28 | return err 29 | } 30 | p := el[0].Pos 31 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based. 32 | if p.Line > 1 { 33 | loc.Col = 1 34 | } 35 | loc.Col += p.Column - 1 36 | return Err(loc, el[0].Msg) 37 | } 38 | 39 | // ParseGoBody parses go function body statements, returning any syntax errors. 40 | // The errors contain location information starting from the given Loc. 41 | func ParseGoBody(loc Loc, code string) (string, error) { 42 | code = "package main; func p() interface{} {\n" + code + "}" 43 | fset := token.NewFileSet() 44 | file, err := parser.ParseFile(fset, loc.File, code, 0) 45 | if err == nil { 46 | return inferType(loc, fset, file) 47 | } 48 | 49 | el, ok := err.(scanner.ErrorList) 50 | if !ok { 51 | return "", err 52 | } 53 | p := el[0].Pos 54 | loc.Line += p.Line - 2 // -2 because p.Line is 1-based and the func line. 
55 | if p.Line > 2 { 56 | loc.Col = 1 57 | } 58 | loc.Col += p.Column - 1 59 | return "", Err(loc, el[0].Msg) 60 | } 61 | 62 | // inferType infers the type of a function by considering its first return statement. 63 | // If the returned expression is: 64 | // * a type conversion, the type is returned. 65 | // * a type assertion, the type is returned. 66 | // * a function literal, the type is returned. 67 | // * a composite literal, the type is returned. 68 | // * an &-composite literal, the type is returned. 69 | // * an int literal, int is returned. 70 | // * a float literal, float64 is returned. 71 | // * a character literal, rune is returned. 72 | // * a string literal, string is returned. 73 | // 74 | // If the file does not have exactly one top-level funciton, inferType panics. 75 | // If the function has no return statement, an error is returned. 76 | // If the return statement does not have exactly one returned value, an error is returned. 77 | // If the returned value is not an expression in the list above, an error is returned. 78 | func inferType(loc Loc, fset *token.FileSet, file *ast.File) (string, error) { 79 | var funcDecl *ast.FuncDecl 80 | for _, decl := range file.Decls { 81 | if d, ok := decl.(*ast.FuncDecl); ok { 82 | if funcDecl != nil { 83 | panic("multiple function declarations") 84 | } 85 | funcDecl = d 86 | } 87 | } 88 | if funcDecl == nil { 89 | panic("no function declarations") 90 | } 91 | 92 | var v findReturnVisitor 93 | ast.Walk(&v, funcDecl) 94 | if v.retStmt == nil { 95 | return "", Err(loc, "no return statement") 96 | } 97 | if len(v.retStmt.Results) != 1 { 98 | return "", Err(loc, "must return exactly one value") 99 | } 100 | 101 | var typ interface{} 102 | switch e := v.retStmt.Results[0].(type) { 103 | case *ast.CallExpr: 104 | if len(e.Args) != 1 { 105 | var s strings.Builder 106 | printer.Fprint(&s, fset, e) 107 | return "", Err(loc, "cannot infer type from a function call: "+s.String()) 108 | } 109 | typ = e.Fun 110 | case *ast.TypeAssertExpr: 111 | typ = e.Type 112 | case *ast.FuncLit: 113 | typ = e.Type 114 | case *ast.CompositeLit: 115 | typ = e.Type 116 | case *ast.BasicLit: 117 | switch e.Kind { 118 | case token.INT: 119 | return "int", nil 120 | case token.FLOAT: 121 | return "float64", nil 122 | case token.CHAR: 123 | return "rune", nil 124 | case token.STRING: 125 | return "string", nil 126 | } 127 | case *ast.UnaryExpr: 128 | lit, ok := e.X.(*ast.CompositeLit) 129 | if !ok || e.Op != token.AND { 130 | return "", Err(loc, "cannot infer type") 131 | } 132 | var s strings.Builder 133 | printer.Fprint(&s, fset, lit.Type) 134 | return "*" + s.String(), nil 135 | default: 136 | return "", Err(loc, "cannot infer type") 137 | } 138 | var s strings.Builder 139 | printer.Fprint(&s, fset, typ) 140 | return s.String(), nil 141 | } 142 | 143 | type findReturnVisitor struct { 144 | retStmt *ast.ReturnStmt 145 | } 146 | 147 | func (v *findReturnVisitor) Visit(n ast.Node) ast.Visitor { 148 | if r, ok := n.(*ast.ReturnStmt); ok { 149 | v.retStmt = r 150 | return nil 151 | } 152 | return v 153 | } 154 | 155 | // ParseGoExpr parses a go expression, returning any syntax errors. 156 | // The errors contain location information starting from the given Loc. 
157 | func ParseGoExpr(loc Loc, code string) error { 158 | _, err := parser.ParseExprFrom(token.NewFileSet(), loc.File, code, 0) 159 | if err == nil { 160 | return nil 161 | } 162 | 163 | el, ok := err.(scanner.ErrorList) 164 | if !ok { 165 | return err 166 | } 167 | p := el[0].Pos 168 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based. 169 | if p.Line > 1 { 170 | loc.Col = 1 171 | } 172 | loc.Col += p.Column - 1 173 | return Err(loc, el[0].Msg) 174 | } 175 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eaburns/peggy 2 | 3 | go 1.13 4 | 5 | require github.com/eaburns/pretty v1.0.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/eaburns/pretty v1.0.0 h1:00W1wrrtMXUSqLPN0txS8j7g9qFXy6nA5vZVqVQOo6w= 2 | github.com/eaburns/pretty v1.0.0/go.mod h1:retcK8A0KEgdmb0nuxhvyxixwCmEPO7SKlK0IJhjg8A= 3 | -------------------------------------------------------------------------------- /gok.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright 2017 The Peggy Authors 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd. 7 | 8 | # 9 | # Verifies that go code passes go fmt, go vet, golint, and go test. 10 | # 11 | 12 | o=$(mktemp tmp.XXXXXXXXXX) 13 | 14 | fail() { 15 | echo Failed 16 | cat $o 17 | rm $o 18 | exit 1 19 | } 20 | 21 | trap fail INT TERM 22 | 23 | #echo Generating 24 | #go generate . || fail 25 | 26 | echo Formatting 27 | gofmt -l $(find . -name '*.go') > $o 2>&1 28 | test $(wc -l $o | awk '{ print $1 }') = "0" || fail 29 | 30 | echo Vetting 31 | go vet ./... > $o 2>&1 || fail 32 | 33 | echo Testing 34 | go test -test.timeout=60s ./... > $o 2>&1 || fail 35 | 36 | echo Linting 37 | golint ./... \ 38 | | grep -v 'receiver name peggyrcvr should be consistent'\ 39 | | grep -v 'const peggyEofCode should be peggyEOFCode'\ 40 | | egrep -v 'grammar.y.*ALL_CAPS'\ 41 | | egrep -v '(Begin|End|FullParenString|Type|CanFail|Walk).*should have comment or be unexported'\ 42 | | egrep -v 'GenAccept should have comment or'\ 43 | | egrep -v 'calc.go.*use underscores'\ 44 | | egrep -v 'calc.go.*const __ should be _'\ 45 | > $o 2>&1 46 | # Silly: diff the grepped golint output with empty. 47 | # If it's non-empty, error, otherwise succeed. 
48 | e=$(mktemp tmp.XXXXXXXXXX) 49 | touch $e 50 | diff $o $e > /dev/null || { rm $e; fail; } 51 | 52 | rm $o $e 53 | -------------------------------------------------------------------------------- /grammar.go: -------------------------------------------------------------------------------- 1 | //line grammar.y:8 2 | package main 3 | 4 | import __yyfmt__ "fmt" 5 | 6 | //line grammar.y:8 7 | import "io" 8 | 9 | //line grammar.y:13 10 | type peggySymType struct { 11 | yys int 12 | text text 13 | cclass *CharClass 14 | loc Loc 15 | expr Expr 16 | action *Action 17 | rule Rule 18 | rules []Rule 19 | texts []Text 20 | name Name 21 | grammar Grammar 22 | } 23 | 24 | const _ERROR = 57346 25 | const _IDENT = 57347 26 | const _STRING = 57348 27 | const _CODE = 57349 28 | const _ARROW = 57350 29 | const _CHARCLASS = 57351 30 | 31 | var peggyToknames = [...]string{ 32 | "$end", 33 | "error", 34 | "$unk", 35 | "_ERROR", 36 | "_IDENT", 37 | "_STRING", 38 | "_CODE", 39 | "_ARROW", 40 | "_CHARCLASS", 41 | "'.'", 42 | "'*'", 43 | "'+'", 44 | "'?'", 45 | "':'", 46 | "'/'", 47 | "'!'", 48 | "'&'", 49 | "'('", 50 | "')'", 51 | "'^'", 52 | "'<'", 53 | "'>'", 54 | "','", 55 | "'\\n'", 56 | } 57 | var peggyStatenames = [...]string{} 58 | 59 | const peggyEofCode = 1 60 | const peggyErrCode = 2 61 | const peggyInitialStackSize = 16 62 | 63 | //line grammar.y:174 64 | 65 | // Parse parses a Peggy input file, and returns the Grammar. 66 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) { 67 | x := &lexer{ 68 | in: in, 69 | file: fileName, 70 | line: 1, 71 | } 72 | peggyParse(x) 73 | if x.err != nil { 74 | return nil, x.err 75 | } 76 | return &x.result, nil 77 | } 78 | 79 | //line yacctab:1 80 | var peggyExca = [...]int{ 81 | -1, 1, 82 | 1, -1, 83 | -2, 0, 84 | -1, 64, 85 | 19, 42, 86 | -2, 0, 87 | } 88 | 89 | const peggyPrivate = 57344 90 | 91 | const peggyLast = 118 92 | 93 | var peggyAct = [...]int{ 94 | 95 | 2, 31, 26, 27, 60, 68, 29, 4, 14, 42, 96 | 43, 18, 48, 69, 9, 44, 22, 21, 44, 18, 97 | 25, 3, 38, 41, 56, 10, 12, 4, 13, 15, 98 | 20, 24, 11, 49, 50, 46, 10, 54, 10, 7, 99 | 17, 15, 16, 1, 55, 57, 51, 52, 53, 58, 100 | 23, 59, 62, 19, 11, 63, 8, 64, 6, 45, 101 | 66, 65, 11, 39, 61, 67, 40, 37, 35, 34, 102 | 28, 5, 0, 33, 32, 36, 30, 39, 47, 0, 103 | 40, 37, 0, 0, 0, 0, 0, 33, 32, 36, 104 | 11, 39, 0, 0, 40, 37, 0, 0, 0, 0, 105 | 0, 33, 32, 36, 30, 39, 0, 0, 40, 37, 106 | 0, 0, 0, 0, 0, 33, 32, 36, 107 | } 108 | var peggyPact = [...]int{ 109 | 110 | -17, -1000, 49, -1000, -17, -1000, -17, -17, -1000, -1000, 111 | 34, -10, -1000, 27, -1000, 27, -17, 8, 26, -17, 112 | -1000, 99, -17, -13, -1000, -1000, 0, -1000, 71, -1000, 113 | -2, -1000, -17, -17, 35, -1000, -17, -1000, -1000, -1000, 114 | -1000, 99, -1000, 19, -17, -1000, -1000, -1000, -17, 57, 115 | 57, -1000, -1000, -1000, 99, 0, -1000, 99, 85, -1000, 116 | -1000, -1000, -1000, -1000, 3, -1000, -1000, -6, -1000, -1000, 117 | } 118 | var peggyPgo = [...]int{ 119 | 120 | 0, 71, 2, 3, 70, 6, 1, 69, 68, 59, 121 | 4, 58, 50, 14, 39, 22, 43, 0, 21, 122 | } 123 | var peggyR1 = [...]int{ 124 | 125 | 0, 16, 1, 1, 11, 14, 14, 14, 13, 13, 126 | 15, 15, 12, 12, 2, 2, 3, 3, 4, 4, 127 | 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 128 | 8, 8, 8, 8, 8, 8, 8, 10, 9, 18, 129 | 18, 17, 17, 130 | } 131 | var peggyR2 = [...]int{ 132 | 133 | 0, 2, 4, 2, 1, 3, 1, 0, 4, 5, 134 | 4, 1, 1, 3, 4, 1, 2, 1, 2, 1, 135 | 4, 1, 3, 3, 1, 2, 2, 2, 1, 5, 136 | 3, 3, 1, 1, 1, 1, 4, 1, 1, 2, 137 | 1, 1, 0, 138 | } 139 | var peggyChk = [...]int{ 140 | 141 | -1000, -16, -17, -18, 24, -1, 
-11, -14, 7, -13, 142 | -15, 5, -18, -18, -17, -18, 8, 6, 21, -14, 143 | -13, -17, 8, -12, 5, -17, -2, -3, -4, -5, 144 | 5, -6, 17, 16, -7, -8, 18, 10, -15, 6, 145 | 9, -17, 22, 23, 15, -9, -5, 7, 14, -17, 146 | -17, 11, 12, 13, -17, -2, 5, -17, -17, -6, 147 | -10, 7, -6, -10, -2, -3, -6, -17, 2, 19, 148 | } 149 | var peggyDef = [...]int{ 150 | 151 | 42, -2, 7, 41, 40, 1, 0, 42, 4, 6, 152 | 0, 11, 39, 7, 3, 41, 42, 0, 0, 42, 153 | 5, 0, 42, 0, 12, 2, 8, 15, 17, 19, 154 | 11, 21, 42, 42, 24, 28, 42, 32, 33, 34, 155 | 35, 0, 10, 0, 42, 16, 18, 38, 42, 0, 156 | 0, 25, 26, 27, 0, 9, 13, 0, 0, 22, 157 | 30, 37, 23, 31, -2, 14, 20, 0, 36, 29, 158 | } 159 | var peggyTok1 = [...]int{ 160 | 161 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 162 | 24, 3, 3, 3, 3, 3, 3, 3, 3, 3, 163 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 164 | 3, 3, 3, 16, 3, 3, 3, 3, 17, 3, 165 | 18, 19, 11, 12, 23, 3, 10, 15, 3, 3, 166 | 3, 3, 3, 3, 3, 3, 3, 3, 14, 3, 167 | 21, 3, 22, 13, 3, 3, 3, 3, 3, 3, 168 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 169 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 170 | 3, 3, 3, 3, 20, 171 | } 172 | var peggyTok2 = [...]int{ 173 | 174 | 2, 3, 4, 5, 6, 7, 8, 9, 175 | } 176 | var peggyTok3 = [...]int{ 177 | 0, 178 | } 179 | 180 | var peggyErrorMessages = [...]struct { 181 | state int 182 | token int 183 | msg string 184 | }{} 185 | 186 | //line yaccpar:1 187 | 188 | /* parser for yacc output */ 189 | 190 | var ( 191 | peggyDebug = 0 192 | peggyErrorVerbose = false 193 | ) 194 | 195 | type peggyLexer interface { 196 | Lex(lval *peggySymType) int 197 | Error(s string) 198 | } 199 | 200 | type peggyParser interface { 201 | Parse(peggyLexer) int 202 | Lookahead() int 203 | } 204 | 205 | type peggyParserImpl struct { 206 | lval peggySymType 207 | stack [peggyInitialStackSize]peggySymType 208 | char int 209 | } 210 | 211 | func (p *peggyParserImpl) Lookahead() int { 212 | return p.char 213 | } 214 | 215 | func peggyNewParser() peggyParser { 216 | return &peggyParserImpl{} 217 | } 218 | 219 | const peggyFlag = -1000 220 | 221 | func peggyTokname(c int) string { 222 | if c >= 1 && c-1 < len(peggyToknames) { 223 | if peggyToknames[c-1] != "" { 224 | return peggyToknames[c-1] 225 | } 226 | } 227 | return __yyfmt__.Sprintf("tok-%v", c) 228 | } 229 | 230 | func peggyStatname(s int) string { 231 | if s >= 0 && s < len(peggyStatenames) { 232 | if peggyStatenames[s] != "" { 233 | return peggyStatenames[s] 234 | } 235 | } 236 | return __yyfmt__.Sprintf("state-%v", s) 237 | } 238 | 239 | func peggyErrorMessage(state, lookAhead int) string { 240 | const TOKSTART = 4 241 | 242 | if !peggyErrorVerbose { 243 | return "syntax error" 244 | } 245 | 246 | for _, e := range peggyErrorMessages { 247 | if e.state == state && e.token == lookAhead { 248 | return "syntax error: " + e.msg 249 | } 250 | } 251 | 252 | res := "syntax error: unexpected " + peggyTokname(lookAhead) 253 | 254 | // To match Bison, suggest at most four expected tokens. 255 | expected := make([]int, 0, 4) 256 | 257 | // Look for shiftable tokens. 258 | base := peggyPact[state] 259 | for tok := TOKSTART; tok-1 < len(peggyToknames); tok++ { 260 | if n := base + tok; n >= 0 && n < peggyLast && peggyChk[peggyAct[n]] == tok { 261 | if len(expected) == cap(expected) { 262 | return res 263 | } 264 | expected = append(expected, tok) 265 | } 266 | } 267 | 268 | if peggyDef[state] == -2 { 269 | i := 0 270 | for peggyExca[i] != -1 || peggyExca[i+1] != state { 271 | i += 2 272 | } 273 | 274 | // Look for tokens that we accept or reduce. 
275 | for i += 2; peggyExca[i] >= 0; i += 2 { 276 | tok := peggyExca[i] 277 | if tok < TOKSTART || peggyExca[i+1] == 0 { 278 | continue 279 | } 280 | if len(expected) == cap(expected) { 281 | return res 282 | } 283 | expected = append(expected, tok) 284 | } 285 | 286 | // If the default action is to accept or reduce, give up. 287 | if peggyExca[i+1] != 0 { 288 | return res 289 | } 290 | } 291 | 292 | for i, tok := range expected { 293 | if i == 0 { 294 | res += ", expecting " 295 | } else { 296 | res += " or " 297 | } 298 | res += peggyTokname(tok) 299 | } 300 | return res 301 | } 302 | 303 | func peggylex1(lex peggyLexer, lval *peggySymType) (char, token int) { 304 | token = 0 305 | char = lex.Lex(lval) 306 | if char <= 0 { 307 | token = peggyTok1[0] 308 | goto out 309 | } 310 | if char < len(peggyTok1) { 311 | token = peggyTok1[char] 312 | goto out 313 | } 314 | if char >= peggyPrivate { 315 | if char < peggyPrivate+len(peggyTok2) { 316 | token = peggyTok2[char-peggyPrivate] 317 | goto out 318 | } 319 | } 320 | for i := 0; i < len(peggyTok3); i += 2 { 321 | token = peggyTok3[i+0] 322 | if token == char { 323 | token = peggyTok3[i+1] 324 | goto out 325 | } 326 | } 327 | 328 | out: 329 | if token == 0 { 330 | token = peggyTok2[1] /* unknown char */ 331 | } 332 | if peggyDebug >= 3 { 333 | __yyfmt__.Printf("lex %s(%d)\n", peggyTokname(token), uint(char)) 334 | } 335 | return char, token 336 | } 337 | 338 | func peggyParse(peggylex peggyLexer) int { 339 | return peggyNewParser().Parse(peggylex) 340 | } 341 | 342 | func (peggyrcvr *peggyParserImpl) Parse(peggylex peggyLexer) int { 343 | var peggyn int 344 | var peggyVAL peggySymType 345 | var peggyDollar []peggySymType 346 | _ = peggyDollar // silence set and not used 347 | peggyS := peggyrcvr.stack[:] 348 | 349 | Nerrs := 0 /* number of errors */ 350 | Errflag := 0 /* error recovery flag */ 351 | peggystate := 0 352 | peggyrcvr.char = -1 353 | peggytoken := -1 // peggyrcvr.char translated into internal numbering 354 | defer func() { 355 | // Make sure we report no lookahead when not parsing. 
356 | peggystate = -1 357 | peggyrcvr.char = -1 358 | peggytoken = -1 359 | }() 360 | peggyp := -1 361 | goto peggystack 362 | 363 | ret0: 364 | return 0 365 | 366 | ret1: 367 | return 1 368 | 369 | peggystack: 370 | /* put a state and value onto the stack */ 371 | if peggyDebug >= 4 { 372 | __yyfmt__.Printf("char %v in %v\n", peggyTokname(peggytoken), peggyStatname(peggystate)) 373 | } 374 | 375 | peggyp++ 376 | if peggyp >= len(peggyS) { 377 | nyys := make([]peggySymType, len(peggyS)*2) 378 | copy(nyys, peggyS) 379 | peggyS = nyys 380 | } 381 | peggyS[peggyp] = peggyVAL 382 | peggyS[peggyp].yys = peggystate 383 | 384 | peggynewstate: 385 | peggyn = peggyPact[peggystate] 386 | if peggyn <= peggyFlag { 387 | goto peggydefault /* simple state */ 388 | } 389 | if peggyrcvr.char < 0 { 390 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval) 391 | } 392 | peggyn += peggytoken 393 | if peggyn < 0 || peggyn >= peggyLast { 394 | goto peggydefault 395 | } 396 | peggyn = peggyAct[peggyn] 397 | if peggyChk[peggyn] == peggytoken { /* valid shift */ 398 | peggyrcvr.char = -1 399 | peggytoken = -1 400 | peggyVAL = peggyrcvr.lval 401 | peggystate = peggyn 402 | if Errflag > 0 { 403 | Errflag-- 404 | } 405 | goto peggystack 406 | } 407 | 408 | peggydefault: 409 | /* default state action */ 410 | peggyn = peggyDef[peggystate] 411 | if peggyn == -2 { 412 | if peggyrcvr.char < 0 { 413 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval) 414 | } 415 | 416 | /* look through exception table */ 417 | xi := 0 418 | for { 419 | if peggyExca[xi+0] == -1 && peggyExca[xi+1] == peggystate { 420 | break 421 | } 422 | xi += 2 423 | } 424 | for xi += 2; ; xi += 2 { 425 | peggyn = peggyExca[xi+0] 426 | if peggyn < 0 || peggyn == peggytoken { 427 | break 428 | } 429 | } 430 | peggyn = peggyExca[xi+1] 431 | if peggyn < 0 { 432 | goto ret0 433 | } 434 | } 435 | if peggyn == 0 { 436 | /* error ... attempt to resume parsing */ 437 | switch Errflag { 438 | case 0: /* brand new error */ 439 | peggylex.Error(peggyErrorMessage(peggystate, peggytoken)) 440 | Nerrs++ 441 | if peggyDebug >= 1 { 442 | __yyfmt__.Printf("%s", peggyStatname(peggystate)) 443 | __yyfmt__.Printf(" saw %s\n", peggyTokname(peggytoken)) 444 | } 445 | fallthrough 446 | 447 | case 1, 2: /* incompletely recovered error ... try again */ 448 | Errflag = 3 449 | 450 | /* find a state where "error" is a legal shift action */ 451 | for peggyp >= 0 { 452 | peggyn = peggyPact[peggyS[peggyp].yys] + peggyErrCode 453 | if peggyn >= 0 && peggyn < peggyLast { 454 | peggystate = peggyAct[peggyn] /* simulate a shift of "error" */ 455 | if peggyChk[peggystate] == peggyErrCode { 456 | goto peggystack 457 | } 458 | } 459 | 460 | /* the current p has no shift on "error", pop stack */ 461 | if peggyDebug >= 2 { 462 | __yyfmt__.Printf("error recovery pops state %d\n", peggyS[peggyp].yys) 463 | } 464 | peggyp-- 465 | } 466 | /* there is no state on the stack with an error shift ... 
abort */ 467 | goto ret1 468 | 469 | case 3: /* no shift yet; clobber input char */ 470 | if peggyDebug >= 2 { 471 | __yyfmt__.Printf("error recovery discards %s\n", peggyTokname(peggytoken)) 472 | } 473 | if peggytoken == peggyEofCode { 474 | goto ret1 475 | } 476 | peggyrcvr.char = -1 477 | peggytoken = -1 478 | goto peggynewstate /* try again in the same state */ 479 | } 480 | } 481 | 482 | /* reduction by production peggyn */ 483 | if peggyDebug >= 2 { 484 | __yyfmt__.Printf("reduce %v in:\n\t%v\n", peggyn, peggyStatname(peggystate)) 485 | } 486 | 487 | peggynt := peggyn 488 | peggypt := peggyp 489 | _ = peggypt // guard against "declared and not used" 490 | 491 | peggyp -= peggyR2[peggyn] 492 | // peggyp is now the index of $0. Perform the default action. Iff the 493 | // reduced production is ε, $1 is possibly out of range. 494 | if peggyp+1 >= len(peggyS) { 495 | nyys := make([]peggySymType, len(peggyS)*2) 496 | copy(nyys, peggyS) 497 | peggyS = nyys 498 | } 499 | peggyVAL = peggyS[peggyp+1] 500 | 501 | /* consult goto table to find next state */ 502 | peggyn = peggyR1[peggyn] 503 | peggyg := peggyPgo[peggyn] 504 | peggyj := peggyg + peggyS[peggyp].yys + 1 505 | 506 | if peggyj >= peggyLast { 507 | peggystate = peggyAct[peggyg] 508 | } else { 509 | peggystate = peggyAct[peggyj] 510 | if peggyChk[peggystate] != -peggyn { 511 | peggystate = peggyAct[peggyg] 512 | } 513 | } 514 | // dummy call; replaced with literal code 515 | switch peggynt { 516 | 517 | case 1: 518 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 519 | //line grammar.y:43 520 | { 521 | peggylex.(*lexer).result = peggyDollar[2].grammar 522 | } 523 | case 2: 524 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 525 | //line grammar.y:46 526 | { 527 | peggyVAL.grammar = Grammar{Prelude: peggyDollar[1].text, Rules: peggyDollar[3].rules} 528 | } 529 | case 3: 530 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 531 | //line grammar.y:47 532 | { 533 | peggyVAL.grammar = Grammar{Rules: peggyDollar[1].rules} 534 | } 535 | case 4: 536 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 537 | //line grammar.y:51 538 | { 539 | loc := peggyDollar[1].text.Begin() 540 | loc.Col++ // skip the open {. 
541 | err := ParseGoFile(loc, peggyDollar[1].text.String()) 542 | if err != nil { 543 | peggylex.(*lexer).err = err 544 | } 545 | peggyVAL.text = peggyDollar[1].text 546 | } 547 | case 5: 548 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 549 | //line grammar.y:62 550 | { 551 | peggyVAL.rules = append(peggyDollar[1].rules, peggyDollar[3].rule) 552 | } 553 | case 6: 554 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 555 | //line grammar.y:63 556 | { 557 | peggyVAL.rules = []Rule{peggyDollar[1].rule} 558 | } 559 | case 7: 560 | peggyDollar = peggyS[peggypt-0 : peggypt+1] 561 | //line grammar.y:67 562 | { 563 | peggyVAL.rules = nil 564 | } 565 | case 8: 566 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 567 | //line grammar.y:70 568 | { 569 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, Expr: peggyDollar[4].expr} 570 | } 571 | case 9: 572 | peggyDollar = peggyS[peggypt-5 : peggypt+1] 573 | //line grammar.y:73 574 | { 575 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, ErrorName: peggyDollar[2].text, Expr: peggyDollar[5].expr} 576 | } 577 | case 10: 578 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 579 | //line grammar.y:78 580 | { 581 | peggyVAL.name = Name{Name: peggyDollar[1].text, Args: peggyDollar[3].texts} 582 | } 583 | case 11: 584 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 585 | //line grammar.y:79 586 | { 587 | peggyVAL.name = Name{Name: peggyDollar[1].text} 588 | } 589 | case 12: 590 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 591 | //line grammar.y:82 592 | { 593 | peggyVAL.texts = []Text{peggyDollar[1].text} 594 | } 595 | case 13: 596 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 597 | //line grammar.y:83 598 | { 599 | peggyVAL.texts = append(peggyDollar[1].texts, peggyDollar[3].text) 600 | } 601 | case 14: 602 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 603 | //line grammar.y:87 604 | { 605 | e, ok := peggyDollar[1].expr.(*Choice) 606 | if !ok { 607 | e = &Choice{Exprs: []Expr{peggyDollar[1].expr}} 608 | } 609 | e.Exprs = append(e.Exprs, peggyDollar[4].expr) 610 | peggyVAL.expr = e 611 | } 612 | case 15: 613 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 614 | //line grammar.y:95 615 | { 616 | peggyVAL.expr = peggyDollar[1].expr 617 | } 618 | case 16: 619 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 620 | //line grammar.y:99 621 | { 622 | peggyDollar[2].action.Expr = peggyDollar[1].expr 623 | peggyVAL.expr = peggyDollar[2].action 624 | } 625 | case 17: 626 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 627 | //line grammar.y:103 628 | { 629 | peggyVAL.expr = peggyDollar[1].expr 630 | } 631 | case 18: 632 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 633 | //line grammar.y:107 634 | { 635 | e, ok := peggyDollar[1].expr.(*Sequence) 636 | if !ok { 637 | e = &Sequence{Exprs: []Expr{peggyDollar[1].expr}} 638 | } 639 | e.Exprs = append(e.Exprs, peggyDollar[2].expr) 640 | peggyVAL.expr = e 641 | } 642 | case 19: 643 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 644 | //line grammar.y:115 645 | { 646 | peggyVAL.expr = peggyDollar[1].expr 647 | } 648 | case 20: 649 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 650 | //line grammar.y:118 651 | { 652 | peggyVAL.expr = &LabelExpr{Label: peggyDollar[1].text, Expr: peggyDollar[4].expr} 653 | } 654 | case 21: 655 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 656 | //line grammar.y:119 657 | { 658 | peggyVAL.expr = peggyDollar[1].expr 659 | } 660 | case 22: 661 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 662 | //line grammar.y:122 663 | { 664 | peggyVAL.expr = &PredExpr{Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc} 665 | } 666 | 
case 23: 667 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 668 | //line grammar.y:123 669 | { 670 | peggyVAL.expr = &PredExpr{Neg: true, Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc} 671 | } 672 | case 24: 673 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 674 | //line grammar.y:124 675 | { 676 | peggyVAL.expr = peggyDollar[1].expr 677 | } 678 | case 25: 679 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 680 | //line grammar.y:127 681 | { 682 | peggyVAL.expr = &RepExpr{Op: '*', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 683 | } 684 | case 26: 685 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 686 | //line grammar.y:128 687 | { 688 | peggyVAL.expr = &RepExpr{Op: '+', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 689 | } 690 | case 27: 691 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 692 | //line grammar.y:129 693 | { 694 | peggyVAL.expr = &OptExpr{Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 695 | } 696 | case 28: 697 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 698 | //line grammar.y:130 699 | { 700 | peggyVAL.expr = peggyDollar[1].expr 701 | } 702 | case 29: 703 | peggyDollar = peggyS[peggypt-5 : peggypt+1] 704 | //line grammar.y:133 705 | { 706 | peggyVAL.expr = &SubExpr{Expr: peggyDollar[3].expr, Open: peggyDollar[1].loc, Close: peggyDollar[5].loc} 707 | } 708 | case 30: 709 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 710 | //line grammar.y:134 711 | { 712 | peggyVAL.expr = &PredCode{Code: peggyDollar[3].text, Loc: peggyDollar[1].loc} 713 | } 714 | case 31: 715 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 716 | //line grammar.y:135 717 | { 718 | peggyVAL.expr = &PredCode{Neg: true, Code: peggyDollar[3].text, Loc: peggyDollar[1].loc} 719 | } 720 | case 32: 721 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 722 | //line grammar.y:136 723 | { 724 | peggyVAL.expr = &Any{Loc: peggyDollar[1].loc} 725 | } 726 | case 33: 727 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 728 | //line grammar.y:137 729 | { 730 | peggyVAL.expr = &Ident{Name: peggyDollar[1].name} 731 | } 732 | case 34: 733 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 734 | //line grammar.y:138 735 | { 736 | peggyVAL.expr = &Literal{Text: peggyDollar[1].text} 737 | } 738 | case 35: 739 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 740 | //line grammar.y:139 741 | { 742 | peggyVAL.expr = peggyDollar[1].cclass 743 | } 744 | case 36: 745 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 746 | //line grammar.y:140 747 | { 748 | peggylex.Error("unexpected end of file") 749 | } 750 | case 37: 751 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 752 | //line grammar.y:144 753 | { 754 | loc := peggyDollar[1].text.Begin() 755 | loc.Col++ // skip the open {. 756 | err := ParseGoExpr(loc, peggyDollar[1].text.String()) 757 | if err != nil { 758 | peggylex.(*lexer).err = err 759 | } 760 | peggyVAL.text = peggyDollar[1].text 761 | } 762 | case 38: 763 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 764 | //line grammar.y:156 765 | { 766 | loc := peggyDollar[1].text.Begin() 767 | loc.Col++ // skip the open {. 
768 | typ, err := ParseGoBody(loc, peggyDollar[1].text.String()) 769 | if err != nil { 770 | peggylex.(*lexer).err = err 771 | } 772 | peggyVAL.action = &Action{Code: peggyDollar[1].text, ReturnType: typ} 773 | } 774 | } 775 | goto peggystack /* stack new state and value */ 776 | } 777 | -------------------------------------------------------------------------------- /grammar.y: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | %{ 8 | package main 9 | 10 | import "io" 11 | %} 12 | 13 | %union{ 14 | text text 15 | cclass *CharClass 16 | loc Loc 17 | expr Expr 18 | action *Action 19 | rule Rule 20 | rules []Rule 21 | texts []Text 22 | name Name 23 | grammar Grammar 24 | } 25 | 26 | %type Grammar 27 | %type Expr, ActExpr, SeqExpr, LabelExpr, PredExpr, RepExpr, Operand 28 | %type GoAction 29 | %type GoPred Prelude 30 | %type Args 31 | %type Rule 32 | %type Rules 33 | %type Name 34 | 35 | %token _ERROR 36 | %token _IDENT _STRING _CODE _ARROW 37 | %token _CHARCLASS 38 | %token '.', '*', '+', '?', ':', '/', '!', '&', '(', ')', '^', '<', '>', ',' 39 | 40 | %% 41 | 42 | Top: 43 | Nl Grammar { peggylex.(*lexer).result = $2 } 44 | 45 | Grammar: 46 | Prelude NewLine Rules Nl { $$ = Grammar{ Prelude: $1, Rules: $3 } } 47 | | Rules Nl { $$ = Grammar{ Rules: $1 } } 48 | 49 | Prelude: 50 | _CODE 51 | { 52 | loc := $1.Begin() 53 | loc.Col++ // skip the open {. 54 | err := ParseGoFile(loc, $1.String()) 55 | if err != nil { 56 | peggylex.(*lexer).err = err 57 | } 58 | $$ = $1 59 | } 60 | 61 | Rules: 62 | Rules NewLine Rule { $$ = append($1, $3) } 63 | | Rule { $$ = []Rule{ $1 } } 64 | // The following production adds a shift/reduce conflict: 65 | // reduce the empty string or shift into a Rule? 66 | // Yacc always prefers shift in the case of both, which is the desired behavior. 67 | | { $$ = nil } 68 | 69 | Rule: 70 | Name _ARROW Nl Expr { 71 | $$ = Rule{ Name: $1, Expr: $4 } 72 | } 73 | | Name _STRING _ARROW Nl Expr { 74 | $$ = Rule{ Name: $1, ErrorName: $2, Expr: $5 } 75 | } 76 | 77 | Name: 78 | _IDENT '<' Args '>' { $$ = Name{ Name: $1, Args: $3 } } 79 | | _IDENT { $$ = Name{ Name: $1 } } 80 | 81 | Args: 82 | _IDENT { $$ = []Text{$1} } 83 | | Args ',' _IDENT { $$ = append($1, $3) } 84 | 85 | Expr: 86 | Expr '/' Nl ActExpr 87 | { 88 | e, ok := $1.(*Choice) 89 | if !ok { 90 | e = &Choice{ Exprs: []Expr{$1} } 91 | } 92 | e.Exprs = append(e.Exprs, $4) 93 | $$ = e 94 | } 95 | | ActExpr { $$ = $1 } 96 | 97 | ActExpr: 98 | SeqExpr GoAction 99 | { 100 | $2.Expr = $1 101 | $$ = $2 102 | } 103 | | SeqExpr { $$ = $1 } 104 | 105 | SeqExpr: 106 | SeqExpr LabelExpr 107 | { 108 | e, ok := $1.(*Sequence) 109 | if !ok { 110 | e = &Sequence{ Exprs: []Expr{$1} } 111 | } 112 | e.Exprs = append(e.Exprs, $2) 113 | $$ = e 114 | } 115 | | LabelExpr { $$ = $1 } 116 | 117 | LabelExpr: 118 | _IDENT ':' Nl PredExpr { $$ = &LabelExpr{ Label: $1, Expr: $4 } } 119 | | PredExpr { $$ = $1 } 120 | 121 | PredExpr: 122 | '&' Nl PredExpr { $$ = &PredExpr{ Expr: $3, Loc: $1 } } 123 | | '!' Nl PredExpr { $$ = &PredExpr{ Neg: true, Expr: $3, Loc: $1 } } 124 | | RepExpr { $$ = $1 } 125 | 126 | RepExpr: 127 | RepExpr '*' { $$ = &RepExpr{ Op: '*', Expr: $1, Loc: $2 } } 128 | | RepExpr '+' { $$ = &RepExpr{ Op: '+', Expr: $1, Loc: $2 } } 129 | | RepExpr '?' 
{ $$ = &OptExpr{ Expr: $1, Loc: $2 } } 130 | | Operand { $$ = $1 } 131 | 132 | Operand: 133 | '(' Nl Expr Nl ')' { $$ = &SubExpr{ Expr: $3, Open: $1, Close: $5 } } 134 | | '&' Nl GoPred { $$ = &PredCode{ Code: $3, Loc: $1 } } 135 | | '!' Nl GoPred { $$ = &PredCode{ Neg: true, Code: $3, Loc: $1 } } 136 | | '.' { $$ = &Any{ Loc: $1 } } 137 | | Name { $$ = &Ident{ Name: $1 } } 138 | | _STRING { $$ = &Literal{ Text: $1 } } 139 | | _CHARCLASS { $$ =$1 } 140 | | '(' Nl Expr error { peggylex.Error("unexpected end of file") } 141 | 142 | GoPred: 143 | _CODE 144 | { 145 | loc := $1.Begin() 146 | loc.Col++ // skip the open {. 147 | err := ParseGoExpr(loc, $1.String()) 148 | if err != nil { 149 | peggylex.(*lexer).err = err 150 | } 151 | $$ = $1 152 | } 153 | 154 | GoAction: 155 | _CODE 156 | { 157 | loc := $1.Begin() 158 | loc.Col++ // skip the open {. 159 | typ, err := ParseGoBody(loc, $1.String()) 160 | if err != nil { 161 | peggylex.(*lexer).err = err 162 | } 163 | $$ = &Action{ Code: $1, ReturnType: typ } 164 | } 165 | 166 | NewLine: 167 | '\n' NewLine 168 | | '\n' 169 | 170 | Nl: 171 | NewLine 172 | | 173 | 174 | %% 175 | 176 | // Parse parses a Peggy input file, and returns the Grammar. 177 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) { 178 | x := &lexer{ 179 | in: in, 180 | file: fileName, 181 | line: 1, 182 | } 183 | peggyParse(x) 184 | if x.err != nil { 185 | return nil, x.err 186 | } 187 | return &x.result, nil 188 | } 189 | -------------------------------------------------------------------------------- /lex.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "errors" 11 | "fmt" 12 | "io" 13 | "unicode" 14 | ) 15 | 16 | const eof = -1 17 | 18 | type text struct { 19 | str string 20 | begin, end Loc 21 | } 22 | 23 | func (t text) PrettyPrint() string { 24 | return fmt.Sprintf(`Text{%d:%d-%d:%d: "%s"}`, 25 | t.begin.Line, t.begin.Col, 26 | t.end.Line, t.end.Col, 27 | t.str) 28 | } 29 | 30 | func (t text) String() string { return t.str } 31 | func (t text) Begin() Loc { return t.begin } 32 | func (t text) End() Loc { return t.end } 33 | 34 | type lexer struct { 35 | in io.RuneScanner 36 | file string 37 | n, line, lineStart, prevLineStart int 38 | eof bool 39 | 40 | // prevBegin is the beginning of the most-recently scanned token. 41 | // prevEnd is the end of the most-recently scanned token. 42 | // These are used for error reporting. 43 | prevBegin, prevEnd Loc 44 | 45 | // err is non-nil if there was an error during parsing. 46 | err error 47 | // result contains the Grammar resulting from a successful parse. 48 | result Grammar 49 | } 50 | 51 | // Begin returns the begin location of the last returned token. 52 | func (x *lexer) Begin() Loc { return x.prevBegin } 53 | 54 | // End returns the end location of the last returned token. 
55 | func (x *lexer) End() Loc { return x.prevEnd } 56 | 57 | func (x *lexer) loc() Loc { 58 | return Loc{ 59 | File: x.file, 60 | Line: x.line, 61 | Col: x.n - x.lineStart + 1, 62 | } 63 | } 64 | 65 | func (x *lexer) next() (rune, error) { 66 | if x.eof { 67 | return eof, nil 68 | } 69 | r, _, err := x.in.ReadRune() 70 | if err == io.EOF { 71 | x.eof = true 72 | return eof, nil 73 | } 74 | x.n++ 75 | if r == '\n' { 76 | x.prevLineStart = x.lineStart 77 | x.lineStart = x.n 78 | x.line++ 79 | } 80 | return r, err 81 | } 82 | 83 | func (x *lexer) back() error { 84 | if x.eof { 85 | return nil 86 | } 87 | if x.lineStart == x.n { 88 | x.lineStart = x.prevLineStart 89 | x.line-- 90 | } 91 | x.n-- 92 | return x.in.UnreadRune() 93 | } 94 | 95 | func (x *lexer) Error(s string) { 96 | if x.err != nil { 97 | return 98 | } 99 | x.err = Err(x, s) 100 | } 101 | 102 | func (x *lexer) Lex(lval *peggySymType) (v int) { 103 | defer func() { x.prevEnd = x.loc() }() 104 | for { 105 | x.prevBegin = x.loc() 106 | lval.text.begin = x.loc() 107 | lval.loc = x.loc() 108 | r, err := x.next() 109 | 110 | switch { 111 | case err != nil: 112 | break 113 | 114 | case r == '#': 115 | if err = comment(x); err != nil { 116 | break 117 | } 118 | return '\n' 119 | 120 | case unicode.IsLetter(r) || r == '_': 121 | if lval.text.str, err = ident(x); err != nil { 122 | break 123 | } 124 | lval.text.str = string([]rune{r}) + lval.text.str 125 | lval.text.end = x.loc() 126 | return _IDENT 127 | 128 | case r == '<': 129 | b := x.loc() 130 | if r, err = x.next(); err != nil { 131 | break 132 | } 133 | lval.text.str = string([]rune{'<', r}) 134 | lval.text.end = x.loc() 135 | if r != '-' { 136 | x.back() 137 | x.prevBegin = b 138 | return int('<') 139 | } 140 | return _ARROW 141 | 142 | case r == '{': 143 | if lval.text.str, err = code(x); err != nil { 144 | break 145 | } 146 | lval.text.end = x.loc() 147 | return _CODE 148 | 149 | case r == '[': 150 | if err = x.back(); err != nil { 151 | break 152 | } 153 | if lval.cclass, err = charClass(x); err != nil { 154 | x.err = err 155 | return _ERROR 156 | } 157 | return _CHARCLASS 158 | 159 | case r == '\'' || r == '"': 160 | if lval.text.str, err = delimited(x, r); err != nil { 161 | break 162 | } 163 | lval.text.end = x.loc() 164 | return _STRING 165 | 166 | case unicode.IsSpace(r) && r != '\n': 167 | continue 168 | 169 | default: 170 | return int(r) 171 | } 172 | x.prevEnd = x.loc() 173 | x.Error(err.Error()) 174 | return _ERROR 175 | } 176 | } 177 | 178 | func delimited(x *lexer, d rune) (string, error) { 179 | var rs []rune 180 | for { 181 | r, esc, err := x.nextUnesc(d) 182 | switch { 183 | case err != nil: 184 | return "", err 185 | case r == eof: 186 | return "", errors.New("unclosed " + string([]rune{d})) 187 | case r == d && !esc: 188 | return string(rs), nil 189 | } 190 | rs = append(rs, r) 191 | } 192 | } 193 | 194 | func ident(x *lexer) (string, error) { 195 | var rs []rune 196 | for { 197 | r, err := x.next() 198 | if err != nil { 199 | return "", err 200 | } 201 | if !isIdentRune(r) { 202 | return string(rs), x.back() 203 | } 204 | rs = append(rs, r) 205 | } 206 | } 207 | 208 | func isIdentRune(r rune) bool { 209 | return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' 210 | } 211 | 212 | func code(x *lexer) (string, error) { 213 | var rs []rune 214 | var n int 215 | for { 216 | r, err := x.next() 217 | if err != nil { 218 | return "", err 219 | } 220 | if r == eof { 221 | return "", errors.New("unclosed {") 222 | } 223 | if r == '{' { 224 | n++ 225 | } 226 | if r 
== '}' {
227 | if n == 0 {
228 | break
229 | }
230 | n--
231 | }
232 | rs = append(rs, r)
233 | }
234 | return string(rs), nil
235 | }
236 |
237 | func comment(x *lexer) error {
238 | for {
239 | r, err := x.next()
240 | if err != nil {
241 | return err
242 | }
243 | if r == '\n' || r == eof {
244 | return nil
245 | }
246 | }
247 | }
248 |
249 | func charClass(x *lexer) (*CharClass, error) {
250 | c := &CharClass{Open: x.loc()}
251 | if r, err := x.next(); err != nil {
252 | return nil, Err(c.Open, err.Error())
253 | } else if r != '[' {
254 | panic("impossible, no [")
255 | }
256 |
257 | var prev rune
258 | var hasPrev, span bool
259 |
260 | // last is the Loc just before last read rune.
261 | var last Loc
262 |
263 | // spanLoc is the location of the current span.
264 | // (We use type text to borrow that it implements Located.
265 | // However we ignore the str field.)
266 | var spanLoc text
267 | loop:
268 | for {
269 | last = x.loc()
270 | if !span && !hasPrev {
271 | spanLoc.begin = x.loc()
272 | }
273 | r, esc, err := x.nextUnesc(']')
274 | switch {
275 | case err != nil:
276 | return nil, err
277 |
278 | case r == eof:
279 | c.Close = x.loc()
280 | return nil, Err(c, "unclosed [")
281 |
282 | case r == ']' && !esc:
283 | c.Close = x.loc()
284 | break loop
285 |
286 | case span:
287 | spanLoc.end = x.loc()
288 | if !hasPrev {
289 | return nil, Err(spanLoc, "bad span")
290 | }
291 | if prev >= r {
292 | return nil, Err(spanLoc, "bad span")
293 | }
294 | c.Spans = append(c.Spans, [2]rune{prev, r})
295 | hasPrev, span = false, false
296 | spanLoc.begin = spanLoc.end
297 |
298 | case r == '-' && !esc:
299 | span = true
300 |
301 | default:
302 | if r == '^' && !esc && !c.Neg && len(c.Spans) == 0 && !hasPrev {
303 | c.Neg = true
304 | continue
305 | }
306 | if hasPrev {
307 | c.Spans = append(c.Spans, [2]rune{prev, prev})
308 | spanLoc.begin = last // in case current rune starts a span.
309 | }
310 | prev, hasPrev = r, true
311 | }
312 | }
313 | if span {
314 | spanLoc.end = last // just before closing ]
315 | return nil, Err(spanLoc, "bad span")
316 | }
317 | if hasPrev {
318 | c.Spans = append(c.Spans, [2]rune{prev, prev})
319 | }
320 | if len(c.Spans) == 0 {
321 | return nil, Err(c, "bad char class: empty")
322 | }
323 | return c, nil
324 | }
325 |
326 | var errUnknownEsc = errors.New("unknown escape sequence")
327 |
328 | // Like next, but unescapes an escaped rune according to Go's unescaping rules.
329 | // The second return value is whether the rune was escaped.
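// The escape forms accepted below are, in summary:
//	\a \b \f \n \r \t \v \\        single-character escapes
//	\ooo                           three octal digits, at most \377 (255)
//	\xhh \uhhhh \Uhhhhhhhh         hex escapes; values above 0x10FFFF are rejected
//	\<delim>                       the delimiter itself (e.g. \" inside a string)
//	\- and \^                      only when delim is ']', i.e. inside a character class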
330 | func (x *lexer) nextUnesc(delim rune) (rune, bool, error) { 331 | switch r, err := x.next(); { 332 | case err != nil: 333 | return 0, false, err 334 | case r == delim: 335 | return r, false, nil 336 | case r == '\\': 337 | r, err = x.next() 338 | if err != nil { 339 | return 0, true, err 340 | } 341 | switch r { 342 | case eof: 343 | return eof, true, nil 344 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': 345 | switch r { 346 | case 'a': 347 | r = '\a' 348 | case 'b': 349 | r = '\b' 350 | case 'f': 351 | r = '\f' 352 | case 'n': 353 | r = '\n' 354 | case 'r': 355 | r = '\r' 356 | case 't': 357 | r = '\t' 358 | case 'v': 359 | r = '\v' 360 | case '\\': 361 | r = '\\' 362 | } 363 | return r, true, nil 364 | case '0', '1', '2', '3', '4', '5', '6', '7': 365 | v, _ := oct(r) 366 | for i := 1; i < 3; i++ { 367 | r, err := x.next() 368 | if err != nil { 369 | return 0, false, err 370 | } 371 | d, ok := oct(r) 372 | if !ok { 373 | return 0, false, errUnknownEsc 374 | } 375 | v = (v << 3) | d 376 | } 377 | if v > 255 { 378 | return 0, false, errors.New("octal escape >255") 379 | } 380 | return v, true, nil 381 | case 'x', 'u', 'U': 382 | var n int 383 | switch r { 384 | case 'x': 385 | n = 2 386 | case 'u': 387 | n = 4 388 | case 'U': 389 | n = 8 390 | } 391 | var v int32 392 | for i := 0; i < n; i++ { 393 | r, err := x.next() 394 | if err != nil { 395 | return 0, false, err 396 | } 397 | d, ok := hex(r) 398 | if !ok { 399 | return 0, false, errUnknownEsc 400 | } 401 | v = (v << 4) | d 402 | } 403 | // TODO: surrogate halves are also illegal — whatever that is. 404 | if v > 0x10FFFF { 405 | return 0, false, errors.New("hex escape >0x10FFFF") 406 | } 407 | return v, true, nil 408 | default: 409 | if r == delim { 410 | return r, true, nil 411 | } 412 | // For character classes, allow \- as - and \^ as ^. 413 | if delim == ']' && (r == '-' || r == '^') { 414 | return r, true, nil 415 | } 416 | return 0, false, errUnknownEsc 417 | } 418 | default: 419 | return r, false, nil 420 | } 421 | } 422 | 423 | func oct(r rune) (int32, bool) { 424 | if '0' <= r && r <= '7' { 425 | return int32(r) - '0', true 426 | } 427 | return 0, false 428 | } 429 | 430 | func hex(r rune) (int32, bool) { 431 | if '0' <= r && r <= '9' { 432 | return int32(r) - '0', true 433 | } 434 | if 'a' <= r && r <= 'f' { 435 | return int32(r) - 'a' + 10, true 436 | } 437 | if 'A' <= r && r <= 'F' { 438 | return int32(r) - 'A' + 10, true 439 | } 440 | return 0, false 441 | } 442 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
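// Command usage, as a sketch only (the binary name and the grammar file names
// below are illustrative, not taken from this repository):
//
//	peggy -o parser.go my_grammar.peggy    write the generated parser to parser.go
//	peggy -pretty my_grammar.peggy         print the grammar with labels and actions stripped
//	peggy < my_grammar.peggy               read the grammar from standard input
//
// See the flag definitions below for the full set of options.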
6 | 7 | package main 8 | 9 | import ( 10 | "bufio" 11 | "flag" 12 | "fmt" 13 | "io" 14 | "os" 15 | ) 16 | 17 | //go:generate goyacc -o grammar.go -p "peggy" grammar.y 18 | 19 | var ( 20 | out = flag.String("o", "", "output file path") 21 | prefix = flag.String("p", "_", "identifier prefix") 22 | genActions = flag.Bool("a", true, "generate action parsing") 23 | genParseTree = flag.Bool("t", true, "generate parse tree parsing") 24 | prettyPrint = flag.Bool("pretty", false, "don't check or generate, write the grammar without labels or actions") 25 | ) 26 | 27 | func main() { 28 | flag.Parse() 29 | args := flag.Args() 30 | 31 | in := bufio.NewReader(os.Stdin) 32 | file := "" 33 | if len(args) > 0 { 34 | f, err := os.Open(args[0]) 35 | if err != nil { 36 | fmt.Println(err) 37 | os.Exit(1) 38 | } 39 | in = bufio.NewReader(f) 40 | file = args[0] 41 | } 42 | 43 | g, err := Parse(in, file) 44 | if err != nil { 45 | fmt.Println(err) 46 | os.Exit(1) 47 | } 48 | 49 | var w io.Writer = os.Stdout 50 | if *out != "" { 51 | f, err := os.Create(*out) 52 | if err != nil { 53 | fmt.Println(err) 54 | os.Exit(1) 55 | } 56 | defer func() { 57 | if err := f.Close(); err != nil { 58 | fmt.Println(err) 59 | } 60 | }() 61 | w = f 62 | } 63 | if *prettyPrint { 64 | for i := range g.Rules { 65 | r := &g.Rules[i] 66 | if _, err := io.WriteString(w, r.String()+"\n"); err != nil { 67 | fmt.Println(err) 68 | os.Exit(1) 69 | } 70 | } 71 | os.Exit(0) 72 | } 73 | if err := Check(g); err != nil { 74 | fmt.Println(err) 75 | os.Exit(1) 76 | } 77 | 78 | cfg := Config{Prefix: *prefix} 79 | if err := cfg.Generate(w, file, g); err != nil { 80 | fmt.Println(err) 81 | os.Exit(1) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /parse_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "errors" 11 | "io" 12 | "regexp" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/eaburns/pretty" 17 | ) 18 | 19 | // A ParserTest is a Peggy input-file parser test 20 | // with a given input and expected string formats. 21 | type ParserTest struct { 22 | Name string 23 | Input string 24 | // FullString is the expected fully parenthesized string. 25 | FullString string 26 | // String is the expected regular String string. 27 | // This is the same as Input, but without 28 | // comments and unnecessary whitespace, 29 | // except for a single space, " ", 30 | // separating sub-exprsessions of a sequence, 31 | // and on either side of <-. 32 | String string 33 | // Prelude is the expected file prelude text. 34 | Prelude string 35 | // Error is a regexp string that matches an expected parse error. 36 | Error string 37 | } 38 | 39 | // ParseTests is a set of tests matching 40 | // FullString and String outputs with expected outputs for successful parses, 41 | // and expected parse errors for failed parses. 42 | // If Input contains a ☹ rune, the io.RuneScanner returns an error on that rune. 
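// As an illustration, the "choice associativity" case below has
//	Input:      "A <- B/C/D"
//	FullString: "A <- (((B)/(C))/(D))"
//	String:     "A <- B/C/D"
// FullString parenthesizes every subexpression, while String reproduces the
// input with comments and extra whitespace removed.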
43 | var ParseTests = []ParserTest{ 44 | { 45 | Name: "empty", 46 | Input: "", 47 | FullString: "", 48 | String: "", 49 | }, 50 | { 51 | Name: "only whitespace", 52 | Input: " \n\n\t ", 53 | FullString: "", 54 | String: "", 55 | }, 56 | { 57 | Name: "simple rule", 58 | Input: "A <- B", 59 | FullString: "A <- (B)", 60 | String: "A <- B", 61 | }, 62 | { 63 | Name: "named rule", 64 | Input: `A "name" <- B`, 65 | FullString: `A "name" <- (B)`, 66 | String: `A "name" <- B`, 67 | }, 68 | { 69 | Name: "named rule, single quotes", 70 | Input: `A 'name' <- B`, 71 | FullString: `A "name" <- (B)`, 72 | String: `A "name" <- B`, 73 | }, 74 | { 75 | Name: "named rule, empty name", 76 | Input: `A "" <- B`, 77 | FullString: `A "" <- (B)`, 78 | String: `A "" <- B`, 79 | }, 80 | { 81 | Name: "named rule, escapes", 82 | Input: `A "\t\nabc" <- B`, 83 | FullString: `A "\t\nabc" <- (B)`, 84 | String: `A "\t\nabc" <- B`, 85 | }, 86 | { 87 | Name: "prelude and simple rule", 88 | Input: `{ 89 | package main 90 | 91 | import "fmt" 92 | 93 | func main() { fmt.Println("Hello, World") } 94 | } 95 | A <- B`, 96 | FullString: "A <- (B)", 97 | String: "A <- B", 98 | Prelude: ` 99 | package main 100 | 101 | import "fmt" 102 | 103 | func main() { fmt.Println("Hello, World") } 104 | `, 105 | }, 106 | { 107 | Name: "multiple simple rules", 108 | Input: "A <- B\nC <- D", 109 | FullString: "A <- (B)\nC <- (D)", 110 | String: "A <- B\nC <- D", 111 | }, 112 | { 113 | Name: "multiple simple rules", 114 | Input: "A <- B\nC <- D", 115 | FullString: "A <- (B)\nC <- (D)", 116 | String: "A <- B\nC <- D", 117 | }, 118 | { 119 | Name: "whitespace", 120 | Input: "\tA <- B\n \n\n C <- D\t ", 121 | FullString: "A <- (B)\nC <- (D)", 122 | String: "A <- B\nC <- D", 123 | }, 124 | { 125 | Name: "comments", 126 | Input: "# comment\nA <- B # comment\n# comment", 127 | FullString: "A <- (B)", 128 | String: "A <- B", 129 | }, 130 | 131 | // Operands. 132 | { 133 | Name: "& pred code", 134 | Input: "A <- &{pred}", 135 | FullString: "A <- (&{pred})", 136 | String: "A <- &{…}", 137 | }, 138 | { 139 | Name: "! 
pred code", 140 | Input: "A <- !{pred}", 141 | FullString: "A <- (!{pred})", 142 | String: "A <- !{…}", 143 | }, 144 | { 145 | Name: "any", 146 | Input: "A <- .", 147 | FullString: "A <- (.)", 148 | String: "A <- .", 149 | }, 150 | { 151 | Name: "identifier", 152 | Input: "A <- BCD", 153 | FullString: "A <- (BCD)", 154 | String: "A <- BCD", 155 | }, 156 | { 157 | Name: "non-ASCII identifier", 158 | Input: "Â <- _αβξ", 159 | FullString: "Â <- (_αβξ)", 160 | String: "Â <- _αβξ", 161 | }, 162 | { 163 | Name: "double-quote string", 164 | Input: `A <- "BCD☺"`, 165 | FullString: `A <- ("BCD☺")`, 166 | String: `A <- "BCD☺"`, 167 | }, 168 | { 169 | Name: "single-quote string", 170 | Input: `A <- 'BCD☺'`, 171 | FullString: `A <- ("BCD☺")`, 172 | String: `A <- "BCD☺"`, 173 | }, 174 | { 175 | Name: "character class", 176 | Input: `A <- [abc\nxyzαβξ1-9A-Z\-]`, 177 | FullString: `A <- ([abc\nxyzαβξ1-9A-Z\-])`, 178 | String: `A <- [abc\nxyzαβξ1-9A-Z\-]`, 179 | }, 180 | { 181 | Name: "^ character class", 182 | Input: `A <- [^^abc\nxyzαβξ]`, 183 | FullString: `A <- ([^\^abc\nxyzαβξ])`, 184 | String: `A <- [^\^abc\nxyzαβξ]`, 185 | }, 186 | { 187 | Name: "character class, delimiters", 188 | Input: `A <- [[\]]`, 189 | FullString: `A <- ([[\]])`, 190 | String: `A <- [[\]]`, 191 | }, 192 | { 193 | // ^ should only negate the class if it's at the beginning 194 | Name: "character class, non-first^", 195 | Input: `A <- [abc^]`, 196 | FullString: `A <- ([abc\^])`, 197 | String: `A <- [abc\^]`, 198 | }, 199 | { 200 | Name: "character class, escaping", 201 | Input: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [\101] [\x41] [\u0041] [\U00000041] [\aa\b] [a\ab] [\^]`, 202 | FullString: `A <- ((((((((((((((((([\a]) ([\b])) ([\f])) ([\n])) ([\r])) ([\t])) ([\v])) ([\\])) ([\-])) ([\]])) ([A])) ([A])) ([A])) ([A])) ([\aa\b])) ([a\ab])) ([\^]))`, 203 | String: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [A] [A] [A] [A] [\aa\b] [a\ab] [\^]`, 204 | }, 205 | 206 | // Associativity. 207 | { 208 | Name: "choice associativity", 209 | Input: "A <- B/C/D", 210 | FullString: "A <- (((B)/(C))/(D))", 211 | String: "A <- B/C/D", 212 | }, 213 | { 214 | Name: "sequence associativity", 215 | Input: "A <- B C D", 216 | FullString: "A <- (((B) (C)) (D))", 217 | String: "A <- B C D", 218 | }, 219 | 220 | // Precedence. 
221 | { 222 | Name: "various precedences", 223 | Input: "A <- x:B*+ C?/(!D y:&E)* {return 0}/F !{p}", 224 | FullString: "A <- ((((x:(((B)*)+)) ((C)?))/((((!(D)) (y:(&(E))))*) {return 0}))/((F) (!{p})))", 225 | String: "A <- x:B*+ C?/(!D y:&E)* {…}/F !{…}", 226 | }, 227 | { 228 | Name: "action < choice", 229 | Input: "A <- B { return 0 }/C { return 0 }", 230 | FullString: "A <- (((B) { return 0 })/((C) { return 0 }))", 231 | String: "A <- B {…}/C {…}", 232 | }, 233 | { 234 | Name: "sequence < action", 235 | Input: "A <- B C { return 0 }", 236 | FullString: "A <- (((B) (C)) { return 0 })", 237 | String: "A <- B C {…}", 238 | }, 239 | { 240 | Name: "label < sequence", 241 | Input: "A <- s:A t:B", 242 | FullString: "A <- ((s:(A)) (t:(B)))", 243 | String: "A <- s:A t:B", 244 | }, 245 | { 246 | Name: "pred < label", 247 | Input: "A <- s:!A t:&B", 248 | FullString: "A <- ((s:(!(A))) (t:(&(B))))", 249 | String: "A <- s:!A t:&B", 250 | }, 251 | { 252 | Name: "rep < pred", 253 | Input: "A <- !A* &B+ !C?", 254 | FullString: "A <- (((!((A)*)) (&((B)+))) (!((C)?)))", 255 | String: "A <- !A* &B+ !C?", 256 | }, 257 | { 258 | Name: "operand < rep", 259 | Input: `A <- (a/b c)* 260 | B <- &{pred}* 261 | C <- !{pred}* 262 | D <- .* 263 | E <- Z* 264 | F <- "cde"* 265 | G <- [fgh]*`, 266 | FullString: `A <- (((a)/((b) (c)))*) 267 | B <- ((&{pred})*) 268 | C <- ((!{pred})*) 269 | D <- ((.)*) 270 | E <- ((Z)*) 271 | F <- (("cde")*) 272 | G <- (([fgh])*)`, 273 | String: `A <- (a/b c)* 274 | B <- &{…}* 275 | C <- !{…}* 276 | D <- .* 277 | E <- Z* 278 | F <- "cde"* 279 | G <- [fgh]*`, 280 | }, 281 | 282 | // Templates 283 | { 284 | Name: "1-ary template rule", 285 | Input: `A <- x`, 286 | FullString: `A <- (x)`, 287 | String: `A <- x`, 288 | }, 289 | { 290 | Name: "3-ary template rule", 291 | Input: `A <- x y z`, 292 | FullString: `A <- (((x) (y)) (z))`, 293 | String: `A <- x y z`, 294 | }, 295 | { 296 | Name: "1-ary template invocation", 297 | Input: `A <- B C`, 298 | FullString: `A <- ((B) (C))`, 299 | String: `A <- B C`, 300 | }, 301 | { 302 | Name: "3-ary template invocation", 303 | Input: `A <- B C`, 304 | FullString: `A <- ((B) (C))`, 305 | String: `A <- B C`, 306 | }, 307 | 308 | // Rune escaping 309 | { 310 | Name: `escape \a`, 311 | Input: `A <- "\a"`, 312 | FullString: `A <- ("\a")`, 313 | String: `A <- "\a"`, 314 | }, 315 | { 316 | Name: `escape \b`, 317 | Input: `A <- "\b"`, 318 | FullString: `A <- ("\b")`, 319 | String: `A <- "\b"`, 320 | }, 321 | { 322 | Name: `escape \f`, 323 | Input: `A <- "\f"`, 324 | FullString: `A <- ("\f")`, 325 | String: `A <- "\f"`, 326 | }, 327 | { 328 | Name: `escape \n`, 329 | Input: `A <- "\n"`, 330 | FullString: `A <- ("\n")`, 331 | String: `A <- "\n"`, 332 | }, 333 | { 334 | Name: `escape \r`, 335 | Input: `A <- "\r"`, 336 | FullString: `A <- ("\r")`, 337 | String: `A <- "\r"`, 338 | }, 339 | { 340 | Name: `escape \t`, 341 | Input: `A <- "\t"`, 342 | FullString: `A <- ("\t")`, 343 | String: `A <- "\t"`, 344 | }, 345 | { 346 | Name: `escape \v`, 347 | Input: `A <- "\v"`, 348 | FullString: `A <- ("\v")`, 349 | String: `A <- "\v"`, 350 | }, 351 | { 352 | Name: `escape \\`, 353 | Input: `A <- "\\"`, 354 | FullString: `A <- ("\\")`, 355 | String: `A <- "\\"`, 356 | }, 357 | { 358 | Name: `escape \"`, 359 | Input: `A <- "\""`, 360 | FullString: `A <- ("\"")`, 361 | String: `A <- "\""`, 362 | }, 363 | { 364 | Name: `escape \'`, 365 | Input: `A <- '\''`, 366 | FullString: `A <- ("'")`, 367 | String: `A <- "'"`, 368 | }, 369 | { 370 | Name: `escape \000`, 371 | Input: `A <- 
"\000"`, 372 | FullString: `A <- ("\x00")`, 373 | String: `A <- "\x00"`, 374 | }, 375 | { 376 | Name: `escape \101 (A)`, 377 | Input: `A <- "\101"`, 378 | FullString: `A <- ("A")`, 379 | String: `A <- "A"`, 380 | }, 381 | { 382 | Name: `escape \101BCD`, 383 | Input: `A <- "\101BCD"`, 384 | FullString: `A <- ("ABCD")`, 385 | String: `A <- "ABCD"`, 386 | }, 387 | { 388 | Name: `escape \377 (255)`, 389 | Input: `A <- "\377"`, 390 | FullString: `A <- ("ÿ")`, // \xFF 391 | String: `A <- "ÿ"`, 392 | }, 393 | { 394 | Name: `escape \400 (256)`, 395 | Input: `A <- "\400"`, 396 | Error: "^test.file:1.6,1.11:.*>255", 397 | }, 398 | { 399 | Name: `escape \400 (256)`, 400 | Input: `A <- "xyz\400"`, 401 | // TODO: report the correct error location. 402 | Error: "^test.file:1.6,1.14:.*>255", 403 | }, 404 | { 405 | Name: `escape \4`, 406 | Input: `A <- "\4"`, 407 | Error: "^test.file:1.6,1.10: unknown escape sequence", 408 | }, 409 | { 410 | Name: `escape \40`, 411 | Input: `A <- "\40"`, 412 | Error: "^test.file:1.6,1.11: unknown escape sequence", 413 | }, 414 | { 415 | Name: `escape \x00`, 416 | Input: `A <- "\x00"`, 417 | FullString: `A <- ("\x00")`, 418 | String: `A <- "\x00"`, 419 | }, 420 | { 421 | Name: `escape \x41 (A)`, 422 | Input: `A <- "\x41"`, 423 | FullString: `A <- ("A")`, 424 | String: `A <- "A"`, 425 | }, 426 | { 427 | Name: `escape \x41BCD`, 428 | Input: `A <- "\x41BCD"`, 429 | FullString: `A <- ("ABCD")`, 430 | String: `A <- "ABCD"`, 431 | }, 432 | { 433 | Name: `escape \xFF`, 434 | Input: `A <- "\xFF"`, 435 | FullString: `A <- ("ÿ")`, // \xFF 436 | String: `A <- "ÿ"`, 437 | }, 438 | { 439 | Name: `escape \xF`, 440 | Input: `A <- "\xF"`, 441 | Error: "^test.file:1.6,1.11: unknown escape sequence", 442 | }, 443 | { 444 | Name: `escape \u0000`, 445 | Input: `A <- "\u0000"`, 446 | FullString: `A <- ("\x00")`, 447 | String: `A <- "\x00"`, 448 | }, 449 | { 450 | Name: `escape \u0041 (A)`, 451 | Input: `A <- "\u0041"`, 452 | FullString: `A <- ("A")`, 453 | String: `A <- "A"`, 454 | }, 455 | { 456 | Name: `escape \u0041BCD`, 457 | Input: `A <- "\u0041BCD"`, 458 | FullString: `A <- ("ABCD")`, 459 | String: `A <- "ABCD"`, 460 | }, 461 | { 462 | Name: `escape \u263A (☺)`, 463 | Input: `A <- "\u263A"`, 464 | FullString: `A <- ("☺")`, 465 | String: `A <- "☺"`, 466 | }, 467 | { 468 | Name: `escape \u263a (☺)`, 469 | Input: `A <- "\u263a"`, 470 | FullString: `A <- ("☺")`, 471 | String: `A <- "☺"`, 472 | }, 473 | { 474 | Name: `escape \uF`, 475 | Input: `A <- "\xF"`, 476 | Error: "^test.file:1.6,1.11: unknown escape sequence", 477 | }, 478 | { 479 | Name: `escape \uFF`, 480 | Input: `A <- "\uFF"`, 481 | Error: "^test.file:1.6,1.12: unknown escape sequence", 482 | }, 483 | { 484 | Name: `escape \uFFF`, 485 | Input: `A <- "\uFFF"`, 486 | Error: "^test.file:1.6,1.13: unknown escape sequence", 487 | }, 488 | { 489 | Name: `escape \U00000000`, 490 | Input: `A <- "\U00000000"`, 491 | FullString: `A <- ("\x00")`, 492 | String: `A <- "\x00"`, 493 | }, 494 | { 495 | Name: `escape \U00000041 (A)`, 496 | Input: `A <- "\U00000041"`, 497 | FullString: `A <- ("A")`, 498 | String: `A <- "A"`, 499 | }, 500 | { 501 | Name: `escape \U00000041BCD`, 502 | Input: `A <- "\U00000041BCD"`, 503 | FullString: `A <- ("ABCD")`, 504 | String: `A <- "ABCD"`, 505 | }, 506 | { 507 | Name: `escape \U0000263A (☺)`, 508 | Input: `A <- "\U0000263A"`, 509 | FullString: `A <- ("☺")`, 510 | String: `A <- "☺"`, 511 | }, 512 | { 513 | Name: `escape \U0000263a (☺)`, 514 | Input: `A <- "\U0000263a"`, 515 | FullString: `A <- ("☺")`, 516 | 
String: `A <- "☺"`, 517 | }, 518 | { 519 | Name: `escape \U0010FFFF`, 520 | Input: `A <- "\U0010FFFF"`, 521 | FullString: `A <- ("\U0010ffff")`, 522 | String: `A <- "\U0010ffff"`, 523 | }, 524 | { 525 | Name: `escape \U00110000`, 526 | Input: `A <- "\U00110000"`, 527 | Error: "^test.file:1.6,1.17:.*>0x10FFFF", 528 | }, 529 | { 530 | Name: `escape \UF`, 531 | Input: `A <- "\UF"`, 532 | Error: "^test.file:1.6,1.11: unknown escape sequence", 533 | }, 534 | { 535 | Name: `escape \UFF`, 536 | Input: `A <- "\UFF"`, 537 | Error: "^test.file:1.6,1.12: unknown escape sequence", 538 | }, 539 | { 540 | Name: `escape \UFFF`, 541 | Input: `A <- "\UFFF"`, 542 | Error: "^test.file:1.6,1.13: unknown escape sequence", 543 | }, 544 | { 545 | Name: `escape \UFFFF`, 546 | Input: `A <- "\UFFFF"`, 547 | Error: "^test.file:1.6,1.14: unknown escape sequence", 548 | }, 549 | { 550 | Name: `escape \UFFFFF`, 551 | Input: `A <- "\UFFFFF"`, 552 | Error: "^test.file:1.6,1.15: unknown escape sequence", 553 | }, 554 | { 555 | Name: `escape \UFFFFFF`, 556 | Input: `A <- "\UFFFFFF"`, 557 | Error: "^test.file:1.6,1.16: unknown escape sequence", 558 | }, 559 | { 560 | Name: `escape \UFFFFFFF`, 561 | Input: `A <- "\UFFFFFFF"`, 562 | Error: "^test.file:1.6,1.17: unknown escape sequence", 563 | }, 564 | { 565 | Name: `string with multiple escapes`, 566 | Input: `A <- "x\a\b\f\n\r\t\v\\\"\000\x00\u0000\U00000000☺"`, 567 | FullString: `A <- ("x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺")`, 568 | String: `A <- "x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺"`, 569 | }, 570 | { 571 | Name: `unknown escape`, 572 | Input: `A <- "\z"`, 573 | Error: "^test.file:1.6,1.9: unknown escape sequence", 574 | }, 575 | { 576 | Name: `escape eof`, 577 | Input: `A <- "\`, 578 | Error: `^test.file:1.6,1.8: unclosed "`, 579 | }, 580 | 581 | // Whitespace. 582 | // BUG: The current YACC grammar 583 | // doesn't allow whitespace between all tokens, 584 | // but only particular tokens. 585 | // Specifically whitespace can only appear after 586 | // delimiters after which a new rule cannot begin. 587 | // This is because, in order to remain LALR(1), 588 | // a newline terminates a sequence expression, 589 | // denoting that the next identifier is a rule name. 590 | { 591 | Name: `after <-`, 592 | Input: `A <- 593 | "a" 594 | 595 | B <- #comment 596 | "b" 597 | 598 | C "c" <- 599 | "c" 600 | 601 | D "d" <- #comment 602 | "d"`, 603 | FullString: `A <- ("a") 604 | B <- ("b") 605 | C "c" <- ("c") 606 | D "d" <- ("d")`, 607 | String: `A <- "a" 608 | B <- "b" 609 | C "c" <- "c" 610 | D "d" <- "d"`, 611 | }, 612 | { 613 | Name: `after /`, 614 | Input: `A <- B / 615 | C / # comment 616 | D`, 617 | FullString: `A <- (((B)/(C))/(D))`, 618 | String: `A <- B/C/D`, 619 | }, 620 | { 621 | Name: `after : label`, 622 | Input: `A <- l: 623 | B m: #comment 624 | C`, 625 | FullString: `A <- ((l:(B)) (m:(C)))`, 626 | String: `A <- l:B m:C`, 627 | }, 628 | { 629 | Name: `after & predicate`, 630 | Input: `A <- & 631 | B & #comment 632 | C`, 633 | FullString: `A <- ((&(B)) (&(C)))`, 634 | String: `A <- &B &C`, 635 | }, 636 | { 637 | Name: `after ! predicate`, 638 | Input: `A <- ! 639 | B ! 
#comment 640 | C`, 641 | FullString: `A <- ((!(B)) (!(C)))`, 642 | String: `A <- !B !C`, 643 | }, 644 | { 645 | Name: `after (`, 646 | Input: `A <- ( 647 | B ( #comment 648 | C))`, 649 | FullString: `A <- ((B) (C))`, 650 | String: `A <- (B (C))`, 651 | }, 652 | { 653 | Name: `before )`, 654 | Input: `A <- (B (C 655 | ) #comment 656 | )`, 657 | FullString: `A <- ((B) (C))`, 658 | String: `A <- (B (C))`, 659 | }, 660 | { 661 | Name: `after & code`, 662 | Input: `A <- & 663 | {code} & #comment 664 | {CODE}`, 665 | FullString: `A <- ((&{code}) (&{CODE}))`, 666 | String: `A <- &{…} &{…}`, 667 | }, 668 | { 669 | Name: `after ! code`, 670 | Input: `A <- ! 671 | {code} ! #comment 672 | {CODE}`, 673 | FullString: `A <- ((!{code}) (!{CODE}))`, 674 | String: `A <- !{…} !{…}`, 675 | }, 676 | 677 | // Systax errors. 678 | { 679 | Name: "bad rule name", 680 | Input: "\n\t\t&", 681 | Error: "^test.file:2.3,2.4:", 682 | }, 683 | { 684 | Name: "missing <-", 685 | Input: "\nA B", 686 | Error: "^test.file:2.3,2.4:", 687 | }, 688 | { 689 | Name: "bad <-", 690 | Input: "\nA <~ C", 691 | Error: "^test.file:2.4,2.5:", 692 | }, 693 | { 694 | Name: "missing expr", 695 | Input: "\nA <-", 696 | Error: "^test.file:2.5:", 697 | }, 698 | { 699 | Name: "unexpected rune", 700 | Input: "\nA <- C ☺", 701 | Error: "^test.file:2.8,2.9:", 702 | }, 703 | { 704 | Name: "unclosed (", 705 | Input: "\nA <- (B", 706 | Error: "^test.file:2.8:", 707 | }, 708 | { 709 | Name: "unclosed '", 710 | Input: "\nA <- 'B", 711 | Error: "^test.file:2.6,2.8: unclosed '", 712 | }, 713 | { 714 | Name: `unclosed "`, 715 | Input: "\nA <- \"B", 716 | Error: "^test.file:2.6,2.8: unclosed \"", 717 | }, 718 | { 719 | Name: `unclosed {`, 720 | Input: "\nA <- B { code", 721 | Error: "^test.file:2.8,2.14: unclosed {", 722 | }, 723 | { 724 | Name: `unclosed spans lines`, 725 | Input: "\nA <- \"B\n\nC", 726 | Error: "^test.file:2.6,4.2: unclosed \"", 727 | }, 728 | { 729 | Name: "unclosed [", 730 | Input: "\nA <- [B", 731 | Error: "^test.file:2.6,2.8: unclosed [[]", 732 | }, 733 | { 734 | Name: "character class empty", 735 | Input: "\nA <- []", 736 | Error: "^test.file:2.6,2.8: bad char class: empty", 737 | }, 738 | { 739 | Name: "character class starts with span", 740 | Input: "\nA <- [-9]", 741 | Error: "^test.file:2.7,2.9: bad span", 742 | }, 743 | { 744 | Name: "character class no span start", 745 | Input: "\nA <- [1-3-9]", 746 | Error: "^test.file:2.10,2.12: bad span", 747 | }, 748 | { 749 | Name: "character class ends with span", 750 | Input: "\nA <- [0-]", 751 | Error: "^test.file:2.7,2.9: bad span", 752 | }, 753 | { 754 | Name: "character class inverted span", 755 | Input: "\nA <- [9-0]", 756 | Error: "^test.file:2.7,2.10: bad span", 757 | }, 758 | { 759 | Name: "character class span after span", 760 | Input: "\nA <- [^0-9abcA-Zz-a]", 761 | Error: "^test.file:2.17,2.20: bad span", 762 | }, 763 | { 764 | Name: "character class bad span after rune", 765 | Input: "\nA <- [^0-9abcZ-A]", 766 | Error: "^test.file:2.14,2.17: bad span", 767 | }, 768 | 769 | // Go syntax errors. 770 | { 771 | Name: `bad prelude`, 772 | Input: "{ not package line }\nA <- B", 773 | Error: "^test.file:1.3", 774 | }, 775 | { 776 | Name: `bad multi-line prelude`, 777 | Input: `{ 778 | package main 779 | 780 | import "fmt" 781 | 782 | // Missing open paren. 783 | func main() { fmt.Println"Hello, World") } 784 | } 785 | A <- B`, 786 | Error: "^test.file:7.26", 787 | }, 788 | { 789 | Name: `bad bool expression`, 790 | // = instead of ==. 
791 | Input: "\nA <- &{ x = z}", 792 | Error: "^test.file:2.11", 793 | }, 794 | { 795 | Name: `bad multi-line bool expression`, 796 | // Missing the closed paren on p(. 797 | Input: "\nA <- &{ x == \n p(y, z, h}", 798 | Error: "^test.file:3.11", 799 | }, 800 | { 801 | Name: `bad action`, 802 | Input: "A <- B { if ( }", 803 | Error: "^test.file:1.15", 804 | }, 805 | { 806 | Name: `bad multi-line action`, 807 | Input: "\nA <- B {\n if ( }", 808 | Error: "^test.file:3.7", 809 | }, 810 | { 811 | Name: `bad action: invalid nested func def`, 812 | Input: "\nA <- B { func f() int { return 1 } }", 813 | Error: "^test.file:2.15", 814 | }, 815 | { 816 | Name: `action with nested return`, 817 | Input: "A <- B { if true { return 0 } else { return 1 } }", 818 | FullString: "A <- ((B) { if true { return 0 } else { return 1 } })", 819 | String: "A <- B {…}", 820 | }, 821 | { 822 | Name: `missing return`, 823 | Input: "A <- B { }", 824 | Error: "^test.file:1.9: no return statement", 825 | }, 826 | { 827 | Name: `multi-value return`, 828 | Input: "A <- B { return 1, 2, 3 }", 829 | Error: "^test.file:1.9: must return exactly one value", 830 | }, 831 | { 832 | Name: `non-conversion multi-ary function return`, 833 | Input: "A <- B { return f(a, b, c) }", 834 | Error: "^test.file:1.9: cannot infer type", 835 | }, 836 | { 837 | Name: `non-conversion nil-ary function return`, 838 | Input: "A <- B { return f() }", 839 | Error: "^test.file:1.9: cannot infer type", 840 | }, 841 | { 842 | Name: `non-conversion function return`, 843 | Input: "A <- B { return f(a, b, c) }", 844 | Error: "^test.file:1.9: cannot infer type", 845 | }, 846 | 847 | // I/O errors. 848 | { 849 | Name: "only I/O error", 850 | Input: "☹", 851 | Error: testIOError, 852 | }, 853 | { 854 | Name: "comment I/O error", 855 | Input: "#☹", 856 | Error: testIOError, 857 | }, 858 | { 859 | Name: "ident I/O error", 860 | Input: "A☹", 861 | Error: testIOError, 862 | }, 863 | { 864 | Name: "arrow I/O error", 865 | Input: "A <☹", 866 | Error: testIOError, 867 | }, 868 | { 869 | Name: "code I/O error", 870 | Input: "A <- B { ☹", 871 | Error: testIOError, 872 | }, 873 | { 874 | Name: "char class I/O error", 875 | Input: "A <- [☹", 876 | Error: testIOError, 877 | }, 878 | { 879 | Name: "double-quoted string I/O error", 880 | Input: "A <- \"☹", 881 | Error: testIOError, 882 | }, 883 | { 884 | Name: "single-quoted string I/O error", 885 | Input: "A <- '☹", 886 | Error: testIOError, 887 | }, 888 | } 889 | 890 | func TestParse(t *testing.T) { 891 | for _, test := range ParseTests { 892 | test := test 893 | t.Run(test.Name, func(t *testing.T) { 894 | t.Parallel() 895 | in := testRuneScanner{strings.NewReader(test.Input)} 896 | g, err := Parse(in, "test.file") 897 | 898 | if test.Error != "" { 899 | if err == nil { 900 | t.Log(pretty.String(g.Rules)) 901 | t.Errorf("Parse(%q) ok, but expected error matching %q", 902 | test.Input, test.Error) 903 | return 904 | } 905 | re := regexp.MustCompile(test.Error) 906 | if !re.MatchString(err.Error()) { 907 | t.Errorf("Parse(%q) err=%q, but expected to match %q", 908 | test.Input, err.Error(), test.Error) 909 | return 910 | } 911 | return 912 | } 913 | 914 | if err != nil { 915 | t.Errorf("Parse(%q) failed: %s", test.Input, err) 916 | return 917 | } 918 | var pre string 919 | if g.Prelude != nil { 920 | pre = g.Prelude.String() 921 | } 922 | if pre != test.Prelude { 923 | t.Errorf("Parse(%q).Prelude=\n%s\nwant:\n%s", 924 | test.Input, pre, test.Prelude) 925 | return 926 | } 927 | if s := FullString(g.Rules); s != 
test.FullString { 928 | t.Errorf("Parse(%q)\nfull string:\n%q\nwant:\n%q", 929 | test.Input, s, test.FullString) 930 | return 931 | } 932 | if s := String(g.Rules); s != test.String { 933 | t.Errorf("Parse(%q)\nstring:\n%q\nwant:\n%q", 934 | test.Input, s, test.String) 935 | return 936 | } 937 | }) 938 | } 939 | } 940 | 941 | // testRuneScanner implements io.RuneScanner, wrapping another RuneScanner, 942 | // however, whenever the original scanner would've returned a ☹ rune, 943 | // testRuneScanner instead returns an error. 944 | type testRuneScanner struct { 945 | io.RuneScanner 946 | } 947 | 948 | const testIOError = "test I/O error" 949 | 950 | func (rs testRuneScanner) ReadRune() (rune, int, error) { 951 | r, n, err := rs.RuneScanner.ReadRune() 952 | if r == '☹' { 953 | return 0, 0, errors.New(testIOError) 954 | } 955 | return r, n, err 956 | } 957 | -------------------------------------------------------------------------------- /peg/fail.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "fmt" 10 | 11 | // SimpleError returns an error with a basic error message 12 | // that describes what was expected at all of the leaf fails 13 | // with the greatest position in the tree. 14 | // 15 | // The FilePath field of the returned Error is the empty string. 16 | // The caller can set this field if to prefix the location 17 | // with the path to an input file. 18 | func SimpleError(text string, node *Fail) Error { 19 | leaves := LeafFails(node) 20 | 21 | var want string 22 | for i, l := range leaves { 23 | switch { 24 | case i == len(leaves)-1 && i == 1: 25 | want += " or " 26 | case i == len(leaves)-1 && len(want) > 1: 27 | want += ", or " 28 | case i > 0: 29 | want += ", " 30 | } 31 | want += l.Want 32 | } 33 | 34 | got := "EOF" 35 | pos := leaves[0].Pos 36 | if pos < len(text) { 37 | end := pos + 10 38 | if end > len(text) { 39 | end = len(text) 40 | } 41 | got = "'" + text[pos:end] + "'" 42 | } 43 | 44 | return Error{ 45 | Loc: Location(text, pos), 46 | Message: fmt.Sprintf("want %s; got %s", want, got), 47 | } 48 | } 49 | 50 | // Error implements error, prefixing an error message 51 | // with location information for the error. 52 | type Error struct { 53 | // FilePath is the path of the input file containing the error. 54 | FilePath string 55 | // Loc is the location of the error. 56 | Loc Loc 57 | // Message is the error message. 58 | Message string 59 | } 60 | 61 | func (err Error) Error() string { 62 | return fmt.Sprintf("%s:%d.%d: %s", 63 | err.FilePath, err.Loc.Line, err.Loc.Column, err.Message) 64 | } 65 | 66 | // LeafFails returns all fails in the tree with the greatest Pos. 
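// For example, given leaf fails at Pos 10, 15, and 20, only the fails at
// Pos 20 are returned; SimpleError relies on this to describe what was
// expected at the furthest point the parse reached.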
67 | func LeafFails(node *Fail) []*Fail { 68 | pos := -1 69 | var fails []*Fail 70 | seen := make(map[*Fail]bool) 71 | var walk func(*Fail) 72 | walk = func(n *Fail) { 73 | if seen[n] { 74 | return 75 | } 76 | seen[n] = true 77 | if len(n.Kids) == 0 { 78 | switch { 79 | case n.Pos > pos: 80 | pos = n.Pos 81 | fails = append(fails[:0], n) 82 | case n.Pos == pos: 83 | fails = append(fails, n) 84 | } 85 | return 86 | } 87 | for _, k := range n.Kids { 88 | walk(k) 89 | } 90 | } 91 | walk(node) 92 | return fails 93 | } 94 | 95 | // DedupFails removes duplicate fail branches from the tree, 96 | // keeping only the first occurrence of each. 97 | // This is useful for example before printing the Fail tree, 98 | // because the non-deduped Fail tree can be exponential 99 | // in the input size. 100 | func DedupFails(node *Fail) { 101 | seen := make(map[*Fail]bool) 102 | var walk func(*Fail) bool 103 | walk = func(n *Fail) bool { 104 | if seen[n] { 105 | return false 106 | } 107 | seen[n] = true 108 | var kids []*Fail 109 | for _, k := range n.Kids { 110 | if walk(k) { 111 | kids = append(kids, k) 112 | } 113 | } 114 | n.Kids = kids 115 | return true 116 | } 117 | walk(node) 118 | } 119 | -------------------------------------------------------------------------------- /peg/fail_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import ( 10 | "reflect" 11 | "testing" 12 | 13 | "github.com/eaburns/pretty" 14 | ) 15 | 16 | func TestDedupFails(t *testing.T) { 17 | x := &Fail{Name: "x"} 18 | z := &Fail{Name: "z"} 19 | y := &Fail{Name: "y", Kids: []*Fail{z, z}} 20 | root := &Fail{ 21 | Kids: []*Fail{ 22 | x, 23 | &Fail{ 24 | Kids: []*Fail{ 25 | y, 26 | y, 27 | }, 28 | }, 29 | x, 30 | }, 31 | } 32 | DedupFails(root) 33 | want := &Fail{ 34 | Kids: []*Fail{ 35 | &Fail{Name: "x"}, 36 | &Fail{ 37 | Kids: []*Fail{ 38 | &Fail{ 39 | Name: "y", 40 | Kids: []*Fail{ 41 | &Fail{Name: "z"}, 42 | }, 43 | }, 44 | }, 45 | }, 46 | }, 47 | } 48 | if !reflect.DeepEqual(root, want) { 49 | t.Errorf("DedupFails()=%v, want %v", 50 | pretty.String(root), pretty.String(want)) 51 | } 52 | } 53 | 54 | func TestLeafFails(t *testing.T) { 55 | x0 := &Fail{Name: "x0", Pos: 10} 56 | x1 := &Fail{Name: "x1", Pos: 10} 57 | y0 := &Fail{Name: "y0", Pos: 15} 58 | y1 := &Fail{Name: "y1", Pos: 15} 59 | z0 := &Fail{Name: "z0", Pos: 20} 60 | z1 := &Fail{Name: "z1", Pos: 20} 61 | 62 | root := &Fail{ 63 | Kids: []*Fail{ 64 | x0, 65 | y0, 66 | z0, 67 | &Fail{ 68 | Kids: []*Fail{ 69 | x1, 70 | y1, 71 | z1, 72 | z0, 73 | }, 74 | }, 75 | z1, 76 | x0, 77 | y1, 78 | }, 79 | } 80 | 81 | got := LeafFails(root) 82 | want := []*Fail{z0, z1} 83 | if !reflect.DeepEqual(got, want) { 84 | t.Errorf("LeafFails()=%s, want %s", 85 | pretty.String(got), pretty.String(want)) 86 | } 87 | } 88 | 89 | func TestSimpleError_1(t *testing.T) { 90 | text := "123456789\nabcdefg" 91 | root := &Fail{ 92 | Kids: []*Fail{ 93 | &Fail{Pos: 10, Want: "A"}, 94 | }, 95 | } 96 | err := SimpleError(text, root) 97 | want := ":2.1: want A; got 'abcdefg'" 98 | if err.Error() != want { 99 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 100 | } 101 | } 102 | 103 | func TestSimpleError_2(t *testing.T) { 104 | text := "123456789\nabcdefg" 105 | root := &Fail{ 106 | Kids: []*Fail{ 107 | &Fail{Pos: 
10, Want: "A"}, 108 | &Fail{Pos: 10, Want: "B"}, 109 | }, 110 | } 111 | err := SimpleError(text, root) 112 | want := ":2.1: want A or B; got 'abcdefg'" 113 | if err.Error() != want { 114 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 115 | } 116 | } 117 | 118 | func TestSimpleError_3(t *testing.T) { 119 | text := "123456789\nabcdefg" 120 | root := &Fail{ 121 | Kids: []*Fail{ 122 | &Fail{Pos: 10, Want: "A"}, 123 | &Fail{Pos: 10, Want: "B"}, 124 | &Fail{Pos: 10, Want: "C"}, 125 | }, 126 | } 127 | err := SimpleError(text, root) 128 | want := ":2.1: want A, B, or C; got 'abcdefg'" 129 | if err.Error() != want { 130 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /peg/loc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "unicode/utf8" 10 | 11 | // A Loc is a location in the input text. 12 | type Loc struct { 13 | Byte int 14 | Rune int 15 | Line int 16 | Column int 17 | } 18 | 19 | // Location returns the Loc at the corresponding byte offset in the text. 20 | func Location(text string, byte int) Loc { 21 | var loc Loc 22 | loc.Line = 1 23 | loc.Column = 1 24 | for byte > loc.Byte { 25 | r, w := utf8.DecodeRuneInString(text[loc.Byte:]) 26 | loc.Byte += w 27 | loc.Rune++ 28 | loc.Column++ 29 | if r == '\n' { 30 | loc.Line++ 31 | loc.Column = 1 32 | } 33 | } 34 | return loc 35 | } 36 | -------------------------------------------------------------------------------- /peg/loc_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
6 | 7 | package peg 8 | 9 | import ( 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func TestLocation(t *testing.T) { 15 | tests := []struct { 16 | in string 17 | want Loc 18 | }{ 19 | { 20 | in: "*", 21 | want: Loc{Byte: 0, Rune: 0, Line: 1, Column: 1}, 22 | }, 23 | { 24 | in: "abc*", 25 | want: Loc{Byte: 3, Rune: 3, Line: 1, Column: 4}, 26 | }, 27 | { 28 | in: "ab\n*", 29 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1}, 30 | }, 31 | { 32 | in: "ab\n*", 33 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1}, 34 | }, 35 | { 36 | in: "ab\nabc\nxyz*", 37 | want: Loc{Byte: 10, Rune: 10, Line: 3, Column: 4}, 38 | }, 39 | { 40 | in: "☺*", 41 | want: Loc{Byte: len("☺"), Rune: 1, Line: 1, Column: 2}, 42 | }, 43 | { 44 | in: "☺☺☺*", 45 | want: Loc{Byte: 3 * len("☺"), Rune: 3, Line: 1, Column: 4}, 46 | }, 47 | { 48 | in: "☺☺\n☺*", 49 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2}, 50 | }, 51 | { 52 | in: "☺☺\n☺*☹☹☹", 53 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2}, 54 | }, 55 | } 56 | for _, test := range tests { 57 | b := strings.Index(test.in, "*") 58 | if b < 0 { 59 | panic("no *") 60 | } 61 | got := Location(test.in, b) 62 | if got != test.want { 63 | t.Errorf("Location(%q, %d)=%v, want %v", test.in, b, got, test.want) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /peg/peg.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "unicode/utf8" 10 | 11 | // A Node is a node in a Peggy parse tree. 12 | type Node struct { 13 | // Name is the name of the Rule associated with the node, 14 | // or the empty string for anonymous Nodes 15 | // that are not associated with any Rule. 16 | Name string 17 | 18 | // Text is the input text of the Node's subtree. 19 | Text string 20 | 21 | // Kids are the immediate successors of this node. 22 | Kids []*Node 23 | } 24 | 25 | // A Fail is a node in a failed-parse tree. 26 | // A failed-parse tree contains all paths in a failed parse 27 | // that lead to the furthest error location in the input text. 28 | // There are two types of nodes: named and unnamed. 29 | // Named nodes represent grammar rules that failed to parse. 30 | // Unnamed nodes represent terminal expressions that failed to parse. 31 | type Fail struct { 32 | // Name is the name of the Rule associated with the node, 33 | // or the empty string if the Fail is a terminal expression failure. 34 | Name string 35 | 36 | // Pos is the byte offset into the input of the Fail. 37 | Pos int 38 | 39 | // Kids are the immediate succors of this Fail. 40 | // Kids is only non-nil for named Fail nodes. 41 | Kids []*Fail 42 | 43 | // Want is a string describing what was expected at the error position. 44 | // It is only non-empty for unnamed Fail nodes. 45 | // 46 | // It can be of one of the following forms: 47 | // "…" indicating a failed literal match, where the text between the quotes is the expected literal using Go escaping. 48 | // . indicating a failed . match. 49 | // […] indicating a failed character class match, where the text between the [ and ] is the character class. 50 | // !… where the text after ! is the string representation of a failed predicate subexpression. 
51 | // &… where the text after & is the string representation of a failed predicate subexpression. 52 | // … the error-name of a rule. 53 | // For example, "int" in rule: Integer "int" <- [0-9]. 54 | Want string 55 | } 56 | 57 | // DecodeRuneInString is utf8.DecodeRuneInString. 58 | // It's here so parsers can just include peg, and not also need unicode/utf8. 59 | func DecodeRuneInString(s string) (rune, int) { 60 | return utf8.DecodeRuneInString(s) 61 | } 62 | -------------------------------------------------------------------------------- /peg/pretty.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import ( 10 | "bytes" 11 | "io" 12 | "strconv" 13 | ) 14 | 15 | type nodeOrFail interface { 16 | name() string 17 | numKids() int 18 | kid(int) nodeOrFail 19 | text() string 20 | } 21 | 22 | func (f *Node) name() string { return f.Name } 23 | func (f *Node) numKids() int { return len(f.Kids) } 24 | func (f *Node) kid(i int) nodeOrFail { return f.Kids[i] } 25 | func (f *Node) text() string { return f.Text } 26 | func (f *Fail) name() string { return f.Name } 27 | func (f *Fail) numKids() int { return len(f.Kids) } 28 | func (f *Fail) kid(i int) nodeOrFail { return f.Kids[i] } 29 | func (f *Fail) text() string { return f.Want } 30 | 31 | // Pretty returns a human-readable string of a Node or Fail 32 | // and the subtree beneath it. 33 | // The output looks like: 34 | // { 35 | // , 36 | // , 37 | // … 38 | // , 39 | // } 40 | func Pretty(n nodeOrFail) string { 41 | b := bytes.NewBuffer(nil) 42 | PrettyWrite(b, n) 43 | return b.String() 44 | } 45 | 46 | // PrettyWrite is like Pretty but outputs to an io.Writer. 
47 | func PrettyWrite(w io.Writer, n nodeOrFail) error { 48 | return prettyWrite(w, "", n) 49 | } 50 | 51 | func prettyWrite(w io.Writer, tab string, n nodeOrFail) error { 52 | if _, err := io.WriteString(w, tab); err != nil { 53 | return err 54 | } 55 | if n.numKids() == 0 { 56 | if n.name() != "" { 57 | if _, err := io.WriteString(w, n.name()+"("); err != nil { 58 | return err 59 | } 60 | } 61 | if _, err := io.WriteString(w, `"`+n.text()+`"`); err != nil { 62 | return err 63 | } 64 | if n.name() != "" { 65 | if _, err := io.WriteString(w, ")"); err != nil { 66 | return err 67 | } 68 | } 69 | return nil 70 | } 71 | if _, err := io.WriteString(w, n.name()); err != nil { 72 | return err 73 | } 74 | if f, ok := n.(*Fail); ok { 75 | pos := "[" + strconv.Itoa(f.Pos) + "]" 76 | if _, err := io.WriteString(w, pos); err != nil { 77 | return err 78 | } 79 | } 80 | if n.numKids() == 0 { 81 | if n.name() == "" { 82 | if _, err := io.WriteString(w, "{}"); err != nil { 83 | return err 84 | } 85 | } 86 | return nil 87 | } 88 | if _, err := io.WriteString(w, "{"); err != nil { 89 | return err 90 | } 91 | if n.numKids() == 1 && n.kid(0).numKids() == 0 { 92 | if err := prettyWrite(w, "", n.kid(0)); err != nil { 93 | return err 94 | } 95 | if _, err := io.WriteString(w, "}"); err != nil { 96 | return err 97 | } 98 | return nil 99 | } 100 | for i := 0; i < n.numKids(); i++ { 101 | if _, err := io.WriteString(w, "\n"); err != nil { 102 | return err 103 | } 104 | if err := prettyWrite(w, tab+"\t", n.kid(i)); err != nil { 105 | return err 106 | } 107 | if _, err := io.WriteString(w, ","); err != nil { 108 | return err 109 | } 110 | } 111 | if _, err := io.WriteString(w, "\n"+tab+"}"); err != nil { 112 | return err 113 | } 114 | return nil 115 | } 116 | -------------------------------------------------------------------------------- /rule.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import "fmt" 10 | 11 | // Grammar is a PEG grammar. 12 | type Grammar struct { 13 | // Prelude is custom code added to the beginning of the generated output. 14 | Prelude Text 15 | 16 | // Rules are the rules of the grammar. 17 | Rules []Rule 18 | 19 | // CheckedRules are the rules successfully checked by the Check pass. 20 | // It contains all non-template rules and all expanded templates. 21 | CheckedRules []*Rule 22 | } 23 | 24 | // A Rule defines a production in a PEG grammar. 25 | type Rule struct { 26 | Name 27 | 28 | // ErrorName, if non-nil, indicates that this is a named rule. 29 | // Errors beneath a named rule are collapsed, 30 | // reporting the error position as the start of the rule's parse 31 | // with the "want" message set to ErrorName. 32 | // 33 | // If nil, the rule is unnamed and does not collapse errors. 34 | ErrorName Text 35 | 36 | // Expr is the PEG expression matched by the rule. 37 | Expr Expr 38 | 39 | // N is the rule's unique integer within its containing Grammar. 40 | // It is a small integer that may be used as an array index. 41 | N int 42 | 43 | // typ is the type of the rule in the action pass. 44 | // typ is nil before the checkLeft pass add non-nil after. 45 | typ *string 46 | 47 | // epsilon indicates whether the rule can match the empty string. 
48 | epsilon bool 49 | 50 | // Labels is the set of all label names in the rule's expression. 51 | Labels []*LabelExpr 52 | } 53 | 54 | func (r *Rule) Begin() Loc { return r.Name.Begin() } 55 | func (r *Rule) End() Loc { return r.Expr.End() } 56 | func (r Rule) Type() string { return *r.typ } 57 | 58 | // A Name is the name of a rule template. 59 | type Name struct { 60 | // Name is the name of the template. 61 | Name Text 62 | 63 | // Args are the arguments or parameters of the template. 64 | Args []Text 65 | } 66 | 67 | func (n Name) Begin() Loc { return n.Name.Begin() } 68 | func (n Name) End() Loc { 69 | if len(n.Args) == 0 { 70 | return n.Name.End() 71 | } 72 | return n.Args[len(n.Args)-1].End() 73 | } 74 | 75 | // Text is a string of text located along with its location in the input. 76 | type Text interface { 77 | Located 78 | // String is the text string. 79 | String() string 80 | } 81 | 82 | // Loc identifies a location in a file by its line and column numbers. 83 | type Loc struct { 84 | // File is the name of the input file. 85 | File string 86 | // Line is line number of the location. 87 | // The first line of input is line number 1. 88 | Line int 89 | // Col is the Loc's rune offset into the line. 90 | // Col 0 is before the first rune on the line. 91 | Col int 92 | } 93 | 94 | // Less returns whether the receiver is earlier in the input than the argument. 95 | func (l Loc) Less(j Loc) bool { 96 | if l.Line == j.Line { 97 | return l.Col < j.Col 98 | } 99 | return l.Line < j.Line 100 | } 101 | 102 | // PrettyPrint implements the pretty.PrettyPrinter interface, 103 | // returning a simpler, one-line string form of the Loc. 104 | func (l Loc) PrettyPrint() string { return fmt.Sprintf("Loc{%d, %d}", l.Line, l.Col) } 105 | 106 | // Begin returns the Loc. 107 | func (l Loc) Begin() Loc { return l } 108 | 109 | // End returns the Loc. 110 | func (l Loc) End() Loc { return l } 111 | 112 | // Expr is PEG expression that matches a sequence of input runes. 113 | type Expr interface { 114 | Located 115 | String() string 116 | 117 | // fullString returns the fully parenthesized string representation. 118 | fullString() string 119 | 120 | // Walk calls a function for each expression in the tree. 121 | // Walk stops early if the function returns false. 122 | Walk(func(Expr) bool) bool 123 | 124 | // substitute returns a clone of the expression 125 | // with all occurrences of identifiers that are keys of sub 126 | // substituted with the corresponding value. 127 | // substitute must not be called after Check, 128 | // because it does not update bookkeeping fields 129 | // that are set by the Check pass. 130 | substitute(sub map[string]string) Expr 131 | 132 | // Type returns the type of the expression in the Action Tree. 133 | // This is the Go type associated with the expression. 134 | Type() string 135 | 136 | // epsilon returns whether the rule can match the empty string. 137 | epsilon() bool 138 | 139 | // CanFail returns whether the node can ever fail to parse. 140 | // Nodes like * or ?, for example, can never fail. 141 | // Parents of never-fail nodes needn't emit a failure branch, 142 | // as it will never be called. 143 | CanFail() bool 144 | 145 | // checkLeft checks for left-recursion and sets rule types. 146 | checkLeft(rules map[string]*Rule, p path, errs *Errors) 147 | 148 | // check checks for undefined identifiers, 149 | // linking defined identifiers to rules; 150 | // and checks for type mismatches. 
151 | check(ctx ctx, valueUsed bool, errs *Errors) 152 | } 153 | 154 | // A Choice is an ordered choice between expressions. 155 | type Choice struct{ Exprs []Expr } 156 | 157 | func (e *Choice) Begin() Loc { return e.Exprs[0].Begin() } 158 | func (e *Choice) End() Loc { return e.Exprs[len(e.Exprs)-1].End() } 159 | 160 | func (e *Choice) Walk(f func(Expr) bool) bool { 161 | if !f(e) { 162 | return false 163 | } 164 | for _, kid := range e.Exprs { 165 | if !kid.Walk(f) { 166 | return false 167 | } 168 | } 169 | return true 170 | } 171 | 172 | func (e *Choice) substitute(sub map[string]string) Expr { 173 | substitute := *e 174 | substitute.Exprs = make([]Expr, len(e.Exprs)) 175 | for i, kid := range e.Exprs { 176 | substitute.Exprs[i] = kid.substitute(sub) 177 | } 178 | return &substitute 179 | } 180 | 181 | // Type returns the type of a choice expression, 182 | // which is the type of its first branch. 183 | // All other branches must have the same type; 184 | // this is verified during the Check pass. 185 | func (e *Choice) Type() string { return e.Exprs[0].Type() } 186 | 187 | func (e *Choice) epsilon() bool { 188 | for _, e := range e.Exprs { 189 | if e.epsilon() { 190 | return true 191 | } 192 | } 193 | return false 194 | } 195 | 196 | func (e *Choice) CanFail() bool { 197 | // A choice node can only fail if all of its branches can fail. 198 | // If there is a non-failing branch, it will always accept. 199 | for _, s := range e.Exprs { 200 | if !s.CanFail() { 201 | return false 202 | } 203 | } 204 | return true 205 | } 206 | 207 | // An Action is an action expression: 208 | // a subexpression and code to run if matched. 209 | type Action struct { 210 | Expr Expr 211 | // Code is the Go code to execute if the subexpression is matched. 212 | // The Begin and End locations of Code includes the { } delimiters, 213 | // but the string does not. 214 | // 215 | // TODO: specify the environment under which the code is run. 216 | Code Text 217 | 218 | // ReturnType is the go type of the value returned by the action. 219 | ReturnType string 220 | 221 | // Labels are the labels that are in scope of this action. 222 | Labels []*LabelExpr 223 | } 224 | 225 | func (e *Action) Begin() Loc { return e.Expr.Begin() } 226 | func (e *Action) End() Loc { return e.Code.End() } 227 | func (e *Action) Type() string { return e.ReturnType } 228 | func (e *Action) epsilon() bool { return e.Expr.epsilon() } 229 | func (e *Action) CanFail() bool { return e.Expr.CanFail() } 230 | 231 | func (e *Action) Walk(f func(Expr) bool) bool { 232 | return f(e) && e.Expr.Walk(f) 233 | } 234 | 235 | func (e *Action) substitute(sub map[string]string) Expr { 236 | substitute := *e 237 | substitute.Expr = e.Expr.substitute(sub) 238 | substitute.Labels = nil 239 | return &substitute 240 | } 241 | 242 | // A Sequence is a sequence of expressions.
243 | type Sequence struct{ Exprs []Expr } 244 | 245 | func (e *Sequence) Begin() Loc { return e.Exprs[0].Begin() } 246 | func (e *Sequence) End() Loc { return e.Exprs[len(e.Exprs)-1].End() } 247 | 248 | func (e *Sequence) Walk(f func(Expr) bool) bool { 249 | if !f(e) { 250 | return false 251 | } 252 | for _, kid := range e.Exprs { 253 | if !kid.Walk(f) { 254 | return false 255 | } 256 | } 257 | return true 258 | } 259 | 260 | func (e *Sequence) substitute(sub map[string]string) Expr { 261 | substitute := *e 262 | substitute.Exprs = make([]Expr, len(e.Exprs)) 263 | for i, kid := range e.Exprs { 264 | substitute.Exprs[i] = kid.substitute(sub) 265 | } 266 | return &substitute 267 | } 268 | 269 | // Type returns the type of a sequence expression, 270 | // which is based on the type of its first sub-expression. 271 | // All other sub-expressions must have the same type; 272 | // this is verified during the Check pass. 273 | // 274 | // If the first sub-expression is a string, 275 | // the type of the entire sequence is a string. 276 | // The value is the concatenation of all sub-expressions. 277 | // 278 | // Otherwise, the type is a slice of the first sub-expression type. 279 | // The value is the slice of all sub-expression values. 280 | func (e *Sequence) Type() string { 281 | t := e.Exprs[0].Type() 282 | switch t { 283 | case "": 284 | return "" 285 | case "string": 286 | return "string" 287 | default: 288 | return "[]" + t 289 | } 290 | } 291 | 292 | func (e *Sequence) epsilon() bool { 293 | for _, e := range e.Exprs { 294 | if !e.epsilon() { 295 | return false 296 | } 297 | } 298 | return true 299 | } 300 | 301 | func (e *Sequence) CanFail() bool { 302 | for _, s := range e.Exprs { 303 | if s.CanFail() { 304 | return true 305 | } 306 | } 307 | return false 308 | } 309 | 310 | // A LabelExpr is a labeled subexpression. 311 | // The label can be used in actions to refer to the result of the subexpression. 312 | type LabelExpr struct { 313 | // Label is the text of the label, not including the :. 314 | Label Text 315 | Expr Expr 316 | // N is a small integer assigned to this label 317 | // that is unique within the containing Rule. 318 | // It is a small integer that may be used as an array index. 319 | N int 320 | } 321 | 322 | func (e *LabelExpr) Begin() Loc { return e.Label.Begin() } 323 | func (e *LabelExpr) End() Loc { return e.Expr.End() } 324 | func (e *LabelExpr) Type() string { return e.Expr.Type() } 325 | func (e *LabelExpr) epsilon() bool { return e.Expr.epsilon() } 326 | func (e *LabelExpr) CanFail() bool { return e.Expr.CanFail() } 327 | 328 | func (e *LabelExpr) Walk(f func(Expr) bool) bool { 329 | return f(e) && e.Expr.Walk(f) 330 | } 331 | 332 | func (e *LabelExpr) substitute(sub map[string]string) Expr { 333 | substitute := *e 334 | substitute.Expr = e.Expr.substitute(sub) 335 | return &substitute 336 | } 337 | 338 | // A PredExpr is a non-consuming predicate expression: 339 | // If it succeeds (or fails, in the case of Neg), 340 | // return success and consume no input. 341 | // If it fails (or succeeds, in the case of Neg), 342 | // return failure and consume no input. 343 | // Predicate expressions allow a powerful form of lookahead. 344 | type PredExpr struct { 345 | Expr Expr 346 | // Neg indicates that the result of the predicate is negated. 347 | Neg bool 348 | // Loc is the location of the operator, & or !.
349 | Loc Loc 350 | } 351 | 352 | func (e *PredExpr) Begin() Loc { return e.Loc } 353 | func (e *PredExpr) End() Loc { return e.Expr.End() } 354 | 355 | // Type returns the type of the predicate expression, 356 | // which is a string; the value is always the empty string. 357 | func (e *PredExpr) Type() string { return "string" } 358 | 359 | func (e *PredExpr) epsilon() bool { return true } 360 | func (e *PredExpr) CanFail() bool { return e.Expr.CanFail() } 361 | 362 | func (e *PredExpr) Walk(f func(Expr) bool) bool { 363 | return f(e) && e.Expr.Walk(f) 364 | } 365 | 366 | func (e *PredExpr) substitute(sub map[string]string) Expr { 367 | substitute := *e 368 | substitute.Expr = e.Expr.substitute(sub) 369 | return &substitute 370 | } 371 | 372 | // A RepExpr is a repetition expression, specifying whether the sub-expression 373 | // should be matched any number of times (*) or one or more times (+). 374 | type RepExpr struct { 375 | // Op is one of * or +. 376 | Op rune 377 | Expr Expr 378 | // Loc is the location of the operator, * or +. 379 | Loc Loc 380 | } 381 | 382 | func (e *RepExpr) Begin() Loc { return e.Expr.Begin() } 383 | func (e *RepExpr) End() Loc { return e.Loc } 384 | 385 | // Type returns the type of the repetition expression, 386 | // which is based on the type of its sub-expression. 387 | // 388 | // If the sub-expression type is string, 389 | // the repetition expression type is a string. 390 | // The value is the concatenation of all matches, 391 | // or the empty string if nothing matches. 392 | // 393 | // Otherwise, the type is a slice of the sub-expression type. 394 | // The value contains an element for each match 395 | // of the sub-expression. 396 | func (e *RepExpr) Type() string { 397 | switch t := e.Expr.Type(); t { 398 | case "": 399 | return "" 400 | case "string": 401 | return t 402 | default: 403 | return "[]" + t 404 | } 405 | } 406 | 407 | func (e *RepExpr) epsilon() bool { return e.Op == '*' } 408 | func (e *RepExpr) CanFail() bool { return e.Op == '+' && e.Expr.CanFail() } 409 | 410 | func (e *RepExpr) Walk(f func(Expr) bool) bool { 411 | return f(e) && e.Expr.Walk(f) 412 | } 413 | 414 | func (e *RepExpr) substitute(sub map[string]string) Expr { 415 | substitute := *e 416 | substitute.Expr = e.Expr.substitute(sub) 417 | return &substitute 418 | } 419 | 420 | // An OptExpr is an optional expression, which may or may not be matched. 421 | type OptExpr struct { 422 | Expr Expr 423 | // Loc is the location of the ?. 424 | Loc Loc 425 | } 426 | 427 | func (e *OptExpr) Begin() Loc { return e.Expr.Begin() } 428 | func (e *OptExpr) End() Loc { return e.Loc } 429 | 430 | // Type returns the type of the optional expression, 431 | // which is based on the type of its sub-expression. 432 | // 433 | // If the sub-expression type is string, 434 | // the optional expression type is a string. 435 | // The value is the value of the sub-expression if it matched, 436 | // or the empty string if it did not match. 437 | // 438 | // Otherwise, the type is a pointer to the type of the sub-expression. 439 | // The value is a pointer to the sub-expression's value if it matched, 440 | // or a nil pointer if it did not match.
441 | func (e *OptExpr) Type() string { 442 | switch t := e.Expr.Type(); { 443 | case t == "": 444 | return "" 445 | case t == "string": 446 | return t 447 | default: 448 | return "*" + e.Expr.Type() 449 | } 450 | } 451 | 452 | func (e *OptExpr) epsilon() bool { return true } 453 | func (e *OptExpr) CanFail() bool { return false } 454 | 455 | func (e *OptExpr) Walk(f func(Expr) bool) bool { 456 | return f(e) && e.Expr.Walk(f) 457 | } 458 | 459 | func (e *OptExpr) substitute(sub map[string]string) Expr { 460 | substitute := *e 461 | substitute.Expr = e.Expr.substitute(sub) 462 | return &substitute 463 | } 464 | 465 | // An Ident is an identifier referring to the name of another rule, 466 | // indicating to match that rule's expression. 467 | type Ident struct { 468 | Name 469 | 470 | // rule is the rule referred to by this identifier. 471 | // It is set during check. 472 | rule *Rule 473 | } 474 | 475 | func (e *Ident) Begin() Loc { return e.Name.Begin() } 476 | func (e *Ident) End() Loc { return e.Name.End() } 477 | func (e *Ident) CanFail() bool { return true } 478 | func (e *Ident) Walk(f func(Expr) bool) bool { return f(e) } 479 | 480 | // Type returns the type of the identifier expression, 481 | // which is the type of its corresponding rule. 482 | func (e *Ident) Type() string { 483 | if e.rule == nil { 484 | return "" 485 | } 486 | return e.rule.Type() 487 | } 488 | 489 | func (e *Ident) epsilon() bool { 490 | if e.rule == nil { 491 | return false 492 | } 493 | return e.rule.epsilon 494 | } 495 | 496 | func (e *Ident) substitute(sub map[string]string) Expr { 497 | substitute := *e 498 | if s, ok := sub[e.Name.String()]; ok { 499 | substitute.Name = Name{ 500 | Name: text{ 501 | str: s, 502 | begin: e.Name.Begin(), 503 | end: e.Name.End(), 504 | }, 505 | } 506 | } 507 | substitute.Args = make([]Text, len(e.Args)) 508 | for i, a := range e.Args { 509 | if s, ok := sub[a.String()]; !ok { 510 | substitute.Args[i] = e.Args[i] 511 | } else { 512 | substitute.Args[i] = text{ 513 | str: s, 514 | begin: a.Begin(), 515 | end: a.End(), 516 | } 517 | } 518 | } 519 | return &substitute 520 | } 521 | 522 | // A SubExpr simply wraps an expression. 523 | // It holds no extra information beyond tracking parentheses. 524 | // Its purpose is to allow easily re-inserting the parentheses 525 | // when stringifying an expression, without the need 526 | // to compute precedence inversion for each subexpression. 527 | type SubExpr struct { 528 | Expr 529 | // Open is the location of the open parenthesis. 530 | // Close is the location of the close parenthesis. 531 | Open, Close Loc 532 | } 533 | 534 | func (e *SubExpr) Begin() Loc { return e.Open } 535 | func (e *SubExpr) End() Loc { return e.Close } 536 | func (e *SubExpr) Type() string { return e.Expr.Type() } 537 | func (e *SubExpr) epsilon() bool { return e.Expr.epsilon() } 538 | func (e *SubExpr) CanFail() bool { return e.Expr.CanFail() } 539 | 540 | func (e *SubExpr) Walk(f func(Expr) bool) bool { 541 | return f(e) && e.Expr.Walk(f) 542 | } 543 | 544 | func (e *SubExpr) substitute(sub map[string]string) Expr { 545 | substitute := *e 546 | substitute.Expr = e.Expr.substitute(sub) 547 | return &substitute 548 | } 549 | 550 | // A PredCode is a predicate code expression, 551 | // allowing predication using a Go boolean expression. 552 | // 553 | // TODO: Specify the conditions under which the expression is evaluated. 554 | type PredCode struct { 555 | // Code is a Go boolean expression.
556 | // The Begin and End locations of Code includes the { } delimiters, 557 | // but the string does not. 558 | Code Text 559 | // Neg indicates that the result of the predicate is negated. 560 | Neg bool 561 | // Loc is the location of the operator, & or !. 562 | Loc Loc 563 | 564 | // Labels are the labels that are in scope of this action. 565 | Labels []*LabelExpr 566 | } 567 | 568 | func (e *PredCode) Begin() Loc { return e.Loc } 569 | func (e *PredCode) End() Loc { return e.Code.End() } 570 | 571 | // Type returns the type of the predicate code expression, 572 | // which is a string; the value is always the empty string. 573 | func (e *PredCode) Type() string { return "string" } 574 | 575 | func (e *PredCode) epsilon() bool { return true } 576 | func (e *PredCode) CanFail() bool { return true } 577 | func (e *PredCode) Walk(f func(Expr) bool) bool { return f(e) } 578 | 579 | func (e *PredCode) substitute(sub map[string]string) Expr { 580 | substitute := *e 581 | substitute.Labels = nil 582 | return &substitute 583 | } 584 | 585 | // A Literal matches a literal text string. 586 | type Literal struct { 587 | // Text is the text to match. 588 | // The Begin and End locations of Text includes the ' or " delimiters, 589 | // but the string does not. 590 | Text Text 591 | } 592 | 593 | func (e *Literal) Begin() Loc { return e.Text.Begin() } 594 | func (e *Literal) End() Loc { return e.Text.End() } 595 | func (e *Literal) Type() string { return "string" } 596 | func (e *Literal) epsilon() bool { return false } 597 | func (e *Literal) CanFail() bool { return true } 598 | func (e *Literal) Walk(f func(Expr) bool) bool { return f(e) } 599 | 600 | func (e *Literal) substitute(sub map[string]string) Expr { 601 | substitute := *e 602 | return &substitute 603 | } 604 | 605 | // A CharClass matches a single rune from a set of acceptable 606 | // (or unacceptable if Neg) runes. 607 | type CharClass struct { 608 | // Spans are rune spans accepted (or rejected) by the character class. 609 | // The 0th rune is always ≤ the 1st. 610 | // Single rune matches are a span of both the same rune. 611 | Spans [][2]rune 612 | 613 | // Neg indicates that the input must not match any in the set. 614 | Neg bool 615 | 616 | // Open and Close are the Loc of [ and ] respectively. 617 | Open, Close Loc 618 | } 619 | 620 | func (e *CharClass) Begin() Loc { return e.Open } 621 | func (e *CharClass) End() Loc { return e.Close } 622 | func (e *CharClass) Type() string { return "string" } 623 | func (e *CharClass) epsilon() bool { return false } 624 | func (e *CharClass) CanFail() bool { return true } 625 | func (e *CharClass) Walk(f func(Expr) bool) bool { return f(e) } 626 | 627 | func (e *CharClass) substitute(sub map[string]string) Expr { 628 | substitute := *e 629 | return &substitute 630 | } 631 | 632 | // Any matches any rune. 633 | type Any struct { 634 | // Loc is the location of the . symbol. 
635 | Loc Loc 636 | } 637 | 638 | func (e *Any) Begin() Loc { return e.Loc } 639 | func (e *Any) End() Loc { return Loc{Line: e.Loc.Line, Col: e.Loc.Col + 1} } 640 | func (e *Any) Type() string { return "string" } 641 | func (e *Any) epsilon() bool { return false } 642 | func (e *Any) CanFail() bool { return true } 643 | func (e *Any) Walk(f func(Expr) bool) bool { return f(e) } 644 | 645 | func (e *Any) substitute(sub map[string]string) Expr { 646 | substitute := *e 647 | return &substitute 648 | } 649 | -------------------------------------------------------------------------------- /string.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "strconv" 12 | "strings" 13 | ) 14 | 15 | // String returns the string representation of the rules. 16 | // The output contains no comments or whitespace, 17 | // except for a single space, " ", 18 | // separating sub-exprsessions of a sequence, 19 | // and on either side of <-. 20 | func String(rules []Rule) string { 21 | var s string 22 | for _, r := range rules { 23 | if s != "" { 24 | s += "\n" 25 | } 26 | s += r.String() 27 | } 28 | return s 29 | } 30 | 31 | // String returns the string representation of a rule. 32 | // The output contains no comments or whitespace, 33 | // except for a single space, " ", 34 | // separating sub-exprsessions of a sequence, 35 | // and on either side of <-. 36 | func (r *Rule) String() string { 37 | var name string 38 | if r.ErrorName != nil { 39 | name = " " + strconv.Quote(r.ErrorName.String()) 40 | } 41 | return r.Name.String() + name + " <- " + r.Expr.String() 42 | } 43 | 44 | func (n Name) String() string { 45 | if len(n.Args) == 0 { 46 | return n.Name.String() 47 | } 48 | s := n.Name.String() + "<" 49 | for i, a := range n.Args { 50 | if i > 0 { 51 | s += ", " 52 | } 53 | s += a.String() 54 | } 55 | return s + ">" 56 | } 57 | 58 | // Ident returns a Go identifier for the name. 59 | func (n Name) Ident() string { 60 | if len(n.Args) == 0 { 61 | return n.Name.String() 62 | } 63 | s := n.Name.String() + "__" 64 | for i, a := range n.Args { 65 | if i > 0 { 66 | s += "__" 67 | } 68 | s += a.String() 69 | } 70 | return s 71 | } 72 | 73 | func (e *Choice) String() string { 74 | s := e.Exprs[0].String() 75 | for _, sub := range e.Exprs[1:] { 76 | s += "/" + sub.String() 77 | } 78 | return s 79 | } 80 | 81 | func (e *Action) String() string { 82 | if *prettyPrint { 83 | return e.Expr.String() 84 | } 85 | return e.Expr.String() + " {…}" 86 | } 87 | 88 | func (e *Sequence) String() string { 89 | s := e.Exprs[0].String() 90 | for _, sub := range e.Exprs[1:] { 91 | s += " " + sub.String() 92 | } 93 | return s 94 | } 95 | 96 | func (e *LabelExpr) String() string { 97 | if *prettyPrint { 98 | return e.Expr.String() 99 | } 100 | return e.Label.String() + ":" + e.Expr.String() 101 | } 102 | 103 | func (e *PredExpr) String() string { 104 | s := "&" 105 | if e.Neg { 106 | s = "!" 107 | } 108 | return s + e.Expr.String() 109 | } 110 | 111 | func (e *RepExpr) String() string { 112 | return e.Expr.String() + string([]rune{e.Op}) 113 | } 114 | 115 | func (e *OptExpr) String() string { 116 | return e.Expr.String() + "?" 
117 | } 118 | 119 | func (e *SubExpr) String() string { 120 | return "(" + e.Expr.String() + ")" 121 | } 122 | 123 | func (e *Ident) String() string { 124 | return e.Name.String() 125 | } 126 | 127 | func (e *PredCode) String() string { 128 | s := "&{" 129 | if e.Neg { 130 | s = "!{" 131 | } 132 | return s + "…}" 133 | } 134 | 135 | func (e *Literal) String() string { 136 | s := strconv.QuoteToGraphic(e.Text.String()) 137 | // Replace some combining characters with their escaped version. 138 | for _, sub := range []string{ 139 | "\u0301", 140 | "\u0304", 141 | "\u030C", 142 | "\u0306", 143 | "\u0309", 144 | "\u0302", 145 | "\u0300", 146 | "\u0303", 147 | } { 148 | q := strconv.QuoteToASCII(sub) 149 | s = strings.Replace(s, sub, q[1:len(q)-1], -1) 150 | } 151 | return s 152 | } 153 | 154 | func (e *CharClass) String() string { 155 | s := "[" 156 | if e.Neg { 157 | s += "^" 158 | } 159 | for _, sp := range e.Spans { 160 | if sp[0] == sp[1] { 161 | s += charClassEsc(sp[0]) 162 | } else { 163 | s += charClassEsc(sp[0]) + "-" + charClassEsc(sp[1]) 164 | } 165 | } 166 | return s + "]" 167 | } 168 | 169 | func charClassEsc(r rune) string { 170 | switch r { 171 | case '^': 172 | return `\^` 173 | case '-': 174 | return `\-` 175 | case ']': 176 | return `\]` 177 | } 178 | s := strconv.QuoteRuneToGraphic(r) 179 | return strings.TrimPrefix(strings.TrimSuffix(s, "'"), "'") 180 | } 181 | 182 | func (e *Any) String() string { return "." } 183 | 184 | // FullString returns the fully parenthesized string representation of the rules. 185 | // The output contains no comments or whitespace, 186 | // except for a single space, " ", 187 | // separating sub-exprsessions of a sequence, 188 | // and on either side of <-. 189 | func FullString(rules []Rule) string { 190 | var s string 191 | for _, r := range rules { 192 | if s != "" { 193 | s += "\n" 194 | } 195 | 196 | var name string 197 | if r.ErrorName != nil { 198 | name = " " + strconv.Quote(r.ErrorName.String()) 199 | } 200 | s += fmt.Sprintf("%s%s <- %s", r.Name, name, r.Expr.fullString()) 201 | } 202 | return s 203 | } 204 | 205 | func (e *Choice) fullString() string { 206 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString() 207 | for _, sub := range e.Exprs[1:] { 208 | s += "/" + sub.fullString() + ")" 209 | } 210 | return s 211 | } 212 | 213 | func (e *Action) fullString() string { 214 | return "(" + e.Expr.fullString() + " {" + e.Code.String() + "})" 215 | } 216 | 217 | func (e *Sequence) fullString() string { 218 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString() 219 | for _, sub := range e.Exprs[1:] { 220 | s += " " + sub.fullString() + ")" 221 | } 222 | return s 223 | } 224 | 225 | func (e *LabelExpr) fullString() string { 226 | return fmt.Sprintf("(%s:%s)", e.Label.String(), e.Expr.fullString()) 227 | } 228 | 229 | func (e *PredExpr) fullString() string { 230 | if e.Neg { 231 | return fmt.Sprintf("(!%s)", e.Expr.fullString()) 232 | } 233 | return fmt.Sprintf("(&%s)", e.Expr.fullString()) 234 | } 235 | 236 | func (e *RepExpr) fullString() string { 237 | return fmt.Sprintf("(%s%c)", e.Expr.fullString(), e.Op) 238 | } 239 | 240 | func (e *OptExpr) fullString() string { 241 | return "(" + e.Expr.fullString() + "?)" 242 | } 243 | 244 | func (e *Ident) fullString() string { return "(" + e.String() + ")" } 245 | 246 | func (e *PredCode) fullString() string { 247 | s := "(&{" 248 | if e.Neg { 249 | s = "(!{" 250 | } 251 | return s + e.Code.String() + "})" 252 | } 253 | 254 | func (e *Literal) fullString() string { return 
"(" + e.String() + ")" } 255 | 256 | func (e *CharClass) fullString() string { return "(" + e.String() + ")" } 257 | 258 | func (e *Any) fullString() string { return "(" + e.String() + ")" } 259 | --------------------------------------------------------------------------------