├── .travis.yml ├── AUTHORS ├── CONTRIBUTING.md ├── LICENSE ├── README.md ├── action_test.go ├── check.go ├── check_test.go ├── error.go ├── example ├── calc │ ├── calc.go │ └── calc.peggy └── label_names │ ├── label_names.go │ └── label_names.peggy ├── gen.go ├── gen_test.go ├── go.go ├── go.mod ├── go.sum ├── gok.sh ├── grammar.go ├── grammar.y ├── lex.go ├── main.go ├── parse_test.go ├── peg ├── fail.go ├── fail_test.go ├── loc.go ├── loc_test.go ├── peg.go └── pretty.go ├── rule.go └── string.go /.travis.yml: -------------------------------------------------------------------------------- 1 | language: go 2 | 3 | go: 1.12 4 | 5 | notifications: 6 | email: false 7 | 8 | env: 9 | - PATH=$HOME/gopath/bin:$PATH 10 | 11 | install: 12 | - go get golang.org/x/tools/cmd/goyacc 13 | - go get golang.org/x/lint/golint 14 | - go get -t -v ./... && go build -v ./... 15 | 16 | script: 17 | - ./gok.sh 18 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | # This is the list of Peggy authors for copyright purposes. 2 | # 3 | # This does not necessarily list everyone who has contributed code, since in 4 | # some cases, their employer may be the copyright holder. To see the full list 5 | # of contributors, see the revision history in source control. 6 | Google Inc. 7 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # How to Contribute 2 | 3 | We'd love to accept your patches and contributions to this project. There are 4 | just a few small guidelines you need to follow. 5 | 6 | ## Contributor License Agreement 7 | 8 | Contributions to this project must be accompanied by a Contributor License 9 | Agreement. You (or your employer) retain the copyright to your contribution, 10 | this simply gives us permission to use and redistribute your contributions as 11 | part of the project. Head over to to see 12 | your current agreements on file or to sign a new one. 13 | 14 | You generally only need to submit a CLA once, so if you've already submitted one 15 | (even if it was for a different project), you probably don't need to do it 16 | again. 17 | 18 | ## Code reviews 19 | 20 | All submissions, including submissions by project members, require review. We 21 | use GitHub pull requests for this purpose. Consult 22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more 23 | information on using pull requests. 24 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright 2017, The Peggy Authors 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions are 5 | met: 6 | 7 | * Redistributions of source code must retain the above copyright 8 | notice, this list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above 11 | copyright notice, this list of conditions and the following disclaimer 12 | in the documentation and/or other materials provided with the 13 | distribution. 14 | 15 | * Neither the name of Google Inc. 
nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 | 
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
[![Build Status](https://travis-ci.org/eaburns/peggy.svg?branch=master)](https://travis-ci.org/eaburns/peggy)

# Introduction

Peggy is a Parsing Expression Grammar
([PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar))
parser generator.

The generated parser is a
[packrat parser](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars).
However, the implementation is somewhat novel (I believe).

# Background

Packrat parsers work by doing a recursive descent on the grammar rules,
backtracking when a rule fails to accept.
To prevent exponential backtracking, a memo table remembers
the parse result for each rule at each point in the input.
This way, when backtracking encounters a subtree of the grammar that was already tried,
it can compute the result in constant time by looking it up in the memo table
instead of computing the parse again.

Because of the memo table, packrat parsers for PEG grammars
parse in time linear in the size of the input
and use memory linear in the size of the input too.
(Note that other common parser generators,
such as yacc for LALR(1) grammars,
are linear time in the size of the input
and linear space in the _depth of the parse_,
which can be smaller than the input size.)

A common way to implement the memo table is to use a hash table.
The key is a pair of the grammar rule and the input position,
and the value is the result of parsing that rule at that position
(the result of any parser actions) and the number of runes consumed,
or an indication that the parse failed.

A problem that I've found with this approach is that
for grammars that tend to do a lot of backtracking,
a significant amount of time is spent modifying and accessing the memo table.
Hash table lookups and inserts are expected constant time,
but in the face of much backtracking, that constant time adds up.

In addition, hash tables tend to be implemented with linked structures,
which take up additional memory for pointers and allocation overhead.
Finally, as they grow large, linked structures take more time
for the garbage collector to scan.
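
To make the conventional design concrete, here is a minimal sketch of such a
hash-table memo table, keyed by rule and position. This is illustrative only,
not Peggy's implementation; the `memoKey`, `memoEntry`, and `memoTable` names
are made up for the example.

```
package memo

// memoKey identifies one parse attempt: a grammar rule tried at an input position.
type memoKey struct {
	rule string
	pos  int
}

// memoEntry records the outcome of that attempt.
type memoEntry struct {
	failed   bool
	consumed int         // runes consumed on success
	result   interface{} // result of any parser actions
}

// memoTable memoizes parse attempts so backtracking never repeats work.
type memoTable map[memoKey]memoEntry

// lookup returns the memoized outcome of trying rule at pos, if there is one.
func (m memoTable) lookup(rule string, pos int) (memoEntry, bool) {
	e, ok := m[memoKey{rule, pos}]
	return e, ok
}

// store records the outcome of trying rule at pos.
func (m memoTable) store(rule string, pos int, e memoEntry) {
	m[memoKey{rule, pos}] = e
}
```

Every access hashes a key, and the entries are heap-allocated and hold
pointers, which is exactly the overhead described above.
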

I originally implemented Peggy to parse the constructed language
[Lojban](https://mw.lojban.org/papri/Lojban)
(see [johaus](http://github.com/eaburns/johaus)).
My initial hash-table-based implementation performed very poorly on large texts
because of the issues described above:
profiling showed a significant amount of time spent
on map accesses and garbage collection scanning,
and memory use was too high to parse some texts (4KB)
on my laptop (8GB RAM).

I noticed similar issues with the JavaScript- and Java-based PEG parsers for Lojban.

Peggy takes a different approach that was tuned for this use case.

## Peggy's approach

Peggy computes the result of a parse in two passes instead of one.
The first pass determines whether the grammar accepts,
and builds a table that tracks, for each rule tried at each position,
whether the rule accepted and, if so, how much input was consumed,
or, if it failed, how much input was consumed to reach the deepest failure.
These values can be stored in an array using only integers.

If the first pass accepts the input, a second pass can quickly follow the table,
trying only rules that accept and computing the results of the rules' actions.

If the first pass fails to accept, another pass can follow the table
and compute a tree tracking which rules failed at the deepest point of the parse.
This can be used to build precise syntax error messages.

The advantage of Peggy's approach is that
the first pass performs only a single allocation: the table, an array of integers.
Accessing the table is just indexing into an array of integers,
which is cheaper than most hash table lookups.
Since the array contains only integers and no pointers,
it needn't be scanned by the garbage collector.
And finally, wherever a hash table would be relatively densely populated,
an array can be more memory efficient.

For the Lojban grammar, this made the difference
between being able to parse full texts or not
(a 4KB text that needed >8GB of memory was reduced to needing only 2GB,
and multiple minutes were reduced to mere seconds).

## Disadvantages

There are disadvantages to the Peggy approach:

1) The interface is not as simple to use.
However, I hope that you will not find it too difficult.
See the example in the next section for a fairly short wrapper
that wraps the Peggy calls into a single, more typical Go function call.

2) For grammars that do not rely as heavily on the memo table,
a hash table could be much more memory efficient.

I would like to expand this list, so please send pull requests
if you have other disadvantages of this approach that should be here.

Now, let's see how to use it.

# Input file format

A Peggy input file is UTF-8 encoded.

A Peggy grammar file consists of a _prelude_ followed by a set of _rules_.
The prelude is valid Go code enclosed between { and }.
This code is emitted at the beginning of the generated parser .go file.
It should begin with a package statement, then any imports used by the parser.
Any other valid Go code is also permitted.

After the prelude is a set of _rules_ that define the grammar.
Each rule begins with an _identifier_ that is the name of the rule.
After the name is an optional string giving the rule a human-readable name
and marking it as a _leaf_ rule for error reporting (more below).
After the optional string is the token <-.
Next is the expression that defines the rule.

**Example:**
```
A <- "Hello," _ ( "World!" / "世界" )
_ <- ( p:. &{ isUnicodeSpace(p) } )+
```

# Expressions

Expressions define the grammar.
The input to each expression is a sequence of runes.
The expression either accepts or rejects the input.
If the expression accepts, it consumes zero or more runes of input
and evaluates to a result (a Go value).

The types of expressions, in order of precedence, are:
* Choice
* Action
* Sequence
* Label
* Predicate
* Repetition
* Literal, Code Predicate, Identifier, and Subexpression

## Choice

A choice is a sequence of expressions separated by `/`.
Unlike in context-free grammars, choices in PEG are ordered.

It is an error if the result types of the subexpressions are not all the same.

**Accepts:**
A choice accepts if any of its subexpressions accepts.

**Consumes:**
A choice consumes the runes consumed by its first accepting subexpression,
going left to right.

**Result:**
The result of a choice has the type and value of its first accepting subexpression,
going left to right.

**Example:**
```
A / "Hello" / foo:Bar { return string(foo) }
```

## Sequences

A sequence is two or more expressions separated by whitespace.

**Accepts:**
A sequence accepts if each of its subexpressions accepts
on the input remaining after each preceding subexpression consumes.

**Consumes:**
The sequence consumes from the input
the sum of the runes consumed by all of its subexpressions.

**Result:**
It is an error if the type of the result of the first expression
is not the same as the type of the result of the second.

If the first expression is a `string`, the type of the sequence is `string`,
and the result is the concatenation of the results of the expressions.

If the first expression is any non-`string` type, T,
the type of the result of the sequence is `[]T`,
and the result itself is the slice from
`append()`ing the results of the subexpressions.

**Example:**
```
"Hello," Space "World" Punctuation
```

## Labels

A label is an identifier, followed by :, followed by an expression.

Labels are used to create new identifiers used by actions and code predicates.

The scope of a label is its branch in the nearest containing choice expression,
or the entire rule if there is no choice expression.

For example, in

	R <- a:A / a:A / a:A / a:A

all `a`s refer to different labels, as they are all scoped to different branches of the choice, `/`.

Similarly, in this expression,

	R <- a:A / (a:A / a:A)

all `a`s are different labels.
However,

	R <- a:A / a:A a:A

is an error, as `a` is re-defined in the right-hand branch of the choice, `/`.

**Accepts:**
A label accepts if its subexpression accepts.

**Consumes:**
A label consumes the runes of its subexpression.

**Result:**
The result type and value of a label are those of its subexpression.

**Example:**
```
hello:"Hello" "," Space world:( "World" / "世界" )
```

## Predicates

A predicate is a & or ! operator followed by an expression.

**Accepts:**
A predicate with the operator & accepts if its subexpression accepts.

A predicate with the operator ! accepts if its subexpression does not accept.

**Consumes:**
Predicates consume no runes.

**Result:**
The result of a predicate is the empty string.

**Example:**
```
!Keyword [a-zA-Z_] [a-zA-Z0-9_]*
```

## Repetition

A repetition is an expression followed by either a *, +, or ? operator.

**Accepts:**
A repetition with the operator * or ? always accepts.

A repetition with the operator + accepts if its subexpression accepts.

**Consumes:**
A repetition with the operator * or + consumes all matches of its subexpression.

A repetition with the operator ? consumes at most one match of its subexpression.

**Result:**
If the type of the subexpression is `string`, the result of a repetition is `string`,
and the value is the consumed runes.

Otherwise, if the type of the subexpression is a type `T`:
* if the operator is * or +, the type of the result is `[]T`,
and the value is a slice containing all `append`ed subexpression results.
* if the operator is ?, the type of the result is `*T`,
and the value is a pointer to the subexpression result if it accepted,
or `nil`.

**Example:**
```
[a-zA-Z0-9_]* ":"?
```

## Literals

Literals are String Literals, Character Classes, and Dot.

### String Literals

String literals are lexically the same as
[Go String Literals](https://golang.org/ref/spec#String_literals).

**Accepts:**
A string literal accepts if the next runes of input are exactly those of the string.

**Consumes:**
A string literal consumes the matching runes of input.

**Result:**
The result is the `string` of consumed runes.

**Example:**
```
"Hello\nWorld!"
```

### Character Classes

A character class is a sequence of characters
between [ and the next unescaped occurrence of ].
Escapes are treated as in string literals.

Character classes are much like those of common regular expression libraries.

**Accepts:**
A character class accepts if the next rune of input is within the class.

If the first character after the opening [ is a ^,
then the character class's acceptance is negated.

A pair of characters on either side of a - defines a _span_:
the character class accepts any rune with a codepoint
between (and including) the two characters.
It is an error if the first is not smaller than the second.

All other characters in the class are treated as a list of accepted runes.

**Consumes:**
A character class consumes one rune of input.

**Result:**
The result is the `string` of the consumed rune.

**Example:**
```
[a-zA-Z0-9_]
```

### Dot

The character . is an expression.

**Accepts:**
A dot expression accepts if the input is not empty and the next rune is valid.

**Consumes:**
A dot expression consumes a single rune.

**Result:**
The result is the `string` of the consumed rune.

**Example:**
```
.
```

## Code predicates

A code predicate is an operator & or ! followed by a Go expression enclosed in { and }.
The expression must result in a boolean value,
and must be syntactically valid as the condition of an
[if statement](https://golang.org/ref/spec#If_statements).

Label expressions in scope of the code predicate define identifiers accessible in the Go code.
The value of the identifier is a `string` of the input consumed by the labeled expression.
If the labeled expression has yet to accept at the time the code predicate is evaluated, the string is empty.

**Accepts:**

A code predicate with the operator & accepts if the expression evaluates to `true`.

A code predicate with the operator ! accepts if the expression evaluates to `false`.

**Consumes:**
A code predicate consumes no runes of input.

**Result:**
The result of a code predicate is the empty string.

**Example:**
```
p:. &{ isUnicodeSpace(p) }
```

## Identifiers

Identifiers begin with any Unicode letter or _,
followed by a sequence of zero or more letters, numbers, or _.
Identifiers name a rule of the grammar.
It is an error if the identifier is not the name of a rule of the grammar.

**Accepts:**
An identifier accepts if its named rule accepts.

**Consumes:**
An identifier consumes the runes of its named rule.

**Result:**
The result of an identifier has the type and value of the result of its named rule.

**Example:**
```
HelloWorld <- Hello "," Space World
Hello <- "Hello" / "こんにちは"
World <- "World" / "世界"
Space <- ( p:. &{ isUnicodeSpace(p) } )+
```

## Subexpressions

A subexpression is an expression enclosed between ( and ).
It is primarily used for grouping.

**Accepts:**
A subexpression accepts if its inner expression accepts.

**Consumes:**
A subexpression consumes the runes of its inner expression.

**Result:**
The result type and value of a subexpression are those of its inner expression.

**Example:**
```
"Hello, " ( "World" / "世界" )
```

## Actions

An action is an expression followed by Go code between { and }.
The Go code must be valid as the
[body of a function](https://golang.org/ref/spec#Block).
The Go code must end in a
[return statement](https://golang.org/ref/spec#Return_statements),
and the returned value must be one of:
* [a type conversion](https://golang.org/ref/spec#Conversions)
* [a type assertion](https://golang.org/ref/spec#Type_assertions)
* [a function literal](https://golang.org/ref/spec#Function_literals)
* [a composite literal](https://golang.org/ref/spec#Composite_literals)
* [an &-composite literal](https://golang.org/ref/spec#Address_operators)
* [an int literal](https://golang.org/ref/spec#Integer_literals)
* [a float literal](https://golang.org/ref/spec#Floating-point_literals)
* [a rune literal](https://golang.org/ref/spec#Rune_literals)
* [a string literal](https://golang.org/ref/spec#String_literals)

Label expressions in scope of the action define identifiers accessible in the Go code.
The value of the identifier is the value of the labeled expression if it accepted.
If the labeled expression has yet to accept at the time the action is evaluated,
the value is the zero value of the corresponding type.

In addition, several other special identifiers are accessible to the code:
* `parser` is a pointer to the Peggy `Parser`.
* `start` is the byte offset in the input at which this expression first accepted.
* `end` is the byte offset in the input just after this expression last accepted.

**Accepts:**
An action accepts if its subexpression accepts.

**Consumes:**
An action consumes the runes of its subexpression.

**Result:**
The result of an action has the type returned by the return statement
at the end of the block of Go code.
The value is the value returned by the Go code.

**Example:**
```
hello:("Hello" / "こんにちは") ", " world:("World" / "世界") {
	return HelloWorld{
		Hello: hello,
		World: world,
	}
}
```

# Generated code

The output file path is specified by the `-o` command-line option.

All package-level definitions in the generated file begin with a prefix, defaulting to `_`.
This default makes the definitions unexported.
The prefix can be overridden with the `-p` command-line option.

The generated file has a `Parser` type that is passed to the various parser functions,
and it contains between two and four functions for each rule, defining
several parser _passes_. The passes are:
1. the _accepts_ pass,
2. the _fail_ pass,
3. optionally the _action_ pass, and
4. optionally the _node_ pass.

A typical flow for using a Peggy-generated parser is to:
* Create a new instance of the `Parser` type on a given input.
* Call the accepts function for the root-level grammar rule.
  * If the rule did not accept, there was a syntax error:
    call the fail function of the rule to get a `*peg.Fail` tree,
    and pass that to `peg.SimpleError` to get an `error`
    describing the syntax error.
  * If the rule accepted, call the action function of the rule
    to get the result of the parse (an AST, an evaluation, whatever),
    or call the node function to get a `*peg.Node` of the syntax tree.

Here is an example:

```
// Parse returns the AST generated by the grammar rule actions.
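// It runs the accepts pass first; on failure it runs the fail pass to build
// a *peg.Fail tree and turns that into an error with peg.SimpleError.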
func Parse(input string) (AstNode, error) {
	parser := _NewParser(input)
	if pos, perr := _RuleAccepts(parser, 0); pos < 0 {
		_, failTree := _RuleFail(parser, 0, perr)
		return nil, peg.SimpleError(input, failTree)
	}
	// Or, instead call _RuleNode(parser, 0)
	// and return a *peg.Node with the syntax tree.
	_, astRoot := _RuleAction(parser, 0)
	return astRoot, nil
}
```

There are a lot of steps.
This allows advanced uses not described here ☺.
(But see, for example,
[this file](https://github.com/eaburns/johaus/blob/master/parser/error.go),
which showcases how to use the `*peg.Fail` tree to construct more precise error messages.)

Now let's see what the generated code for each of the passes looks like in more detail.

## The Parser type

The `Parser` type is mostly intended to be treated as opaque.
It maintains information about the parse to communicate between the multiple passes.

The `Parser` type will have a field named `data` of type `interface{}`,
which is ignored by the generated code.
This field may be used in code predicates or actions to store auxiliary information.
Such a use is considered advanced, and is not recommended
unless you have a thorough understanding of the generated parser.

## Accepts pass

The accepts pass generates a function for each rule of the grammar with a signature of the form:
```
func Accepts(parser *Parser, start int) (deltaPos, deltaErr int)
```

The function determines whether the rule accepts the input
beginning from the byte offset `start`.
If it accepts, `deltaPos` is the non-negative number of bytes accepted.
If it does not accept, `deltaErr` is the number of bytes from `start`
to the last rune of input that could not be consumed.

The primary purpose of the accepts pass is to determine
whether the language defined by the grammar accepts the input.
The `Parser` maintains state from the accepts pass that enables a subsequent
fail, action, or node pass to compute its result without backtracking on rules.

## Fail pass

The fail pass generates a function for each rule of the grammar with a signature of the form:
```
func Fail(parser *Parser, start, errPos int) (int, *peg.Fail)
```

The functions of the fail pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass failed to accept.

Each function returns the `*peg.Fail` tree of all attempted rules
that failed to accept the input beginning from `start`
and that failed no earlier than `errPos` bytes into the input.

The description is somewhat advanced.
Suffice it to say, this computes a data structure used by the `peg` package
to compute a parse error string with the `peg.SimpleError` function.
More advanced users can inspect the `*peg.Fail` tree
to create more precise or informative parse errors.
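
For example, here is a small helper in the spirit of the `Parse` function above.
It is only a sketch: it assumes a grammar whose root rule is named `Expr` and the
default `_` prefix (as in example/calc/calc.peggy), so the generated functions are
`_NewParser`, `_ExprAccepts`, and `_ExprFail`.

```
// syntaxError returns nil if Expr accepts input,
// and otherwise returns an error describing the syntax error.
func syntaxError(input string) error {
	p, err := _NewParser(input)
	if err != nil {
		return err
	}
	pos, perr := _ExprAccepts(p, 0)
	if pos >= 0 {
		return nil // accepted; the fail pass is not needed
	}
	// The fail pass reuses the state built by the accepts pass,
	// revisiting only rules that failed no earlier than perr.
	_, fail := _ExprFail(p, 0, perr)
	return peg.SimpleError(input, fail)
}
```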

## Action pass

The action pass generates a function for each rule of the grammar with a signature of the form:
```
func Action(parser *Parser, start int) (int, *T)
```

The functions of the action pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass accepted the rule at this position.

Each function returns the number of consumed runes
and a pointer to a value of the rule expression's result type
(the `T` in the signature above).

## Node pass

The node pass generates a function for each rule of the grammar with a signature of the form:
```
func Node(parser *Parser, start int) (int, *peg.Node)
```

The functions of the node pass assume that the `Parser` has already been used
as the argument of a corresponding accepts pass,
and that the accepts pass accepted the rule at this position.

Each function returns the number of consumed runes
and a `*peg.Node` that is the root of the syntax tree of the parse.

(Peggy is not an official Google product.)
--------------------------------------------------------------------------------
/action_test.go:
--------------------------------------------------------------------------------
1 | package main
2 | 
3 | import (
4 | 	"encoding/json"
5 | 	"io"
6 | 	"os"
7 | 	"os/exec"
8 | 	"reflect"
9 | 	"testing"
10 | 
11 | 	"github.com/eaburns/pretty"
12 | )
13 | 
14 | type actionTest struct {
15 | 	name    string
16 | 	grammar string
17 | 	cases   []actionTestCase
18 | }
19 | 
20 | type actionTestCase struct {
21 | 	input string
22 | 	want  interface{}
23 | }
24 | 
25 | var actionTests = []actionTest{
26 | 	{
27 | 		name:    "literal",
28 | 		grammar: `A <- "abc☺XYZ"`,
29 | 		cases: []actionTestCase{
30 | 			{"abc☺XYZ", "abc☺XYZ"},
31 | 		},
32 | 	},
33 | 	{
34 | 		name:    "char class",
35 | 		grammar: `A <- [a-zA-Z0-9☺]`,
36 | 		cases: []actionTestCase{
37 | 			{"a", "a"},
38 | 			{"☺", "☺"},
39 | 			{"Z", "Z"},
40 | 			{"5", "5"},
41 | 		},
42 | 	},
43 | 	{
44 | 		name:    "any char",
45 | 		grammar: `A <- .`,
46 | 		cases: []actionTestCase{
47 | 			{"a", "a"},
48 | 			{"☺", "☺"},
49 | 			{"Z", "Z"},
50 | 			{"5", "5"},
51 | 		},
52 | 	},
53 | 	{
54 | 		name:    "star",
55 | 		grammar: `A <- "abc"*`,
56 | 		cases: []actionTestCase{
57 | 			{"", ""},
58 | 			{"abc", "abc"},
59 | 			{"abcabc", "abcabc"},
60 | 			{"abcabcabcabc", "abcabcabcabc"},
61 | 		},
62 | 	},
63 | 	{
64 | 		name:    "plus",
65 | 		grammar: `A <- "abc"+`,
66 | 		cases: []actionTestCase{
67 | 			{"abc", "abc"},
68 | 			{"abcabc", "abcabc"},
69 | 			{"abcabcabcabc", "abcabcabcabc"},
70 | 		},
71 | 	},
72 | 	{
73 | 		name:    "question",
74 | 		grammar: `A <- "abc"?`,
75 | 		cases: []actionTestCase{
76 | 			{"", ""},
77 | 			{"abc", "abc"},
78 | 		},
79 | 	},
80 | 	{
81 | 		name:    "single type sequence",
82 | 		grammar: `A <- "a" "b" "c"`,
83 | 		cases: []actionTestCase{
84 | 			{"abc", "abc"},
85 | 		},
86 | 	},
87 | 	{
88 | 		name:    "single type choice",
89 | 		grammar: `A <- "abc" / "☺☹" / .`,
90 | 		cases: []actionTestCase{
91 | 			{"abc", "abc"},
92 | 			{"☺☹", "☺☹"},
93 | 			{"z", "z"},
94 | 		},
95 | 	},
96 | 	{
97 | 		name:    "multi-type choice",
98 | 		grammar: `A <- "abc" / "x" "y" "z"`,
99 | 		cases: []actionTestCase{
100 | 			{"abc", "abc"},
101 | 			{"xyz", "xyz"},
102 | 		},
103 | 	},
104 | 	{
105 | 		name:    "choice branch fails after submatch",
106 | 		grammar: `A <- "xyz"?
( "a" "b" "c" / "a" "c" "b" )`, 107 | cases: []actionTestCase{ 108 | {"acb", "acb"}, 109 | {"xyzacb", "xyzacb"}, 110 | }, 111 | }, 112 | { 113 | name: "multi-type sequence", 114 | grammar: `A <- ("a" "b" "c") "xyz"`, 115 | cases: []actionTestCase{ 116 | {"abcxyz", "abcxyz"}, 117 | }, 118 | }, 119 | { 120 | name: "identifier", 121 | grammar: ` 122 | A <- Abc "xyz" 123 | Abc <- "a" "b" "c"`, 124 | cases: []actionTestCase{ 125 | {"abcxyz", "abcxyz"}, 126 | }, 127 | }, 128 | { 129 | name: "true predicate", 130 | grammar: ` 131 | A <- "123"? &Abc "abc" 132 | Abc <- "a" "b" "c"`, 133 | cases: []actionTestCase{ 134 | {"abc", "abc"}, 135 | {"123abc", "123abc"}, 136 | }, 137 | }, 138 | { 139 | name: "false predicate", 140 | grammar: ` 141 | A <- "123"? !Abc "xyz" 142 | Abc <- "a" "b" "c"`, 143 | cases: []actionTestCase{ 144 | {"xyz", "xyz"}, 145 | {"123xyz", "123xyz"}, 146 | }, 147 | }, 148 | { 149 | name: "true pred code", 150 | grammar: ` 151 | A <- "abc"? &{ true } "xyz"`, 152 | cases: []actionTestCase{ 153 | {"xyz", "xyz"}, 154 | {"abcxyz", "abcxyz"}, 155 | }, 156 | }, 157 | { 158 | name: "false pred code", 159 | grammar: ` 160 | A <- "abc"? !{ false } "xyz"`, 161 | cases: []actionTestCase{ 162 | {"xyz", "xyz"}, 163 | {"abcxyz", "abcxyz"}, 164 | }, 165 | }, 166 | { 167 | name: "subexpr", 168 | grammar: `A <- ("a" "b" "c")`, 169 | cases: []actionTestCase{ 170 | {"abc", "abc"}, 171 | }, 172 | }, 173 | { 174 | name: "label", 175 | grammar: `A <- l1:"a" l2:"b" l3:"c"`, 176 | cases: []actionTestCase{ 177 | {"abc", "abc"}, 178 | }, 179 | }, 180 | { 181 | name: "action", 182 | grammar: ` 183 | A <- l1:. l2:. l3:. { 184 | return map[string]string{ 185 | "1": l1, 186 | "2": l2, 187 | "3": l3, 188 | } 189 | }`, 190 | cases: []actionTestCase{ 191 | {"abc", map[string]interface{}{ 192 | "1": "a", 193 | "2": "b", 194 | "3": "c", 195 | }}, 196 | {"xyz", map[string]interface{}{ 197 | "1": "x", 198 | "2": "y", 199 | "3": "z", 200 | }}, 201 | }, 202 | }, 203 | { 204 | name: "start and end", 205 | grammar: ` 206 | A <- smiley? 
as v:bs cs { return [2]int(v) } 207 | smiley <- '☺' 208 | as <- 'a'* 209 | bs <- 'b'* { return [2]int{start, end} } 210 | cs <- 'c'* 211 | `, 212 | cases: []actionTestCase{ 213 | {"", []interface{}{0.0, 0.0}}, 214 | {"aaaccc", []interface{}{3.0, 3.0}}, 215 | {"aaabccc", []interface{}{3.0, 4.0}}, 216 | {"bbb", []interface{}{0.0, 3.0}}, 217 | {"aaabbbccc", []interface{}{3.0, 6.0}}, 218 | {"☺aaabbbccc", []interface{}{float64(len("☺") + 3), float64(len("☺") + 6)}}, 219 | }, 220 | }, 221 | { 222 | name: "type inference", 223 | grammar: ` 224 | A <- convert / ptr_convert / assert / func / struct / ptr_struct / map / array / slice / int / float / rune / string 225 | convert <- x:("convert" { return int32(1) }) { return string(fmt.Sprintf("%T", x)) } 226 | ptr_convert <- x:("ptr_convert" { return (*string)(nil) }) { return string(fmt.Sprintf("%T", x)) } 227 | assert <- x:("assert" { var c interface{} = peg.Node{}; return c.(peg.Node) }) { return string(fmt.Sprintf("%T", x)) } 228 | func <- x:("func" { return func(){} }) { return string(fmt.Sprintf("%T", x)) } 229 | struct <- x:("struct" { return peg.Node{} }) { return string(fmt.Sprintf("%T", x)) } 230 | ptr_struct <- x:("ptr_struct" { return &peg.Node{} }) { return string(fmt.Sprintf("%T", x)) } 231 | map <- x:("map" { return map[string]int{} }) { return string(fmt.Sprintf("%T", x)) } 232 | array <- x:("array" { return [5]int{} }) { return string(fmt.Sprintf("%T", x)) } 233 | slice <- x:("slice" { return []int{} }) { return string(fmt.Sprintf("%T", x)) } 234 | int <- x:("int" { return 0 }) { return string(fmt.Sprintf("%T", x)) } 235 | float <- x:("float" { return 0.0 }) { return string(fmt.Sprintf("%T", x)) } 236 | rune <- x:("rune" { return 'a' }) { return string(fmt.Sprintf("%T", x)) } 237 | string <- x:("string" { return "" }) { return string(fmt.Sprintf("%T", x)) } 238 | `, 239 | cases: []actionTestCase{ 240 | {"convert", "int32"}, 241 | {"ptr_convert", "*string"}, 242 | {"assert", "peg.Node"}, 243 | {"func", "func()"}, 244 | {"struct", "peg.Node"}, 245 | {"ptr_struct", "*peg.Node"}, 246 | {"array", "[5]int"}, 247 | {"slice", "[]int"}, 248 | {"int", "int"}, 249 | {"float", "float64"}, 250 | {"rune", "int32"}, 251 | {"string", "string"}, 252 | }, 253 | }, 254 | 255 | // A simple calculator. 256 | // BUG: The test grammar has reverse the normal associativity — oops. 
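// Because Expr and Term recurse on the right (l:Term op:(Plus / Minus) r:Expr),
// chained subtraction or division groups to the right: "10-3-2" would parse as
// 10-(3-2). The cases below avoid chaining those operators, so they still pass.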
257 | { 258 | name: "calculator", 259 | grammar: ` 260 | A <- Expr 261 | Expr <- l:Term op:(Plus / Minus) r:Expr { return int(op(l, r)) } / x:Term { return int(x) } 262 | Plus <- "+" { return func(a, b int) int { return a + b } } 263 | Minus <- "-" { return func(a, b int) int { return a - b } } 264 | Term <- l:Factor op:(Times / Divide) r:Term { return int(op(l, r)) } / x:Factor { return int(x) } 265 | Times <- "*" { return func(a, b int) int { return a * b } } 266 | Divide <- "/"{ return func(a, b int) int { return a / b } } 267 | Factor <- Number / '(' x:Expr ')' { return int(x) } 268 | Number <- x:[0-9]+ { var i int; for _, r := range x { i = i * 10 + (int(r) - '0') }; return int(i) } 269 | `, 270 | cases: []actionTestCase{ 271 | {"1", 1.0}, 272 | {"(5)", 5.0}, 273 | {"2*3", 6.0}, 274 | {"2+3", 5.0}, 275 | {"10-3*2", 4.0}, 276 | {"10-(6/2)*5", -5.0}, 277 | }, 278 | }, 279 | } 280 | 281 | func TestActionGen(t *testing.T) { 282 | for _, test := range actionTests { 283 | test := test 284 | t.Run(test.name, func(t *testing.T) { 285 | t.Parallel() 286 | source := generateTest(actionPrelude, test.grammar) 287 | binary := build(source) 288 | defer rm(binary) 289 | go rm(source) 290 | 291 | for _, c := range test.cases { 292 | var got struct { 293 | T interface{} 294 | } 295 | parseJSON(binary, c.input, &got) 296 | if !reflect.DeepEqual(got.T, c.want) { 297 | t.Errorf("parse(%q)=%s (%#v), want %s", 298 | c.input, pretty.String(got.T), got.T, 299 | pretty.String(c.want)) 300 | } 301 | } 302 | 303 | }) 304 | } 305 | } 306 | 307 | // parseJSON parses an input using the given binary 308 | // and returns the position of either the parse or error 309 | // along with whether the parse succeeded. 310 | // The format for transmitting the result 311 | // from the parser binary to the test harness 312 | // is JSON. 313 | func parseJSON(binary, input string, result interface{}) { 314 | cmd := exec.Command(binary) 315 | cmd.Stderr = os.Stderr 316 | stdin, err := cmd.StdinPipe() 317 | if err != nil { 318 | panic(err.Error()) 319 | } 320 | stdout, err := cmd.StdoutPipe() 321 | if err != nil { 322 | panic(err.Error()) 323 | } 324 | if err := cmd.Start(); err != nil { 325 | panic(err.Error()) 326 | } 327 | go func() { 328 | if _, err := io.WriteString(stdin, input); err != nil { 329 | panic(err.Error()) 330 | } 331 | if err := stdin.Close(); err != nil { 332 | panic(err.Error()) 333 | } 334 | }() 335 | if err := json.NewDecoder(stdout).Decode(result); err != nil { 336 | panic(err.Error()) 337 | } 338 | if err := cmd.Wait(); err != nil { 339 | panic(err.Error()) 340 | } 341 | } 342 | 343 | var actionPrelude = `{ 344 | package main 345 | 346 | import ( 347 | "encoding/json" 348 | "fmt" 349 | "io/ioutil" 350 | "os" 351 | 352 | "github.com/eaburns/peggy/peg" 353 | ) 354 | 355 | func main() { 356 | data, err := ioutil.ReadAll(os.Stdin) 357 | if err != nil { 358 | os.Stderr.WriteString(err.Error() + "\n") 359 | os.Exit(1) 360 | } 361 | p, err := _NewParser(string(data)) 362 | if err != nil { 363 | os.Stderr.WriteString(err.Error() + "\n") 364 | os.Exit(1) 365 | } 366 | if pos, _ := _AAccepts(p, 0); pos < 0 { 367 | os.Stderr.WriteString("parse failed") 368 | os.Exit(1) 369 | } 370 | var result struct { 371 | T interface{} 372 | } 373 | _, result.T = _AAction(p, 0) 374 | if err := json.NewEncoder(os.Stdout).Encode(&result); err != nil { 375 | // Hack — we need fmt imported for the type inference test. 376 | // However, if imported, it must be used. 377 | // Here we use it at least once. 
378 | fmt.Fprintf(os.Stderr, err.Error() + "\n") 379 | os.Exit(1) 380 | } 381 | } 382 | } 383 | ` 384 | -------------------------------------------------------------------------------- /check.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "sort" 11 | ) 12 | 13 | // Check does semantic analysis of the rules, 14 | // setting bookkeeping needed to later generate the parser, 15 | // returning any errors encountered in order of their begin location. 16 | func Check(grammar *Grammar) error { 17 | var errs Errors 18 | rules := expandTemplates(grammar.Rules, &errs) 19 | ruleMap := make(map[string]*Rule, len(rules)) 20 | for i, r := range rules { 21 | r.N = i 22 | name := r.Name.String() 23 | if other := ruleMap[name]; other != nil { 24 | errs.add(r, "rule %s redefined", name) 25 | } 26 | ruleMap[name] = r 27 | } 28 | 29 | var p path 30 | for _, r := range rules { 31 | r.checkLeft(ruleMap, p, &errs) 32 | } 33 | for _, r := range rules { 34 | check(r, ruleMap, &errs) 35 | } 36 | if err := errs.ret(); err != nil { 37 | return err 38 | } 39 | grammar.CheckedRules = rules 40 | return nil 41 | } 42 | 43 | func expandTemplates(ruleDefs []Rule, errs *Errors) []*Rule { 44 | var expanded, todo []*Rule 45 | tmplNames := make(map[string]*Rule) 46 | for i := range ruleDefs { 47 | r := &ruleDefs[i] 48 | if len(r.Name.Args) > 0 { 49 | seenParams := make(map[string]bool) 50 | for _, param := range r.Name.Args { 51 | n := param.String() 52 | if seenParams[n] { 53 | errs.add(param, "parameter %s redefined", n) 54 | } 55 | seenParams[n] = true 56 | } 57 | tmplNames[r.Name.Name.String()] = r 58 | } else { 59 | expanded = append(expanded, r) 60 | todo = append(todo, r) 61 | } 62 | } 63 | 64 | seen := make(map[string]bool) 65 | for i := 0; i < len(todo); i++ { 66 | for _, invok := range invokedTemplates(todo[i]) { 67 | if seen[invok.Name.String()] { 68 | continue 69 | } 70 | seen[invok.Name.String()] = true 71 | tmpl := tmplNames[invok.Name.Name.String()] 72 | if tmpl == nil { 73 | continue // undefined template, error reported elsewhere 74 | } 75 | exp := expand1(tmpl, invok, errs) 76 | if exp == nil { 77 | continue // error expanding, error reported elsewhere 78 | } 79 | todo = append(todo, exp) 80 | expanded = append(expanded, exp) 81 | } 82 | } 83 | return expanded 84 | } 85 | 86 | func expand1(tmpl *Rule, invok *Ident, errs *Errors) *Rule { 87 | if len(invok.Args) != len(tmpl.Args) { 88 | errs.add(invok, "template %s argument count mismatch: got %d, expected %d", 89 | tmpl.Name, len(invok.Args), len(tmpl.Args)) 90 | return nil 91 | } 92 | copy := *tmpl 93 | sub := make(map[string]string, len(tmpl.Args)) 94 | for i, arg := range invok.Args { 95 | sub[tmpl.Args[i].String()] = arg.String() 96 | } 97 | copy.Args = invok.Args 98 | copy.Expr = tmpl.Expr.substitute(sub) 99 | return © 100 | } 101 | 102 | func invokedTemplates(r *Rule) []*Ident { 103 | var tmpls []*Ident 104 | r.Expr.Walk(func(e Expr) bool { 105 | if id, ok := e.(*Ident); ok { 106 | if len(id.Args) > 0 { 107 | tmpls = append(tmpls, id) 108 | } 109 | } 110 | return true 111 | }) 112 | return tmpls 113 | } 114 | 115 | type path struct { 116 | stack []*Rule 117 | seen map[*Rule]bool 118 | } 119 | 120 | func (p *path) push(r *Rule) bool { 121 | if p.seen == 
nil { 122 | p.seen = make(map[*Rule]bool) 123 | } 124 | if p.seen[r] { 125 | return false 126 | } 127 | p.stack = append(p.stack, r) 128 | p.seen[r] = true 129 | return true 130 | } 131 | 132 | func (p *path) pop() { 133 | p.stack = p.stack[:len(p.stack)] 134 | } 135 | 136 | func (p *path) cycle(r *Rule) []*Rule { 137 | for i := len(p.stack) - 1; i >= 0; i-- { 138 | if p.stack[i] == r { 139 | return append(p.stack[i:], r) 140 | } 141 | } 142 | panic("no cycle") 143 | } 144 | 145 | func cycleString(rules []*Rule) string { 146 | var s string 147 | for _, r := range rules { 148 | if s != "" { 149 | s += ", " 150 | } 151 | s += r.Name.String() 152 | } 153 | return s 154 | } 155 | 156 | func (r *Rule) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 157 | if r.typ != nil { 158 | return 159 | } 160 | if !p.push(r) { 161 | cycle := p.cycle(r) 162 | errs.add(cycle[0], "left-recursion: %s", cycleString(cycle)) 163 | for _, r := range cycle { 164 | r.typ = new(string) 165 | } 166 | return 167 | } 168 | r.Expr.checkLeft(rules, p, errs) 169 | t := r.Expr.Type() 170 | r.typ = &t 171 | r.epsilon = r.Expr.epsilon() 172 | p.pop() 173 | } 174 | 175 | func (e *Choice) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 176 | for _, sub := range e.Exprs { 177 | sub.checkLeft(rules, p, errs) 178 | } 179 | } 180 | 181 | func (e *Action) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 182 | e.Expr.checkLeft(rules, p, errs) 183 | } 184 | 185 | func (e *Sequence) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 186 | for _, sub := range e.Exprs { 187 | sub.checkLeft(rules, p, errs) 188 | if !sub.epsilon() { 189 | break 190 | } 191 | } 192 | } 193 | 194 | func (e *LabelExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 195 | e.Expr.checkLeft(rules, p, errs) 196 | } 197 | 198 | func (e *PredExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 199 | e.Expr.checkLeft(rules, p, errs) 200 | } 201 | 202 | func (e *RepExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 203 | e.Expr.checkLeft(rules, p, errs) 204 | } 205 | 206 | func (e *OptExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 207 | e.Expr.checkLeft(rules, p, errs) 208 | } 209 | 210 | func (e *Ident) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 211 | if e.rule = rules[e.Name.String()]; e.rule != nil { 212 | e.rule.checkLeft(rules, p, errs) 213 | } 214 | } 215 | 216 | func (e *SubExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) { 217 | e.Expr.checkLeft(rules, p, errs) 218 | } 219 | 220 | func (e *PredCode) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 221 | 222 | func (e *Literal) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 223 | 224 | func (e *CharClass) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 225 | 226 | func (e *Any) checkLeft(rules map[string]*Rule, p path, errs *Errors) {} 227 | 228 | type ctx struct { 229 | rules map[string]*Rule 230 | allLabels *[]*LabelExpr 231 | curLabels map[string]*LabelExpr 232 | } 233 | 234 | func check(rule *Rule, rules map[string]*Rule, errs *Errors) { 235 | ctx := ctx{ 236 | rules: rules, 237 | allLabels: &rule.Labels, 238 | curLabels: make(map[string]*LabelExpr), 239 | } 240 | rule.Expr.check(ctx, true, errs) 241 | sort.Slice(rule.Labels, func(i, j int) bool { 242 | return rule.Labels[i].N < rule.Labels[j].N 243 | }) 244 | } 245 | 246 | func (e *Choice) check(ctx ctx, valueUsed bool, errs *Errors) { 247 | for _, sub := range e.Exprs { 248 | subCtx := ctx 249 | 
subCtx.curLabels = make(map[string]*LabelExpr) 250 | for n, l := range ctx.curLabels { 251 | subCtx.curLabels[n] = l 252 | } 253 | sub.check(subCtx, valueUsed, errs) 254 | } 255 | t := e.Exprs[0].Type() 256 | for _, sub := range e.Exprs { 257 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" { 258 | errs.add(sub, "type mismatch: got %s, expected %s", got, t) 259 | } 260 | } 261 | } 262 | 263 | func (e *Action) check(ctx ctx, valueUsed bool, errs *Errors) { 264 | e.Expr.check(ctx, false, errs) 265 | for _, l := range ctx.curLabels { 266 | e.Labels = append(e.Labels, l) 267 | } 268 | sort.Slice(e.Labels, func(i, j int) bool { 269 | return e.Labels[i].Label.String() < e.Labels[j].Label.String() 270 | }) 271 | } 272 | 273 | // BUG: figure out what to do about sequence types. 274 | func (e *Sequence) check(ctx ctx, valueUsed bool, errs *Errors) { 275 | for _, sub := range e.Exprs { 276 | sub.check(ctx, valueUsed, errs) 277 | } 278 | t := e.Exprs[0].Type() 279 | for _, sub := range e.Exprs { 280 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" { 281 | errs.add(sub, "type mismatch: got %s, expected %s", got, t) 282 | } 283 | } 284 | } 285 | 286 | func (e *LabelExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 287 | e.Expr.check(ctx, true, errs) 288 | if _, ok := ctx.curLabels[e.Label.String()]; ok { 289 | errs.add(e.Label, "label %s redefined", e.Label.String()) 290 | } 291 | e.N = len(*ctx.allLabels) 292 | *ctx.allLabels = append(*ctx.allLabels, e) 293 | ctx.curLabels[e.Label.String()] = e 294 | } 295 | 296 | func (e *PredExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 297 | e.Expr.check(ctx, false, errs) 298 | } 299 | 300 | func (e *RepExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 301 | e.Expr.check(ctx, valueUsed, errs) 302 | } 303 | 304 | func (e *OptExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 305 | e.Expr.check(ctx, valueUsed, errs) 306 | } 307 | 308 | func (e *SubExpr) check(ctx ctx, valueUsed bool, errs *Errors) { 309 | e.Expr.check(ctx, valueUsed, errs) 310 | } 311 | 312 | func (e *Ident) check(ctx ctx, _ bool, errs *Errors) { 313 | r, ok := ctx.rules[e.Name.String()] 314 | if !ok { 315 | errs.add(e, "rule %s undefined", e.Name.String()) 316 | } else { 317 | e.rule = r 318 | } 319 | } 320 | 321 | func (e *PredCode) check(ctx ctx, _ bool, _ *Errors) { 322 | for _, l := range ctx.curLabels { 323 | e.Labels = append(e.Labels, l) 324 | } 325 | sort.Slice(e.Labels, func(i, j int) bool { 326 | return e.Labels[i].Label.String() < e.Labels[j].Label.String() 327 | }) 328 | } 329 | 330 | func (e *Literal) check(ctx, bool, *Errors) {} 331 | 332 | func (e *CharClass) check(ctx, bool, *Errors) {} 333 | 334 | func (e *Any) check(ctx, bool, *Errors) {} 335 | -------------------------------------------------------------------------------- /check_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
6 | 7 | package main 8 | 9 | import ( 10 | "regexp" 11 | "strings" 12 | "testing" 13 | ) 14 | 15 | type checkTest struct { 16 | name string 17 | in string 18 | err string 19 | skipActions bool 20 | } 21 | 22 | func (test checkTest) Run(t *testing.T) { 23 | in := strings.NewReader(test.in) 24 | g, err := Parse(in, "test.file") 25 | if err != nil { 26 | t.Errorf("Parse(%q, _)=_, %v, want _,nil", test.in, err) 27 | return 28 | } 29 | err = Check(g) 30 | if test.err == "" { 31 | if err != nil { 32 | t.Errorf("Check(%q)=%v, want nil", test.in, err) 33 | } 34 | return 35 | } 36 | re := regexp.MustCompile(test.err) 37 | if err == nil || !re.MatchString(err.Error()) { 38 | var e string 39 | if err != nil { 40 | e = err.Error() 41 | } 42 | t.Errorf("Check(%q)=%v, but expected to match %q", 43 | test.in, e, test.err) 44 | return 45 | } 46 | } 47 | 48 | func TestCheck(t *testing.T) { 49 | tests := []checkTest{ 50 | { 51 | name: "empty OK", 52 | in: "", 53 | err: "", 54 | }, 55 | { 56 | name: "various OK", 57 | in: `A <- (G/B C)* 58 | B <- &{pred}* 59 | C <- !{pred}* { return string(act) } 60 | D <- .* !B 61 | E <- C* 62 | F <- "cde"* 63 | G <- [fgh]*`, 64 | err: "", 65 | }, 66 | { 67 | name: "redefined rule", 68 | in: "A <- [x]\nA <- [y]", 69 | err: "^test.file:2.1,2.9: rule A redefined", 70 | }, 71 | { 72 | name: "undefined rule", 73 | in: "A <- B", 74 | err: "^test.file:1.6,1.7: rule B undefined", 75 | }, 76 | { 77 | name: "redefined label", 78 | in: "A <- a:[a] a:[a]", 79 | err: "^test.file:1.12,1.13: label a redefined", 80 | }, 81 | { 82 | name: "non-redefined label with same name in different branch", 83 | in: "A <- a:[a] / (a:[a] / a:[a]) / a:[a]", 84 | err: "", 85 | }, 86 | { 87 | name: "redefined label in same choice branch", 88 | in: "A <- a:[a] / a:[a] a:[a]", 89 | err: "^test.file:1.20,1.21: label a redefined", 90 | }, 91 | { 92 | name: "choice first error", 93 | in: "A <- Undefined / A", 94 | err: ".+", 95 | }, 96 | { 97 | name: "choice second error", 98 | in: "A <- B / Undefined\nB <- [x]", 99 | err: ".+", 100 | }, 101 | { 102 | name: "seq first error", 103 | in: "A <- Undefined A", 104 | err: ".+", 105 | }, 106 | { 107 | name: "sequence second error", 108 | in: "A <- B Undefined\nB <- [x]", 109 | err: ".+", 110 | }, 111 | { 112 | name: "template parameter OK", 113 | in: `A <- x 114 | B <- A 115 | C <- "c"`, 116 | err: "", 117 | }, 118 | { 119 | name: "template parameter redef", 120 | in: `A <- x 121 | B <- A 122 | C <- "c"`, 123 | err: "^test.file:1.6,1.7: parameter x redefined$", 124 | }, 125 | { 126 | name: "template arg count mismatch", 127 | in: `A <- x 128 | B <- A 129 | C <- "c"`, 130 | err: "test.file:2.10,2.16: template A argument count mismatch: got 2, expected 1", 131 | }, 132 | { 133 | name: "multiple errors", 134 | in: "A <- U1 U2\nA <- u:[x] u:[x]", 135 | err: "test.file:1.6,1.8: rule U1 undefined\n" + 136 | "test.file:1.9,1.11: rule U2 undefined\n" + 137 | "test.file:2.1,2.17: rule A redefined\n" + 138 | "test.file:2.12,2.13: label u redefined", 139 | }, 140 | { 141 | name: "right recursion is OK", 142 | in: `A <- "b" B 143 | B <- A`, 144 | }, 145 | { 146 | name: "direct left-recursion", 147 | in: `A <- A`, 148 | err: "^test.file:1.1,1.7: left-recursion: A, A$", 149 | }, 150 | { 151 | name: "indirect left-recursion", 152 | in: `A <- C0 153 | C0 <- C1 154 | C1 <- C2 155 | C2 <- C0`, 156 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$", 157 | }, 158 | { 159 | name: "choice left-recursion", 160 | in: `A <- B / C / D 161 | B <- "b" 162 | C <- "c" 163 | D <- A`, 
164 | err: "^test.file:1.1,1.15: left-recursion: A, D, A$", 165 | }, 166 | { 167 | name: "sequence left-recursion", 168 | in: `A <- !B C D E 169 | B <- "b" 170 | C <- !"c" 171 | D <- C # non-consuming through C 172 | E <- A`, 173 | err: "^test.file:1.1,1.14: left-recursion: A, E, A$", 174 | }, 175 | { 176 | name: "various expr left-recursion", 177 | in: `Choice <- "a" / Sequence 178 | Sequence <- SubExpr "b" 179 | SubExpr <- ( PredExpr ) 180 | PredExpr <- &RepExpr 181 | RepExpr <- OptExpr+ 182 | OptExpr <- Action? 183 | Action <- Choice { return "" }`, 184 | err: "^test.file:1.1,1.25: left-recursion: Choice, Sequence, SubExpr, PredExpr, RepExpr, OptExpr, Action, Choice$", 185 | }, 186 | { 187 | name: "templates calling templates", 188 | in: `A <- B 189 | B <- C 190 | C <- "a" D C? 191 | D <- X 192 | X <- "x"`, 193 | err: "", // this should work fine. 194 | }, 195 | { 196 | name: "template left-recursion", 197 | in: `A <- C0 198 | C0 <- C1 199 | C1 <- C2 200 | C2 <- X`, 201 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$", 202 | }, 203 | { 204 | name: "multiple left-recursion errors", 205 | in: `A <- A 206 | B <- C 207 | C <- B`, 208 | err: "^test.file:1.1,1.7: left-recursion: A, A\n" + 209 | "test.file:2.5,2.11: left-recursion: B, C, B$", 210 | }, 211 | { 212 | name: "right-recursion is OK", 213 | in: `A <- B C A? 214 | B <- "b" B / C 215 | C <- "c"`, 216 | err: "", 217 | }, 218 | 219 | { 220 | name: "choice type mismatch", 221 | in: `A <- "a" / "b" { return 5 }`, 222 | err: "^test.file:1.12,1.28: type mismatch: got int, expected string", 223 | }, 224 | { 225 | name: "sequence type mismatch", 226 | in: `A <- "a" ( "b" { return 5 } )`, 227 | err: "^test.file:1.10,1.29: type mismatch: got int, expected string", 228 | }, 229 | { 230 | name: "unused choice, no mismatch", 231 | in: `A <- ( "a" / "b" { return 5 } ) { return 6 }`, 232 | err: "", 233 | }, 234 | { 235 | name: "unused sequence, no mismatch", 236 | in: `A <- "a" ( "b" { return 5 } ) { return 6 }`, 237 | err: "", 238 | }, 239 | { 240 | name: "&-pred subexpression is unused", 241 | in: `A <- "a" !( "b" { return 5 } )`, 242 | err: "", 243 | }, 244 | { 245 | name: "!-pred subexpression is unused", 246 | in: `A <- "a" !( "b" { return 5 } )`, 247 | err: "", 248 | }, 249 | { 250 | name: "multiple type errors", 251 | in: `A <- B ( "c" { return 0 } ) 252 | B <- "b" / ( "c" { return 0 } )`, 253 | err: "^test.file:1.8,1.27: type mismatch: got int, expected string\n" + 254 | "test.file:2.16,2.35: type mismatch: got int, expected string$", 255 | }, 256 | } 257 | for _, test := range tests { 258 | test := test 259 | t.Run(test.name, func(t *testing.T) { 260 | t.Parallel() 261 | test.Run(t) 262 | }) 263 | } 264 | } 265 | 266 | func TestGenActionsFalse(t *testing.T) { 267 | // This set of tests cannot be run in parallel. 
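// They mutate the package-level *genActions flag, so the subtests below do not call t.Parallel.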
268 | *genActions = false 269 | defer func() { *genActions = true }() 270 | 271 | tests := []checkTest{ 272 | { 273 | name: "choice type mismatch: no error", 274 | in: `A <- "a" / "b" { return 5 }`, 275 | }, 276 | { 277 | name: "sequence type mismatch: no error", 278 | in: `A <- "a" ( "b" { return 5 } )`, 279 | }, 280 | } 281 | for _, test := range tests { 282 | t.Run(test.name, test.Run) 283 | } 284 | } 285 | -------------------------------------------------------------------------------- /error.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "sort" 12 | ) 13 | 14 | // Located is an interface representing anything located within the input stream. 15 | type Located interface { 16 | Begin() Loc 17 | End() Loc 18 | } 19 | 20 | // Errors implements error, containing multiple errors. 21 | type Errors struct { 22 | Errs []Error 23 | } 24 | 25 | func (err *Errors) ret() error { 26 | if len(err.Errs) == 0 { 27 | return nil 28 | } 29 | sort.Slice(err.Errs, func(i, j int) bool { 30 | return err.Errs[i].Begin().Less(err.Errs[j].Begin()) 31 | }) 32 | return err 33 | } 34 | 35 | func (err *Errors) add(loc Located, format string, args ...interface{}) { 36 | err.Errs = append(err.Errs, Err(loc, format, args...)) 37 | } 38 | 39 | // Error returns the string representation of the Errors, 40 | // which is the string of each Error, one per-line. 41 | func (err Errors) Error() string { 42 | var s string 43 | for i, e := range err.Errs { 44 | if i > 0 { 45 | s += "\n" 46 | } 47 | s += e.Error() 48 | } 49 | return s 50 | } 51 | 52 | // Error is an error tied to an element of the Peggy input file. 53 | type Error struct { 54 | Located 55 | Msg string 56 | } 57 | 58 | func (err Error) Error() string { 59 | b, e := err.Begin(), err.End() 60 | l0, c0 := b.Line, b.Col 61 | l1, c1 := e.Line, e.Col 62 | switch { 63 | case l0 == l1 && c0 == c1: 64 | return fmt.Sprintf("%s:%d.%d: %s", b.File, l0, c0, err.Msg) 65 | default: 66 | return fmt.Sprintf("%s:%d.%d,%d.%d: %s", b.File, l0, c0, l1, c1, err.Msg) 67 | } 68 | } 69 | 70 | // Err returns an error containing the location and formatted message. 71 | func Err(loc Located, format string, args ...interface{}) Error { 72 | return Error{Located: loc, Msg: fmt.Sprintf(format, args...)} 73 | } 74 | -------------------------------------------------------------------------------- /example/calc/calc.peggy: -------------------------------------------------------------------------------- 1 | { 2 | // Calc is an example calculator program. 
3 | // You can build it from calc.peggy with 4 | // peggy -o calc.go calc.peggy 5 | package main 6 | 7 | import ( 8 | "bufio" 9 | "fmt" 10 | "math/big" 11 | "os" 12 | "unicode" 13 | "unicode/utf8" 14 | 15 | "github.com/eaburns/peggy/peg" 16 | ) 17 | 18 | func main() { 19 | scanner := bufio.NewScanner(os.Stdin) 20 | for scanner.Scan() { 21 | line := scanner.Text() 22 | p, err := _NewParser(line) 23 | if err != nil { 24 | fmt.Println(err) 25 | os.Exit(1) 26 | } 27 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 28 | _, fail := _ExprFail(p, 0 ,perr) 29 | fmt.Println(peg.SimpleError(line, fail)) 30 | continue 31 | } 32 | _, result := _ExprAction(p, 0) 33 | fmt.Println((*result).String()) 34 | } 35 | if err := scanner.Err(); err != nil { 36 | fmt.Println(err) 37 | os.Exit(1) 38 | } 39 | } 40 | 41 | type op func(*big.Float, *big.Float, *big.Float) *big.Float 42 | 43 | type tail struct { 44 | op op 45 | r *big.Float 46 | } 47 | 48 | func evalTail(l big.Float, tail []tail) big.Float { 49 | for _, t := range tail { 50 | t.op(&l, &l, t.r) 51 | } 52 | return l 53 | } 54 | 55 | func isSpace(s string) bool { 56 | r, _ := utf8.DecodeRuneInString(s) 57 | return unicode.IsSpace(r) 58 | } 59 | } 60 | 61 | Expr <- s:Sum EOF { return (*big.Float)(&s) } 62 | 63 | Sum <- l:Product tail:SumTail* { return (big.Float)(evalTail(l, tail)) } 64 | 65 | SumTail <- op:AddOp r:Product { return tail{op, &r} } 66 | 67 | AddOp "operator" <- 68 | _ "+" { return op((*big.Float).Add) } / 69 | _ "-" { return op((*big.Float).Sub) } 70 | 71 | Product <- l:Value tail:ProductTail* { return (big.Float)(evalTail(l, tail)) } 72 | 73 | ProductTail <- op:MulOp r:Value { return tail{op, &r} } 74 | 75 | MulOp "operator" <- 76 | _ "*" { return op((*big.Float).Mul) } / 77 | _ "/" { return op((*big.Float).Quo) } 78 | 79 | Value <- Num / _ "(" e:Sum _ ")" { return (big.Float)(e) } 80 | 81 | Num "number" <- _ n:( [0-9]+ ("." [0-9]+)? ) { 82 | var f big.Float 83 | f.Parse(n, 10) 84 | return (big.Float)(f) 85 | } 86 | 87 | _ "space" <- ( s:. &{ isSpace(s) } )* 88 | 89 | EOF "end of file" <- !. -------------------------------------------------------------------------------- /example/label_names/label_names.go: -------------------------------------------------------------------------------- 1 | // Test labels with the same name but in different choice branches. 
2 | // peggy -o label_names.go label_names.peggy 3 | package main 4 | 5 | import ( 6 | "bufio" 7 | "fmt" 8 | "os" 9 | 10 | "github.com/eaburns/peggy/peg" 11 | ) 12 | 13 | func main() { 14 | scanner := bufio.NewScanner(os.Stdin) 15 | for scanner.Scan() { 16 | line := scanner.Text() 17 | p, err := _NewParser(line) 18 | if err != nil { 19 | fmt.Println(err) 20 | os.Exit(1) 21 | } 22 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 23 | _, fail := _ExprFail(p, 0, perr) 24 | fmt.Println(peg.SimpleError(line, fail)) 25 | continue 26 | } 27 | _, result := _ExprAction(p, 0) 28 | fmt.Println(*result) 29 | } 30 | if err := scanner.Err(); err != nil { 31 | fmt.Println(err) 32 | os.Exit(1) 33 | } 34 | } 35 | 36 | const ( 37 | _Expr int = 0 38 | 39 | _N int = 1 40 | ) 41 | 42 | type _Parser struct { 43 | text string 44 | deltaPos [][_N]int32 45 | deltaErr [][_N]int32 46 | node map[_key]*peg.Node 47 | fail map[_key]*peg.Fail 48 | act map[_key]interface{} 49 | lastFail int 50 | data interface{} 51 | } 52 | 53 | type _key struct { 54 | start int 55 | rule int 56 | } 57 | 58 | type tooBigError struct{} 59 | 60 | func (tooBigError) Error() string { return "input is too big" } 61 | 62 | func _NewParser(text string) (*_Parser, error) { 63 | n := len(text) + 1 64 | if n < 0 { 65 | return nil, tooBigError{} 66 | } 67 | p := &_Parser{ 68 | text: text, 69 | deltaPos: make([][_N]int32, n), 70 | deltaErr: make([][_N]int32, n), 71 | node: make(map[_key]*peg.Node), 72 | fail: make(map[_key]*peg.Fail), 73 | act: make(map[_key]interface{}), 74 | } 75 | return p, nil 76 | } 77 | 78 | func _max(a, b int) int { 79 | if a > b { 80 | return a 81 | } 82 | return b 83 | } 84 | 85 | func _memoize(parser *_Parser, rule, start, pos, perr int) (int, int) { 86 | parser.lastFail = perr 87 | derr := perr - start 88 | parser.deltaErr[start][rule] = int32(derr + 1) 89 | if pos >= 0 { 90 | dpos := pos - start 91 | parser.deltaPos[start][rule] = int32(dpos + 1) 92 | return dpos, derr 93 | } 94 | parser.deltaPos[start][rule] = -1 95 | return -1, derr 96 | } 97 | 98 | func _memo(parser *_Parser, rule, start int) (int, int, bool) { 99 | dp := parser.deltaPos[start][rule] 100 | if dp == 0 { 101 | return 0, 0, false 102 | } 103 | if dp > 0 { 104 | dp-- 105 | } 106 | de := parser.deltaErr[start][rule] - 1 107 | return int(dp), int(de), true 108 | } 109 | 110 | func _failMemo(parser *_Parser, rule, start, errPos int) (int, *peg.Fail) { 111 | if start > parser.lastFail { 112 | return -1, &peg.Fail{} 113 | } 114 | dp := parser.deltaPos[start][rule] 115 | de := parser.deltaErr[start][rule] 116 | if start+int(de-1) < errPos { 117 | if dp > 0 { 118 | return start + int(dp-1), &peg.Fail{} 119 | } 120 | return -1, &peg.Fail{} 121 | } 122 | f := parser.fail[_key{start: start, rule: rule}] 123 | if dp < 0 && f != nil { 124 | return -1, f 125 | } 126 | if dp > 0 && f != nil { 127 | return start + int(dp-1), f 128 | } 129 | return start, nil 130 | } 131 | 132 | func _accept(parser *_Parser, f func(*_Parser, int) (int, int), pos, perr *int) bool { 133 | dp, de := f(parser, *pos) 134 | *perr = _max(*perr, *pos+de) 135 | if dp < 0 { 136 | return false 137 | } 138 | *pos += dp 139 | return true 140 | } 141 | 142 | func _node(parser *_Parser, f func(*_Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool { 143 | p, kid := f(parser, *pos) 144 | if kid == nil { 145 | return false 146 | } 147 | node.Kids = append(node.Kids, kid) 148 | *pos = p 149 | return true 150 | } 151 | 152 | func _fail(parser *_Parser, f func(*_Parser, int, int) (int, *peg.Fail), 
errPos int, node *peg.Fail, pos *int) bool { 153 | p, kid := f(parser, *pos, errPos) 154 | if kid.Want != "" || len(kid.Kids) > 0 { 155 | node.Kids = append(node.Kids, kid) 156 | } 157 | if p < 0 { 158 | return false 159 | } 160 | *pos = p 161 | return true 162 | } 163 | 164 | func _next(parser *_Parser, pos int) (rune, int) { 165 | r, w := peg.DecodeRuneInString(parser.text[pos:]) 166 | return r, w 167 | } 168 | 169 | func _sub(parser *_Parser, start, end int, kids []*peg.Node) *peg.Node { 170 | node := &peg.Node{ 171 | Text: parser.text[start:end], 172 | Kids: make([]*peg.Node, len(kids)), 173 | } 174 | copy(node.Kids, kids) 175 | return node 176 | } 177 | 178 | func _leaf(parser *_Parser, start, end int) *peg.Node { 179 | return &peg.Node{Text: parser.text[start:end]} 180 | } 181 | 182 | // A no-op function to mark a variable as used. 183 | func use(interface{}) {} 184 | 185 | func _ExprAccepts(parser *_Parser, start int) (deltaPos, deltaErr int) { 186 | var labels [2]string 187 | use(labels) 188 | if dp, de, ok := _memo(parser, _Expr, start); ok { 189 | return dp, de 190 | } 191 | pos, perr := start, -1 192 | // letter:[a] {…}/letter:[b] {…} 193 | { 194 | pos3 := pos 195 | // action 196 | // letter:[a] 197 | { 198 | pos5 := pos 199 | // [a] 200 | if r, w := _next(parser, pos); r != 'a' { 201 | perr = _max(perr, pos) 202 | goto fail4 203 | } else { 204 | pos += w 205 | } 206 | labels[0] = parser.text[pos5:pos] 207 | } 208 | goto ok0 209 | fail4: 210 | pos = pos3 211 | // action 212 | // letter:[b] 213 | { 214 | pos7 := pos 215 | // [b] 216 | if r, w := _next(parser, pos); r != 'b' { 217 | perr = _max(perr, pos) 218 | goto fail6 219 | } else { 220 | pos += w 221 | } 222 | labels[1] = parser.text[pos7:pos] 223 | } 224 | goto ok0 225 | fail6: 226 | pos = pos3 227 | goto fail 228 | ok0: 229 | } 230 | return _memoize(parser, _Expr, start, pos, perr) 231 | fail: 232 | return _memoize(parser, _Expr, start, -1, perr) 233 | } 234 | 235 | func _ExprNode(parser *_Parser, start int) (int, *peg.Node) { 236 | var labels [2]string 237 | use(labels) 238 | dp := parser.deltaPos[start][_Expr] 239 | if dp < 0 { 240 | return -1, nil 241 | } 242 | key := _key{start: start, rule: _Expr} 243 | node := parser.node[key] 244 | if node != nil { 245 | return start + int(dp-1), node 246 | } 247 | pos := start 248 | node = &peg.Node{Name: "Expr"} 249 | // letter:[a] {…}/letter:[b] {…} 250 | { 251 | pos3 := pos 252 | nkids1 := len(node.Kids) 253 | // action 254 | // letter:[a] 255 | { 256 | pos5 := pos 257 | // [a] 258 | if r, w := _next(parser, pos); r != 'a' { 259 | goto fail4 260 | } else { 261 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w)) 262 | pos += w 263 | } 264 | labels[0] = parser.text[pos5:pos] 265 | } 266 | goto ok0 267 | fail4: 268 | node.Kids = node.Kids[:nkids1] 269 | pos = pos3 270 | // action 271 | // letter:[b] 272 | { 273 | pos7 := pos 274 | // [b] 275 | if r, w := _next(parser, pos); r != 'b' { 276 | goto fail6 277 | } else { 278 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w)) 279 | pos += w 280 | } 281 | labels[1] = parser.text[pos7:pos] 282 | } 283 | goto ok0 284 | fail6: 285 | node.Kids = node.Kids[:nkids1] 286 | pos = pos3 287 | goto fail 288 | ok0: 289 | } 290 | node.Text = parser.text[start:pos] 291 | parser.node[key] = node 292 | return pos, node 293 | fail: 294 | return -1, nil 295 | } 296 | 297 | func _ExprFail(parser *_Parser, start, errPos int) (int, *peg.Fail) { 298 | var labels [2]string 299 | use(labels) 300 | pos, failure := _failMemo(parser, _Expr, start, 
errPos) 301 | if failure != nil { 302 | return pos, failure 303 | } 304 | failure = &peg.Fail{ 305 | Name: "Expr", 306 | Pos: int(start), 307 | } 308 | key := _key{start: start, rule: _Expr} 309 | // letter:[a] {…}/letter:[b] {…} 310 | { 311 | pos3 := pos 312 | // action 313 | // letter:[a] 314 | { 315 | pos5 := pos 316 | // [a] 317 | if r, w := _next(parser, pos); r != 'a' { 318 | if pos >= errPos { 319 | failure.Kids = append(failure.Kids, &peg.Fail{ 320 | Pos: int(pos), 321 | Want: "[a]", 322 | }) 323 | } 324 | goto fail4 325 | } else { 326 | pos += w 327 | } 328 | labels[0] = parser.text[pos5:pos] 329 | } 330 | goto ok0 331 | fail4: 332 | pos = pos3 333 | // action 334 | // letter:[b] 335 | { 336 | pos7 := pos 337 | // [b] 338 | if r, w := _next(parser, pos); r != 'b' { 339 | if pos >= errPos { 340 | failure.Kids = append(failure.Kids, &peg.Fail{ 341 | Pos: int(pos), 342 | Want: "[b]", 343 | }) 344 | } 345 | goto fail6 346 | } else { 347 | pos += w 348 | } 349 | labels[1] = parser.text[pos7:pos] 350 | } 351 | goto ok0 352 | fail6: 353 | pos = pos3 354 | goto fail 355 | ok0: 356 | } 357 | parser.fail[key] = failure 358 | return pos, failure 359 | fail: 360 | parser.fail[key] = failure 361 | return -1, failure 362 | } 363 | 364 | func _ExprAction(parser *_Parser, start int) (int, *string) { 365 | var labels [2]string 366 | use(labels) 367 | var label0 string 368 | var label1 string 369 | dp := parser.deltaPos[start][_Expr] 370 | if dp < 0 { 371 | return -1, nil 372 | } 373 | key := _key{start: start, rule: _Expr} 374 | n := parser.act[key] 375 | if n != nil { 376 | n := n.(string) 377 | return start + int(dp-1), &n 378 | } 379 | var node string 380 | pos := start 381 | // letter:[a] {…}/letter:[b] {…} 382 | { 383 | pos3 := pos 384 | var node2 string 385 | // action 386 | { 387 | start5 := pos 388 | // letter:[a] 389 | { 390 | pos6 := pos 391 | // [a] 392 | if r, w := _next(parser, pos); r != 'a' { 393 | goto fail4 394 | } else { 395 | label0 = parser.text[pos : pos+w] 396 | pos += w 397 | } 398 | labels[0] = parser.text[pos6:pos] 399 | } 400 | node = func( 401 | start, end int, letter string) string { 402 | fmt.Printf("a=[%s]\n", letter) 403 | return string(letter) 404 | }( 405 | start5, pos, label0) 406 | } 407 | goto ok0 408 | fail4: 409 | node = node2 410 | pos = pos3 411 | // action 412 | { 413 | start8 := pos 414 | // letter:[b] 415 | { 416 | pos9 := pos 417 | // [b] 418 | if r, w := _next(parser, pos); r != 'b' { 419 | goto fail7 420 | } else { 421 | label1 = parser.text[pos : pos+w] 422 | pos += w 423 | } 424 | labels[1] = parser.text[pos9:pos] 425 | } 426 | node = func( 427 | start, end int, letter string) string { 428 | fmt.Printf("b=[%s]\n", letter) 429 | return string(letter) 430 | }( 431 | start8, pos, label1) 432 | } 433 | goto ok0 434 | fail7: 435 | node = node2 436 | pos = pos3 437 | goto fail 438 | ok0: 439 | } 440 | parser.act[key] = node 441 | return pos, &node 442 | fail: 443 | return -1, nil 444 | } 445 | -------------------------------------------------------------------------------- /example/label_names/label_names.peggy: -------------------------------------------------------------------------------- 1 | { 2 | // Test labels with the same name but in different choice branches. 
3 | // peggy -o label_names.go label_names.peggy 4 | package main 5 | 6 | import ( 7 | "bufio" 8 | "fmt" 9 | "os" 10 | 11 | "github.com/eaburns/peggy/peg" 12 | ) 13 | 14 | func main() { 15 | scanner := bufio.NewScanner(os.Stdin) 16 | for scanner.Scan() { 17 | line := scanner.Text() 18 | p, err := _NewParser(line) 19 | if err != nil { 20 | fmt.Println(err) 21 | os.Exit(1) 22 | } 23 | if pos, perr := _ExprAccepts(p, 0); pos < 0 { 24 | _, fail := _ExprFail(p, 0 ,perr) 25 | fmt.Println(peg.SimpleError(line, fail)) 26 | continue 27 | } 28 | _, result := _ExprAction(p, 0) 29 | fmt.Println(*result) 30 | } 31 | if err := scanner.Err(); err != nil { 32 | fmt.Println(err) 33 | os.Exit(1) 34 | } 35 | } 36 | } 37 | 38 | Expr <- 39 | letter:[a] { fmt.Printf("a=[%s]\n", letter); return string(letter) } / 40 | letter:[b] { fmt.Printf("b=[%s]\n", letter); return string(letter) } 41 | -------------------------------------------------------------------------------- /gen.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "bytes" 11 | "errors" 12 | "go/format" 13 | "go/parser" 14 | "go/token" 15 | "io" 16 | "os" 17 | "reflect" 18 | "strconv" 19 | "text/template" 20 | ) 21 | 22 | // Generate generates a parser for the rules, 23 | // using a default Config: 24 | // Config{Prefix: "_"} 25 | func Generate(w io.Writer, file string, grammar *Grammar) error { 26 | return Config{Prefix: "_"}.Generate(w, file, grammar) 27 | } 28 | 29 | // A Config specifies code generation options. 30 | type Config struct { 31 | Prefix string 32 | } 33 | 34 | // Generate generates a parser for the rules. 
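// For illustration only (a hedged sketch, not from the original source; buf, cfg, and gr are hypothetical names, and gr is assumed to be a *Grammar that has already been parsed and checked), a caller might use it like this:
//
//	var buf bytes.Buffer
//	cfg := Config{Prefix: "_"}
//	if err := cfg.Generate(&buf, "grammar.peggy", gr); err != nil {
//		log.Fatal(err)
//	}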
35 | func (c Config) Generate(w io.Writer, file string, gr *Grammar) error { 36 | b := bytes.NewBuffer(nil) 37 | if err := writePrelude(b, file, gr); err != nil { 38 | return err 39 | } 40 | if err := writeDecls(b, c, gr); err != nil { 41 | return err 42 | } 43 | for _, r := range gr.CheckedRules { 44 | if err := writeRule(b, c, r); err != nil { 45 | return err 46 | } 47 | } 48 | return gofmt(w, b.String()) 49 | } 50 | 51 | func gofmt(w io.Writer, s string) error { 52 | fset := token.NewFileSet() 53 | root, err := parser.ParseFile(fset, "", s, parser.ParseComments) 54 | if err != nil { 55 | io.WriteString(os.Stderr, s) 56 | io.WriteString(w, s) 57 | return err 58 | } 59 | if err := format.Node(w, fset, root); err != nil { 60 | io.WriteString(w, s) 61 | return err 62 | } 63 | return nil 64 | } 65 | 66 | func writePrelude(w io.Writer, file string, gr *Grammar) error { 67 | if gr.Prelude == nil { 68 | return nil 69 | } 70 | _, err := io.WriteString(w, gr.Prelude.String()) 71 | return err 72 | } 73 | 74 | func writeDecls(w io.Writer, c Config, gr *Grammar) error { 75 | tmp, err := template.New("Decls").Parse(declsTemplate) 76 | if err != nil { 77 | return err 78 | } 79 | return tmp.Execute(w, map[string]interface{}{ 80 | "Config": c, 81 | "Grammar": gr, 82 | }) 83 | } 84 | 85 | func writeRule(w io.Writer, c Config, r *Rule) error { 86 | funcs := map[string]interface{}{ 87 | "gen": gen, 88 | "quote": strconv.Quote, 89 | "makeAcceptState": func(r *Rule) state { 90 | return state{ 91 | Config: c, 92 | Rule: r, 93 | n: new(int), 94 | AcceptsPass: true, 95 | } 96 | }, 97 | "makeNodeState": func(r *Rule) state { 98 | return state{ 99 | Config: c, 100 | Rule: r, 101 | n: new(int), 102 | NodePass: true, 103 | } 104 | }, 105 | "makeFailState": func(r *Rule) state { 106 | return state{ 107 | Config: c, 108 | Rule: r, 109 | n: new(int), 110 | FailPass: true, 111 | } 112 | }, 113 | "makeActionState": func(r *Rule) state { 114 | return state{ 115 | Config: c, 116 | Rule: r, 117 | n: new(int), 118 | ActionPass: true, 119 | } 120 | }, 121 | } 122 | data := map[string]interface{}{ 123 | "Config": c, 124 | "Rule": r, 125 | "GenActions": *genActions, 126 | "GenParseTree": *genParseTree, 127 | } 128 | tmp, err := template.New("rule").Parse(ruleTemplate) 129 | if err != nil { 130 | return err 131 | } 132 | for _, ts := range [][2]string{ 133 | {"ruleAccepts", ruleAccepts}, 134 | {"ruleNode", ruleNode}, 135 | {"ruleFail", ruleFail}, 136 | {"stringLabels", stringLabels}, 137 | {"ruleAction", ruleAction}, 138 | } { 139 | name, text := ts[0], ts[1] 140 | tmp, err = tmp.New(name).Funcs(funcs).Parse(text) 141 | if err != nil { 142 | return err 143 | } 144 | } 145 | return tmp.ExecuteTemplate(w, "rule", data) 146 | } 147 | 148 | type state struct { 149 | Config 150 | Rule *Rule 151 | Expr Expr 152 | Fail string 153 | // Node is the ident into which to assign action-pass value, or "". 154 | Node string 155 | n *int 156 | // AcceptsPass indicates whether to generate the accepts pass. 157 | AcceptsPass bool 158 | // NodePass indicates whether to generate the node pass. 159 | NodePass bool 160 | // FailPass indicates whether to generate the error pass. 161 | FailPass bool 162 | // ActionPass indicates whether to generate the action pass. 
163 | ActionPass bool 164 | } 165 | 166 | func (s state) id(str string) string { 167 | (*s.n)++ 168 | return str + strconv.Itoa(*s.n-1) 169 | } 170 | 171 | func gen(parentState state, expr Expr, node, fail string) (string, error) { 172 | t := reflect.TypeOf(expr) 173 | tmpString, ok := templates[reflect.TypeOf(expr)] 174 | if !ok { 175 | return "", errors.New("gen not found: " + t.String()) 176 | } 177 | funcs := map[string]interface{}{ 178 | "quote": strconv.Quote, 179 | "quoteRune": strconv.QuoteRune, 180 | "id": parentState.id, 181 | "gen": gen, 182 | "last": func(i int, exprs []Expr) bool { return i == len(exprs)-1 }, 183 | } 184 | tmp, err := template.New(t.String()).Funcs(funcs).Parse(tmpString) 185 | if err != nil { 186 | return "", err 187 | } 188 | if err := addGlobalTemplates(tmp); err != nil { 189 | return "", err 190 | } 191 | b := bytes.NewBuffer(nil) 192 | state := parentState 193 | state.Expr = expr 194 | state.Fail = fail 195 | state.Node = node 196 | err = tmp.Execute(b, state) 197 | return b.String(), err 198 | } 199 | 200 | var globalTemplates = [][2]string{ 201 | {"charClassCondition", charClassCondition}, 202 | } 203 | 204 | func addGlobalTemplates(tmp *template.Template) error { 205 | for _, p := range globalTemplates { 206 | var err error 207 | if tmp, err = tmp.New(p[0]).Parse(p[1]); err != nil { 208 | return err 209 | } 210 | } 211 | return nil 212 | } 213 | 214 | // A note on formatting in Expr templates 215 | // 216 | // gofmt properly fixes any horizontal spacing issues. 217 | // However, while it eliminates duplicate empty lines, 218 | // it does not eliminate empty lines altogether. 219 | // For example, it will convert a sequence of 2 or more empty lines 220 | // into a single empty line, but it will not remove the empty line. 221 | // So it's important to handle newlines properly 222 | // to maintain nice, consistent formatting. 223 | // 224 | // There are two rules: 225 | // 1) Templates must end with a newline, or the codegen will be invalid. 226 | // 2) Templates should not begin with a newline, or the codegen will be ugly.
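// For example (an illustrative sketch, not from the original source), an Expr template obeying both rules looks like:
//
//	var exampleTemplate = `// generated comment
//	pos++
//	`
//
// That is, the text begins immediately after the opening backquote and ends with a newline just before the closing backquote.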
227 | 228 | var declsTemplate = ` 229 | {{$pre := $.Config.Prefix -}} 230 | 231 | const ( 232 | {{range $r := $.Grammar.CheckedRules -}} 233 | {{$pre}}{{$r.Name.Ident}} int = {{$r.N}} 234 | {{end}} 235 | {{$pre}}N int = {{len $.Grammar.CheckedRules}} 236 | ) 237 | 238 | type {{$pre}}Parser struct { 239 | text string 240 | deltaPos [][{{$pre}}N]int32 241 | deltaErr [][{{$pre}}N]int32 242 | node map[{{$pre}}key]*peg.Node 243 | fail map[{{$pre}}key]*peg.Fail 244 | act map[{{$pre}}key]interface{} 245 | lastFail int 246 | data interface{} 247 | } 248 | 249 | type {{$pre}}key struct { 250 | start int 251 | rule int 252 | } 253 | 254 | type tooBigError struct{} 255 | func (tooBigError) Error() string { return "input is too big" } 256 | 257 | func {{$pre}}NewParser(text string) (*{{$pre}}Parser, error) { 258 | n := len(text)+1 259 | if n < 0 { 260 | return nil, tooBigError{} 261 | } 262 | p := &{{$pre}}Parser{ 263 | text: text, 264 | deltaPos: make([][{{$pre}}N]int32, n), 265 | deltaErr: make([][{{$pre}}N]int32, n), 266 | node: make(map[{{$pre}}key]*peg.Node), 267 | fail: make(map[{{$pre}}key]*peg.Fail), 268 | act: make(map[{{$pre}}key]interface{}), 269 | } 270 | return p, nil 271 | } 272 | 273 | func {{$pre}}max(a, b int) int { 274 | if a > b { 275 | return a 276 | } 277 | return b 278 | } 279 | 280 | func {{$pre}}memoize(parser *{{$pre}}Parser, rule, start, pos, perr int) (int, int) { 281 | parser.lastFail = perr 282 | derr := perr - start 283 | parser.deltaErr[start][rule] = int32(derr+1) 284 | if pos >= 0 { 285 | dpos := pos - start 286 | parser.deltaPos[start][rule] = int32(dpos + 1) 287 | return dpos, derr 288 | } 289 | parser.deltaPos[start][rule] = -1 290 | return -1, derr 291 | } 292 | 293 | func {{$pre}}memo(parser *{{$pre}}Parser, rule, start int) (int, int, bool) { 294 | dp := parser.deltaPos[start][rule] 295 | if dp == 0 { 296 | return 0, 0, false 297 | } 298 | if dp > 0 { 299 | dp-- 300 | } 301 | de := parser.deltaErr[start][rule] - 1 302 | return int(dp), int(de), true 303 | } 304 | 305 | func {{$pre}}failMemo(parser *{{$pre}}Parser, rule, start, errPos int) (int, *peg.Fail) { 306 | if start > parser.lastFail { 307 | return -1, &peg.Fail{} 308 | } 309 | dp := parser.deltaPos[start][rule] 310 | de := parser.deltaErr[start][rule] 311 | if start+int(de-1) < errPos { 312 | if dp > 0 { 313 | return start + int(dp-1), &peg.Fail{} 314 | } 315 | return -1, &peg.Fail{} 316 | } 317 | f := parser.fail[_key{start: start, rule: rule}] 318 | if dp < 0 && f != nil { 319 | return -1, f 320 | } 321 | if dp > 0 && f != nil { 322 | return start + int(dp-1), f 323 | } 324 | return start, nil 325 | } 326 | 327 | func {{$pre}}accept(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, int), pos, perr *int) bool { 328 | dp, de := f(parser, *pos) 329 | *perr = _max(*perr, *pos+de) 330 | if dp < 0 { 331 | return false 332 | } 333 | *pos += dp 334 | return true 335 | } 336 | 337 | func {{$pre}}node(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool { 338 | p, kid := f(parser, *pos) 339 | if kid == nil { 340 | return false 341 | } 342 | node.Kids = append(node.Kids, kid) 343 | *pos = p 344 | return true 345 | } 346 | 347 | func {{$pre}}fail(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int, int) (int, *peg.Fail), errPos int, node *peg.Fail, pos *int) bool { 348 | p, kid := f(parser, *pos, errPos) 349 | if kid.Want != "" || len(kid.Kids) > 0 { 350 | node.Kids = append(node.Kids, kid) 351 | } 352 | if p < 0 { 353 | return false 354 | } 355 | *pos = p 
356 | return true 357 | } 358 | 359 | func {{$pre}}next(parser *{{$pre}}Parser, pos int) (rune, int) { 360 | r, w := peg.DecodeRuneInString(parser.text[pos:]) 361 | return r, w 362 | } 363 | 364 | func {{$pre}}sub(parser *{{$pre}}Parser, start, end int, kids []*peg.Node) *peg.Node { 365 | node := &peg.Node{ 366 | Text: parser.text[start:end], 367 | Kids: make([]*peg.Node, len(kids)), 368 | } 369 | copy(node.Kids, kids) 370 | return node 371 | } 372 | 373 | func {{$pre}}leaf(parser *{{$pre}}Parser, start, end int) *peg.Node { 374 | return &peg.Node{Text: parser.text[start:end]} 375 | } 376 | 377 | // A no-op function to mark a variable as used. 378 | func use(interface{}) {} 379 | ` 380 | 381 | // templates contains a mapping from Expr types to their templates. 382 | // These templates parse the input text and compute 383 | // for each rule and input position pair encountered by the parse, 384 | // the position immediately following the text accepted by the rule, 385 | // or the position of the furthest error encountered by the rule. 386 | // 387 | // When generating the parse tree pass, 388 | // the templates also add peg.Nodes to the kids slice. 389 | // 390 | // Variables for use by the templates: 391 | // parser is the *Parser. 392 | // parser.text is the input text. 393 | // pos is the byte offset into parser.text of where to begin parsing. 394 | // If the Expr fails to parse, pos must be set to the position of the error. 395 | // If the Expr succeeds to parse, pos must be set 396 | // to the position just after the accepted text. 397 | // 398 | // On the accepts pass these variables are also defined: 399 | // perr is the maximum error position found so far. 400 | // It is only defined if Rule.Expr.CanFail. 401 | // It is initialized to -1 at the beginning of the parse. 402 | // It is updated by Choice nodes when branches fail, 403 | // and by rules when their entire parse fails. 404 | // ok is a scratch boolean variable. 405 | // It may be either true or false before and after each Expr template. 406 | // Each template that wants to use ok must set it before using it. 407 | // 408 | // On the node tree pass these variables are also defined: 409 | // node is the *peg.Node of the Rule being parsed. 410 | // 411 | // On the action tree pass these variables are also defined: 412 | // node is an interface{} containing the current action tree node value. 413 | // 414 | // On the fail tree pass these variables are also defined: 415 | // failure is the *peg.Fail of the Rule being parsed. 416 | // errPos is the position before which Fail nodes are not generated.
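// For example (an illustrative sketch, not from the original source), instantiating the Literal template below for the literal "ab" on the accepts pass, with the default "_" prefix and a fail label named fail, yields code roughly like:
//
//	// "ab"
//	if len(parser.text[pos:]) < 2 || parser.text[pos:pos+2] != "ab" {
//		perr = _max(perr, pos)
//		goto fail
//	}
//	pos += 2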
417 | var templates = map[reflect.Type]string{ 418 | reflect.TypeOf(&Choice{}): choiceTemplate, 419 | reflect.TypeOf(&Action{}): actionTemplate, 420 | reflect.TypeOf(&Sequence{}): sequenceTemplate, 421 | reflect.TypeOf(&LabelExpr{}): labelExprTemplate, 422 | reflect.TypeOf(&PredExpr{}): predExprTemplate, 423 | reflect.TypeOf(&RepExpr{}): repExprTemplate, 424 | reflect.TypeOf(&OptExpr{}): optExprTemplate, 425 | reflect.TypeOf(&SubExpr{}): subExprTemplate, 426 | reflect.TypeOf(&PredCode{}): predCodeTemplate, 427 | reflect.TypeOf(&Ident{}): identTemplate, 428 | reflect.TypeOf(&Literal{}): literalTemplate, 429 | reflect.TypeOf(&Any{}): anyTemplate, 430 | reflect.TypeOf(&CharClass{}): charClassTemplate, 431 | } 432 | 433 | var ruleTemplate = ` 434 | {{template "ruleAccepts" $}} 435 | {{if $.GenParseTree -}} 436 | {{template "ruleNode" $}} 437 | {{end -}} 438 | {{template "ruleFail" $}} 439 | {{if $.GenActions -}} 440 | {{template "ruleAction" $}} 441 | {{end -}} 442 | ` 443 | 444 | var stringLabels = ` 445 | {{- if $.Rule.Labels -}} 446 | var labels [{{len $.Rule.Labels}}]string 447 | use(labels) 448 | {{- end -}} 449 | ` 450 | 451 | var ruleAccepts = ` 452 | {{$pre := $.Config.Prefix -}} 453 | {{- $id := $.Rule.Name.Ident -}} 454 | func {{$pre}}{{$id}}Accepts(parser *{{$pre}}Parser, start int) (deltaPos, deltaErr int) { 455 | {{- template "stringLabels" $}} 456 | if dp, de, ok := {{$pre}}memo(parser, {{$pre}}{{$id}}, start); ok { 457 | return dp, de 458 | } 459 | pos, perr := start, -1 460 | {{gen (makeAcceptState $.Rule) $.Rule.Expr "" "fail" -}} 461 | 462 | {{if $.Rule.ErrorName -}} 463 | perr = start 464 | {{end -}} 465 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, pos, perr) 466 | {{if $.Rule.Expr.CanFail -}} 467 | fail: 468 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, -1, perr) 469 | {{end -}} 470 | } 471 | ` 472 | 473 | var ruleNode = ` 474 | {{$pre := $.Config.Prefix -}} 475 | {{- $id := $.Rule.Name.Ident -}} 476 | {{- $name := $.Rule.Name.String -}} 477 | func {{$pre}}{{$id}}Node(parser *{{$pre}}Parser, start int) (int, *peg.Node) { 478 | {{- template "stringLabels" $}} 479 | dp := parser.deltaPos[start][{{$pre}}{{$id}}] 480 | if dp < 0 { 481 | return -1, nil 482 | } 483 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 484 | node := parser.node[key] 485 | if node != nil { 486 | return start + int(dp - 1), node 487 | } 488 | pos := start 489 | node = &peg.Node{Name: {{quote $name}}} 490 | {{gen (makeNodeState $.Rule) $.Rule.Expr "" "fail" -}} 491 | 492 | node.Text = parser.text[start:pos] 493 | parser.node[key] = node 494 | return pos, node 495 | {{if $.Rule.Expr.CanFail -}} 496 | fail: 497 | return -1, nil 498 | {{end -}} 499 | } 500 | ` 501 | 502 | var ruleFail = ` 503 | {{$pre := $.Config.Prefix -}} 504 | {{- $id := $.Rule.Name.Ident -}} 505 | func {{$pre}}{{$id}}Fail(parser *{{$pre}}Parser, start, errPos int) (int, *peg.Fail) { 506 | {{- template "stringLabels" $}} 507 | pos, failure := {{$pre}}failMemo(parser, {{$pre}}{{$id}}, start, errPos) 508 | if failure != nil { 509 | return pos, failure 510 | } 511 | failure = &peg.Fail{ 512 | Name: {{quote $id}}, 513 | Pos: int(start), 514 | } 515 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 516 | {{gen (makeFailState $.Rule) $.Rule.Expr "" "fail" -}} 517 | 518 | {{if $.Rule.ErrorName -}} 519 | failure.Kids = nil 520 | {{end -}} 521 | parser.fail[key] = failure 522 | return pos, failure 523 | {{if $.Rule.Expr.CanFail -}} 524 | fail: 525 | {{if $.Rule.ErrorName -}} 526 | failure.Kids = nil 527 | 
failure.Want = {{quote $.Rule.ErrorName.String}} 528 | {{end -}} 529 | parser.fail[key] = failure 530 | return -1, failure 531 | {{end -}} 532 | } 533 | ` 534 | 535 | var ruleAction = ` 536 | {{$pre := $.Config.Prefix -}} 537 | {{- $id := $.Rule.Name.Ident -}} 538 | {{- $type := $.Rule.Expr.Type -}} 539 | func {{$pre}}{{$id}}Action(parser *{{$pre}}Parser, start int) (int, *{{$type}}) { 540 | {{- template "stringLabels" $}} 541 | {{if $.Rule.Labels -}} 542 | {{range $l := $.Rule.Labels -}} 543 | var label{{$l.N}} {{$l.Type}} 544 | {{end}} 545 | {{- end -}} 546 | dp := parser.deltaPos[start][{{$pre}}{{$id}}] 547 | if dp < 0 { 548 | return -1, nil 549 | } 550 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}} 551 | n := parser.act[key] 552 | if n != nil { 553 | n := n.({{$type}}) 554 | return start + int(dp - 1), &n 555 | } 556 | var node {{$type}} 557 | pos := start 558 | {{gen (makeActionState $.Rule) $.Rule.Expr "node" "fail" -}} 559 | 560 | parser.act[key] = node 561 | return pos, &node 562 | {{if $.Rule.Expr.CanFail -}} 563 | fail: 564 | return -1, nil 565 | {{end -}} 566 | } 567 | ` 568 | 569 | var choiceTemplate = `// {{$.Expr.String}} 570 | { 571 | {{- $ok := id "ok" -}} 572 | {{- $nkids := id "nkids" -}} 573 | {{- $node0 := id "node" -}} 574 | {{- $pos0 := id "pos" -}} 575 | {{$pos0}} := pos 576 | {{if $.NodePass -}} 577 | {{$nkids}} := len(node.Kids) 578 | {{else if (and $.Node $.ActionPass) -}} 579 | var {{$node0}} {{$.Expr.Type}} 580 | {{end -}} 581 | {{- range $i, $subExpr := $.Expr.Exprs -}} 582 | {{- $fail := id "fail" -}} 583 | {{gen $ $subExpr $.Node $fail -}} 584 | 585 | {{if $subExpr.CanFail -}} 586 | goto {{$ok}} 587 | {{$fail}}: 588 | {{if $.NodePass -}} 589 | node.Kids = node.Kids[:{{$nkids}}] 590 | {{else if (and $.Node $.ActionPass) -}} 591 | {{$.Node}} = {{$node0}} 592 | {{end -}} 593 | pos = {{$pos0}} 594 | {{if last $i $.Expr.Exprs -}} 595 | goto {{$.Fail}} 596 | {{end -}} 597 | {{end -}} 598 | {{end -}} 599 | {{$ok}}: 600 | } 601 | ` 602 | 603 | var actionTemplate = `// action 604 | {{if $.ActionPass -}} 605 | { 606 | {{$start := id "start" -}} 607 | {{$start}} := pos 608 | {{gen $ $.Expr.Expr "" $.Fail -}} 609 | {{/* TODO: don't put the func in the scope of the rule. 
*/ -}} 610 | {{if $.Node}}{{$.Node}} = {{end}} func( 611 | start, end int, 612 | {{- if $.Expr.Labels -}} 613 | {{range $lexpr := $.Expr.Labels -}} 614 | {{$lexpr.Label}} {{$lexpr.Type}}, 615 | {{- end -}} 616 | {{- end -}}) 617 | {{- $.Expr.Type}} { {{$.Expr.Code}} }( 618 | {{$start}}, pos, 619 | {{- if $.Expr.Labels -}} 620 | {{range $lexpr := $.Expr.Labels -}} 621 | label{{$lexpr.N}}, 622 | {{- end -}} 623 | {{- end -}} 624 | ) 625 | } 626 | {{else -}} 627 | {{gen $ $.Expr.Expr "" $.Fail -}} 628 | {{end -}} 629 | ` 630 | 631 | var sequenceTemplate = `// {{$.Expr.String}} 632 | {{$node := id "node" -}} 633 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 634 | { 635 | var {{$node}} string 636 | {{else if (and $.ActionPass $.Node) -}} 637 | {{$.Node}} = make({{$.Expr.Type}}, {{len $.Expr.Exprs}}) 638 | {{end -}} 639 | 640 | {{range $i, $subExpr := $.Expr.Exprs -}} 641 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 642 | {{gen $ $subExpr $node $.Fail -}} 643 | {{$.Node}}, {{$node}} = {{$.Node}}+{{$node}}, "" 644 | {{else if (and $.ActionPass $.Node) -}} 645 | {{gen $ $subExpr (printf "%s[%d]" $.Node $i) $.Fail -}} 646 | {{else -}} 647 | {{gen $ $subExpr "" $.Fail -}} 648 | {{end -}} 649 | {{end -}} 650 | 651 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}} 652 | } 653 | {{end -}} 654 | ` 655 | 656 | var labelExprTemplate = `// {{$.Expr.String}} 657 | {{$name := $.Expr.Label.String -}} 658 | {{- $pos0 := id "pos" -}} 659 | {{- $subExpr := $.Expr.Expr -}} 660 | { 661 | {{$pos0}} := pos 662 | {{if $.ActionPass -}} 663 | {{gen $ $subExpr (printf "label%d" $.Expr.N) $.Fail -}} 664 | {{if $.Node -}} 665 | {{$.Node}} = label{{$.Expr.N}} 666 | {{end -}} 667 | {{else -}} 668 | {{gen $ $subExpr "" $.Fail -}} 669 | {{end -}} 670 | labels[{{$.Expr.N}}] = parser.text[{{$pos0}}:pos] 671 | } 672 | ` 673 | 674 | var predExprTemplate = `// {{$.Expr.String}} 675 | { 676 | {{- $pre := $.Config.Prefix -}} 677 | {{- $ok := id "ok" -}} 678 | {{- $subExpr := $.Expr.Expr -}} 679 | {{- $pos0 := id "pos" -}} 680 | {{- $nkids := id "nkids" -}} 681 | {{- $perr0 := id "perr" -}} 682 | {{$pos0}} := pos 683 | {{if $.AcceptsPass -}} 684 | {{$perr0}} := perr 685 | {{else if $.NodePass -}} 686 | {{$nkids}} := len(node.Kids) 687 | {{else if $.FailPass -}} 688 | {{$nkids}} := len(failure.Kids) 689 | {{end -}} 690 | 691 | {{- if $.Expr.Neg -}} 692 | {{gen $ $subExpr "" $ok -}} 693 | pos = {{$pos0}} 694 | {{if $.NodePass -}} 695 | node.Kids = node.Kids[:{{$nkids}}] 696 | {{else if $.AcceptsPass -}} 697 | perr = {{$pre}}max({{$perr0}}, pos) 698 | {{else if $.FailPass -}} 699 | failure.Kids = failure.Kids[:{{$nkids}}] 700 | if pos >= errPos { 701 | failure.Kids = append(failure.Kids, &peg.Fail{ 702 | Pos: int(pos), 703 | Want: {{quote $.Expr.String}}, 704 | }) 705 | } 706 | {{end -}} 707 | goto {{$.Fail}} 708 | {{else -}} 709 | {{- $fail := id "fail" -}} 710 | {{gen $ $subExpr "" $fail -}} 711 | goto {{$ok}} 712 | {{$fail}}: 713 | pos = {{$pos0}} 714 | {{if $.AcceptsPass -}} 715 | perr = {{$pre}}max({{$perr0}}, pos) 716 | {{else if $.FailPass -}} 717 | failure.Kids = failure.Kids[:{{$nkids}}] 718 | if pos >= errPos { 719 | failure.Kids = append(failure.Kids, &peg.Fail{ 720 | Pos: int(pos), 721 | Want: {{quote $.Expr.String}}, 722 | }) 723 | } 724 | {{end -}} 725 | goto {{$.Fail}} 726 | {{end -}} 727 | 728 | {{$ok}}: 729 | pos = {{$pos0}} 730 | {{if $.AcceptsPass -}} 731 | perr = {{$perr0}} 732 | {{else if $.NodePass -}} 733 | node.Kids = node.Kids[:{{$nkids}}] 734 | {{else if 
$.FailPass -}} 735 | failure.Kids = failure.Kids[:{{$nkids}}] 736 | {{else if (and $.ActionPass $.Node) -}} 737 | {{$.Node}} = "" 738 | {{end -}} 739 | } 740 | ` 741 | 742 | var repExprTemplate = `// {{$.Expr.String}} 743 | {{$nkids := id "nkids" -}} 744 | {{$pos0 := id "pos" -}} 745 | {{$node := id "node" -}} 746 | {{- $fail := id "fail" -}} 747 | {{- $subExpr := $.Expr.Expr -}} 748 | {{if eq $.Expr.Op '+' -}} 749 | {{if (and $.ActionPass $.Node) -}} 750 | { 751 | var {{$node}} {{$subExpr.Type}} 752 | {{gen $ $subExpr $node $.Fail -}} 753 | {{if (eq $.Expr.Type "string") -}} 754 | {{$.Node}} += {{$node}} 755 | {{else -}} 756 | {{$.Node}} = append({{$.Node}}, {{$node}}) 757 | {{end -}} 758 | } 759 | {{else -}} 760 | {{gen $ $subExpr "" $.Fail -}} 761 | {{end -}} 762 | {{end -}} 763 | for { 764 | {{if $.NodePass -}} 765 | {{$nkids}} := len(node.Kids) 766 | {{end -}} 767 | {{$pos0}} := pos 768 | {{if (and $.ActionPass $.Node) -}} 769 | var {{$node}} {{$subExpr.Type}} 770 | {{gen $ $subExpr $node $fail -}} 771 | {{if (eq $.Expr.Type "string") -}} 772 | {{$.Node}} += {{$node}} 773 | {{else -}} 774 | {{$.Node}} = append({{$.Node}}, {{$node}}) 775 | {{end -}} 776 | {{else -}} 777 | {{gen $ $subExpr "" $fail -}} 778 | {{end -}} 779 | continue 780 | {{$fail}}: 781 | {{if $.NodePass -}} 782 | node.Kids = node.Kids[:{{$nkids}}] 783 | {{end -}} 784 | pos = {{$pos0}} 785 | break 786 | } 787 | ` 788 | 789 | var optExprTemplate = `// {{$.Expr.String}} 790 | {{$nkids := id "nkids" -}} 791 | {{$pos0 := id "pos" -}} 792 | {{- $fail := id "fail" -}} 793 | {{- $subExpr := $.Expr.Expr -}} 794 | {{- if $subExpr.CanFail -}} 795 | { 796 | {{if $.NodePass -}} 797 | {{$nkids}} := len(node.Kids) 798 | {{end -}} 799 | {{$pos0}} := pos 800 | {{if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}} 801 | {{gen $ $subExpr $.Node $fail -}} 802 | {{else if (and $.ActionPass $.Node) -}} 803 | {{$.Node}} = new({{$subExpr.Type}}) 804 | {{gen $ $subExpr (printf "*%s" $.Node) $fail -}} 805 | {{else -}} 806 | {{gen $ $subExpr "" $fail -}} 807 | {{end -}} 808 | {{- $ok := id "ok" -}} 809 | goto {{$ok}} 810 | {{$fail}}: 811 | {{if $.NodePass -}} 812 | node.Kids = node.Kids[:{{$nkids}}] 813 | {{else if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}} 814 | {{$.Node}} = "" 815 | {{else if (and $.ActionPass $.Node) -}} 816 | {{$.Node}} = nil 817 | {{end -}} 818 | pos = {{$pos0}} 819 | {{$ok}}: 820 | } 821 | {{else -}} 822 | {{- /* TODO: disallow this case in check */ -}} 823 | {{gen $ $subExpr $fail -}} 824 | {{- end -}} 825 | ` 826 | 827 | var subExprTemplate = `// {{$.Expr.String}} 828 | {{if $.NodePass -}} 829 | { 830 | {{- $pre := $.Config.Prefix -}} 831 | {{$nkids := id "nkids" -}} 832 | {{$nkids}} := len(node.Kids) 833 | {{$pos0 := id "pos0" -}} 834 | {{$pos0}} := pos 835 | {{gen $ $.Expr.Expr $.Node $.Fail -}} 836 | sub := {{$pre}}sub(parser, {{$pos0}}, pos, node.Kids[{{$nkids}}:]) 837 | node.Kids = append(node.Kids[:{{$nkids}}], sub) 838 | } 839 | {{else -}} 840 | {{gen $ $.Expr.Expr $.Node $.Fail -}} 841 | {{end -}} 842 | ` 843 | 844 | // TODO: instead, create a function for each predicate 845 | // with params that are the parser followed by 846 | // a string for each defined label. 847 | // Predicate code shouldn't have access to the label.Kids, 848 | // because it's undefined for the Accepts and Fail pass. 849 | // NOTE: kids are OK for actions, 850 | // because actions are only to be called by the Node pass 851 | // on a successful parse. 
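// For illustration (a hedged sketch, not from the original source), a non-negated predicate such as &{ isSpace(s) }, in a rule whose only label is s, currently expands on the accepts pass to roughly:
//
//	if ok := func(s string) bool { return isSpace(s) }(labels[0]); !ok {
//		perr = _max(perr, pos)
//		goto fail
//	}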
852 | var predCodeTemplate = `// pred code 853 | if ok := func( 854 | {{- if $.Expr.Labels -}} 855 | {{range $lexpr := $.Expr.Labels -}} 856 | {{$lexpr.Label}} string, 857 | {{- end -}} 858 | {{- end -}}) bool { return {{$.Expr.Code}} }( 859 | {{- if $.Expr.Labels -}} 860 | {{range $lexpr := $.Expr.Labels -}} 861 | labels[{{$lexpr.N}}], 862 | {{- end -}} 863 | {{- end -}} 864 | ); {{if not $.Expr.Neg}}!{{end}}ok { 865 | {{if $.AcceptsPass -}} 866 | {{- $pre := $.Config.Prefix -}} 867 | perr = {{$pre}}max(perr, pos) 868 | {{else if $.FailPass -}} 869 | if pos >= errPos { 870 | failure.Kids = append(failure.Kids, &peg.Fail{ 871 | Pos: int(pos), 872 | Want: 873 | {{- if $.Expr.Neg}}"!{"{{else}}"&{"{{end}}+ 874 | {{- quote $.Expr.Code.String}}+"}", 875 | }) 876 | } 877 | {{end -}} 878 | goto {{$.Fail}} 879 | } 880 | {{if (and $.ActionPass $.Node) -}} 881 | {{$.Node}} = "" 882 | {{end -}} 883 | ` 884 | 885 | var identTemplate = `// {{$.Expr.String}} 886 | {{$pre := $.Config.Prefix -}} 887 | {{- $name := $.Expr.Name.Ident -}} 888 | {{if $.AcceptsPass -}} 889 | if !{{$pre}}accept(parser, {{$pre}}{{$name}}Accepts, &pos, &perr) { 890 | goto {{$.Fail}} 891 | } 892 | {{else if $.NodePass -}} 893 | if !{{$pre}}node(parser, {{$pre}}{{$name}}Node, node, &pos) { 894 | goto {{$.Fail}} 895 | } 896 | {{else if $.FailPass -}} 897 | if !{{$pre}}fail(parser, {{$pre}}{{$name}}Fail, errPos, failure, &pos) { 898 | goto {{$.Fail}} 899 | } 900 | {{else if $.ActionPass -}} 901 | if p, n := {{$pre}}{{$name}}Action(parser, pos); n == nil { 902 | goto {{$.Fail}} 903 | } else { 904 | {{if (and $.ActionPass $.Node) -}} 905 | {{$.Node}} = *n 906 | {{end -}} 907 | pos = p 908 | } 909 | {{end -}} 910 | ` 911 | 912 | var literalTemplate = `// {{$.Expr.String}} 913 | {{$want := quote $.Expr.Text.String -}} 914 | {{- $n := len $.Expr.Text.String -}} 915 | if len(parser.text[pos:]) < {{$n}} || parser.text[pos:pos+{{$n}}] != {{$want}} { 916 | {{if $.AcceptsPass -}} 917 | {{- $pre := $.Config.Prefix -}} 918 | perr = {{$pre}}max(perr, pos) 919 | {{else if $.FailPass -}} 920 | if pos >= errPos { 921 | failure.Kids = append(failure.Kids, &peg.Fail{ 922 | Pos: int(pos), 923 | Want: {{quote $.Expr.String}}, 924 | }) 925 | } 926 | {{end -}} 927 | goto {{$.Fail}} 928 | } 929 | {{$pre := $.Config.Prefix -}} 930 | {{if $.NodePass -}} 931 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + {{$n}})) 932 | {{else if (and $.ActionPass $.Node) -}} 933 | {{$.Node}} = parser.text[pos:pos+{{$n}}] 934 | {{end -}} 935 | {{if eq $n 1 -}} 936 | pos++ 937 | {{- else -}} 938 | pos += {{$n}} 939 | {{- end}} 940 | ` 941 | 942 | var anyTemplate = `// {{$.Expr.String}} 943 | {{$pre := $.Config.Prefix -}} 944 | {{- /* \uFFFD is utf8.RuneError */ -}} 945 | if r, w := {{$pre}}next(parser, pos); w == 0 || r == '\uFFFD' { 946 | {{if $.AcceptsPass -}} 947 | {{- $pre := $.Config.Prefix -}} 948 | perr = {{$pre}}max(perr, pos) 949 | {{else if $.FailPass -}} 950 | if pos >= errPos { 951 | failure.Kids = append(failure.Kids, &peg.Fail{ 952 | Pos: int(pos), 953 | Want: ".", 954 | }) 955 | } 956 | {{end -}} 957 | goto {{$.Fail}} 958 | } else { 959 | {{$pre := $.Config.Prefix -}} 960 | {{if $.NodePass -}} 961 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w)) 962 | {{else if (and $.ActionPass $.Node) -}} 963 | {{$.Node}} = parser.text[pos:pos+w] 964 | {{end -}} 965 | pos += w 966 | } 967 | ` 968 | 969 | // charClassCondition emits the if-condition for a character class, 970 | // assuming that r and w are the rune and its width respectively. 
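// For example (illustrative), for a class such as [a-zA-Z] the emitted condition is roughly
//
//	(r < 'a' || r > 'z') && (r < 'A' || r > 'Z')
//
// and for a negated class such as [^0-9] it is roughly
//
//	w == 0 || r == '\uFFFD' || (r >= '0' && r <= '9')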
971 | var charClassCondition = ` 972 | {{- /* \uFFFD is utf8.RuneError */ -}} 973 | {{- if $.Expr.Neg -}}w == 0 || r == '\uFFFD' ||{{end}} 974 | {{- range $i, $span := $.Expr.Spans -}} 975 | {{- $first := index $span 0 -}} 976 | {{- $second := index $span 1 -}} 977 | {{- if $.Expr.Neg -}} 978 | {{- if gt $i 0 -}} || {{- end -}} 979 | {{- if eq $first $second -}} 980 | r == {{quoteRune $first}} 981 | {{- else -}} 982 | (r >= {{quoteRune $first}} && r <= {{quoteRune $second}}) 983 | {{- end -}} 984 | {{- else -}} 985 | {{- if gt $i 0}} && {{end -}} 986 | {{- if eq $first $second -}} 987 | r != {{quoteRune $first}} 988 | {{- else -}} 989 | (r < {{quoteRune $first}} || r > {{quoteRune $second}}) 990 | {{- end -}} 991 | {{- end -}} 992 | {{- end -}} 993 | ` 994 | 995 | var charClassTemplate = `// {{$.Expr.String}} 996 | {{$pre := $.Config.Prefix -}} 997 | if r, w := {{$pre}}next(parser, pos); 998 | {{template "charClassCondition" $}} { 999 | {{if $.AcceptsPass -}} 1000 | {{- $pre := $.Config.Prefix -}} 1001 | perr = {{$pre}}max(perr, pos) 1002 | {{else if $.FailPass -}} 1003 | if pos >= errPos { 1004 | failure.Kids = append(failure.Kids, &peg.Fail{ 1005 | Pos: int(pos), 1006 | Want: {{quote $.Expr.String}}, 1007 | }) 1008 | } 1009 | {{end -}} 1010 | goto {{$.Fail}} 1011 | } else { 1012 | {{$pre := $.Config.Prefix -}} 1013 | {{if $.NodePass -}} 1014 | {{$pre := $.Config.Prefix -}} 1015 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w)) 1016 | {{else if (and $.ActionPass $.Node) -}} 1017 | {{$.Node}} = parser.text[pos:pos+w] 1018 | {{end -}} 1019 | pos += w 1020 | } 1021 | ` 1022 | -------------------------------------------------------------------------------- /go.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "go/ast" 11 | "go/parser" 12 | "go/printer" 13 | "go/scanner" 14 | "go/token" 15 | "strings" 16 | ) 17 | 18 | // ParseGoFile parses go function body statements, returning any syntax errors. 19 | // The errors contain location information starting from the given Loc. 20 | func ParseGoFile(loc Loc, code string) error { 21 | _, err := parser.ParseFile(token.NewFileSet(), loc.File, code, 0) 22 | if err == nil { 23 | return nil 24 | } 25 | 26 | el, ok := err.(scanner.ErrorList) 27 | if !ok { 28 | return err 29 | } 30 | p := el[0].Pos 31 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based. 32 | if p.Line > 1 { 33 | loc.Col = 1 34 | } 35 | loc.Col += p.Column - 1 36 | return Err(loc, el[0].Msg) 37 | } 38 | 39 | // ParseGoBody parses go function body statements, returning any syntax errors. 40 | // The errors contain location information starting from the given Loc. 41 | func ParseGoBody(loc Loc, code string) (string, error) { 42 | code = "package main; func p() interface{} {\n" + code + "}" 43 | fset := token.NewFileSet() 44 | file, err := parser.ParseFile(fset, loc.File, code, 0) 45 | if err == nil { 46 | return inferType(loc, fset, file) 47 | } 48 | 49 | el, ok := err.(scanner.ErrorList) 50 | if !ok { 51 | return "", err 52 | } 53 | p := el[0].Pos 54 | loc.Line += p.Line - 2 // -2 because p.Line is 1-based and the func line. 
55 | if p.Line > 2 { 56 | loc.Col = 1 57 | } 58 | loc.Col += p.Column - 1 59 | return "", Err(loc, el[0].Msg) 60 | } 61 | 62 | // inferType infers the type of a function by considering its first return statement. 63 | // If the returned expression is: 64 | // * a type conversion, the type is returned. 65 | // * a type assertion, the type is returned. 66 | // * a function literal, the type is returned. 67 | // * a composite literal, the type is returned. 68 | // * an &-composite literal, the type is returned. 69 | // * an int literal, int is returned. 70 | // * a float literal, float64 is returned. 71 | // * a character literal, rune is returned. 72 | // * a string literal, string is returned. 73 | // 74 | // If the file does not have exactly one top-level funciton, inferType panics. 75 | // If the function has no return statement, an error is returned. 76 | // If the return statement does not have exactly one returned value, an error is returned. 77 | // If the returned value is not an expression in the list above, an error is returned. 78 | func inferType(loc Loc, fset *token.FileSet, file *ast.File) (string, error) { 79 | var funcDecl *ast.FuncDecl 80 | for _, decl := range file.Decls { 81 | if d, ok := decl.(*ast.FuncDecl); ok { 82 | if funcDecl != nil { 83 | panic("multiple function declarations") 84 | } 85 | funcDecl = d 86 | } 87 | } 88 | if funcDecl == nil { 89 | panic("no function declarations") 90 | } 91 | 92 | var v findReturnVisitor 93 | ast.Walk(&v, funcDecl) 94 | if v.retStmt == nil { 95 | return "", Err(loc, "no return statement") 96 | } 97 | if len(v.retStmt.Results) != 1 { 98 | return "", Err(loc, "must return exactly one value") 99 | } 100 | 101 | var typ interface{} 102 | switch e := v.retStmt.Results[0].(type) { 103 | case *ast.CallExpr: 104 | if len(e.Args) != 1 { 105 | var s strings.Builder 106 | printer.Fprint(&s, fset, e) 107 | return "", Err(loc, "cannot infer type from a function call: "+s.String()) 108 | } 109 | typ = e.Fun 110 | case *ast.TypeAssertExpr: 111 | typ = e.Type 112 | case *ast.FuncLit: 113 | typ = e.Type 114 | case *ast.CompositeLit: 115 | typ = e.Type 116 | case *ast.BasicLit: 117 | switch e.Kind { 118 | case token.INT: 119 | return "int", nil 120 | case token.FLOAT: 121 | return "float64", nil 122 | case token.CHAR: 123 | return "rune", nil 124 | case token.STRING: 125 | return "string", nil 126 | } 127 | case *ast.UnaryExpr: 128 | lit, ok := e.X.(*ast.CompositeLit) 129 | if !ok || e.Op != token.AND { 130 | return "", Err(loc, "cannot infer type") 131 | } 132 | var s strings.Builder 133 | printer.Fprint(&s, fset, lit.Type) 134 | return "*" + s.String(), nil 135 | default: 136 | return "", Err(loc, "cannot infer type") 137 | } 138 | var s strings.Builder 139 | printer.Fprint(&s, fset, typ) 140 | return s.String(), nil 141 | } 142 | 143 | type findReturnVisitor struct { 144 | retStmt *ast.ReturnStmt 145 | } 146 | 147 | func (v *findReturnVisitor) Visit(n ast.Node) ast.Visitor { 148 | if r, ok := n.(*ast.ReturnStmt); ok { 149 | v.retStmt = r 150 | return nil 151 | } 152 | return v 153 | } 154 | 155 | // ParseGoExpr parses a go expression, returning any syntax errors. 156 | // The errors contain location information starting from the given Loc. 
157 | func ParseGoExpr(loc Loc, code string) error { 158 | _, err := parser.ParseExprFrom(token.NewFileSet(), loc.File, code, 0) 159 | if err == nil { 160 | return nil 161 | } 162 | 163 | el, ok := err.(scanner.ErrorList) 164 | if !ok { 165 | return err 166 | } 167 | p := el[0].Pos 168 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based. 169 | if p.Line > 1 { 170 | loc.Col = 1 171 | } 172 | loc.Col += p.Column - 1 173 | return Err(loc, el[0].Msg) 174 | } 175 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eaburns/peggy 2 | 3 | go 1.13 4 | 5 | require github.com/eaburns/pretty v1.0.0 6 | -------------------------------------------------------------------------------- /go.sum: -------------------------------------------------------------------------------- 1 | github.com/eaburns/pretty v1.0.0 h1:00W1wrrtMXUSqLPN0txS8j7g9qFXy6nA5vZVqVQOo6w= 2 | github.com/eaburns/pretty v1.0.0/go.mod h1:retcK8A0KEgdmb0nuxhvyxixwCmEPO7SKlK0IJhjg8A= 3 | -------------------------------------------------------------------------------- /gok.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | # Copyright 2017 The Peggy Authors 3 | # 4 | # Use of this source code is governed by a BSD-style 5 | # license that can be found in the LICENSE file or at 6 | # https://developers.google.com/open-source/licenses/bsd. 7 | 8 | # 9 | # Verifies that go code passes go fmt, go vet, golint, and go test. 10 | # 11 | 12 | o=$(mktemp tmp.XXXXXXXXXX) 13 | 14 | fail() { 15 | echo Failed 16 | cat $o 17 | rm $o 18 | exit 1 19 | } 20 | 21 | trap fail INT TERM 22 | 23 | #echo Generating 24 | #go generate . || fail 25 | 26 | echo Formatting 27 | gofmt -l $(find . -name '*.go') > $o 2>&1 28 | test $(wc -l $o | awk '{ print $1 }') = "0" || fail 29 | 30 | echo Vetting 31 | go vet ./... > $o 2>&1 || fail 32 | 33 | echo Testing 34 | go test -test.timeout=60s ./... > $o 2>&1 || fail 35 | 36 | echo Linting 37 | golint ./... \ 38 | | grep -v 'receiver name peggyrcvr should be consistent'\ 39 | | grep -v 'const peggyEofCode should be peggyEOFCode'\ 40 | | egrep -v 'grammar.y.*ALL_CAPS'\ 41 | | egrep -v '(Begin|End|FullParenString|Type|CanFail|Walk).*should have comment or be unexported'\ 42 | | egrep -v 'GenAccept should have comment or'\ 43 | | egrep -v 'calc.go.*use underscores'\ 44 | | egrep -v 'calc.go.*const __ should be _'\ 45 | > $o 2>&1 46 | # Silly: diff the grepped golint output with empty. 47 | # If it's non-empty, error, otherwise succeed. 
48 | e=$(mktemp tmp.XXXXXXXXXX) 49 | touch $e 50 | diff $o $e > /dev/null || { rm $e; fail; } 51 | 52 | rm $o $e 53 | -------------------------------------------------------------------------------- /grammar.go: -------------------------------------------------------------------------------- 1 | //line grammar.y:8 2 | package main 3 | 4 | import __yyfmt__ "fmt" 5 | 6 | //line grammar.y:8 7 | import "io" 8 | 9 | //line grammar.y:13 10 | type peggySymType struct { 11 | yys int 12 | text text 13 | cclass *CharClass 14 | loc Loc 15 | expr Expr 16 | action *Action 17 | rule Rule 18 | rules []Rule 19 | texts []Text 20 | name Name 21 | grammar Grammar 22 | } 23 | 24 | const _ERROR = 57346 25 | const _IDENT = 57347 26 | const _STRING = 57348 27 | const _CODE = 57349 28 | const _ARROW = 57350 29 | const _CHARCLASS = 57351 30 | 31 | var peggyToknames = [...]string{ 32 | "$end", 33 | "error", 34 | "$unk", 35 | "_ERROR", 36 | "_IDENT", 37 | "_STRING", 38 | "_CODE", 39 | "_ARROW", 40 | "_CHARCLASS", 41 | "'.'", 42 | "'*'", 43 | "'+'", 44 | "'?'", 45 | "':'", 46 | "'/'", 47 | "'!'", 48 | "'&'", 49 | "'('", 50 | "')'", 51 | "'^'", 52 | "'<'", 53 | "'>'", 54 | "','", 55 | "'\\n'", 56 | } 57 | var peggyStatenames = [...]string{} 58 | 59 | const peggyEofCode = 1 60 | const peggyErrCode = 2 61 | const peggyInitialStackSize = 16 62 | 63 | //line grammar.y:174 64 | 65 | // Parse parses a Peggy input file, and returns the Grammar. 66 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) { 67 | x := &lexer{ 68 | in: in, 69 | file: fileName, 70 | line: 1, 71 | } 72 | peggyParse(x) 73 | if x.err != nil { 74 | return nil, x.err 75 | } 76 | return &x.result, nil 77 | } 78 | 79 | //line yacctab:1 80 | var peggyExca = [...]int{ 81 | -1, 1, 82 | 1, -1, 83 | -2, 0, 84 | -1, 64, 85 | 19, 42, 86 | -2, 0, 87 | } 88 | 89 | const peggyPrivate = 57344 90 | 91 | const peggyLast = 118 92 | 93 | var peggyAct = [...]int{ 94 | 95 | 2, 31, 26, 27, 60, 68, 29, 4, 14, 42, 96 | 43, 18, 48, 69, 9, 44, 22, 21, 44, 18, 97 | 25, 3, 38, 41, 56, 10, 12, 4, 13, 15, 98 | 20, 24, 11, 49, 50, 46, 10, 54, 10, 7, 99 | 17, 15, 16, 1, 55, 57, 51, 52, 53, 58, 100 | 23, 59, 62, 19, 11, 63, 8, 64, 6, 45, 101 | 66, 65, 11, 39, 61, 67, 40, 37, 35, 34, 102 | 28, 5, 0, 33, 32, 36, 30, 39, 47, 0, 103 | 40, 37, 0, 0, 0, 0, 0, 33, 32, 36, 104 | 11, 39, 0, 0, 40, 37, 0, 0, 0, 0, 105 | 0, 33, 32, 36, 30, 39, 0, 0, 40, 37, 106 | 0, 0, 0, 0, 0, 33, 32, 36, 107 | } 108 | var peggyPact = [...]int{ 109 | 110 | -17, -1000, 49, -1000, -17, -1000, -17, -17, -1000, -1000, 111 | 34, -10, -1000, 27, -1000, 27, -17, 8, 26, -17, 112 | -1000, 99, -17, -13, -1000, -1000, 0, -1000, 71, -1000, 113 | -2, -1000, -17, -17, 35, -1000, -17, -1000, -1000, -1000, 114 | -1000, 99, -1000, 19, -17, -1000, -1000, -1000, -17, 57, 115 | 57, -1000, -1000, -1000, 99, 0, -1000, 99, 85, -1000, 116 | -1000, -1000, -1000, -1000, 3, -1000, -1000, -6, -1000, -1000, 117 | } 118 | var peggyPgo = [...]int{ 119 | 120 | 0, 71, 2, 3, 70, 6, 1, 69, 68, 59, 121 | 4, 58, 50, 14, 39, 22, 43, 0, 21, 122 | } 123 | var peggyR1 = [...]int{ 124 | 125 | 0, 16, 1, 1, 11, 14, 14, 14, 13, 13, 126 | 15, 15, 12, 12, 2, 2, 3, 3, 4, 4, 127 | 5, 5, 6, 6, 6, 7, 7, 7, 7, 8, 128 | 8, 8, 8, 8, 8, 8, 8, 10, 9, 18, 129 | 18, 17, 17, 130 | } 131 | var peggyR2 = [...]int{ 132 | 133 | 0, 2, 4, 2, 1, 3, 1, 0, 4, 5, 134 | 4, 1, 1, 3, 4, 1, 2, 1, 2, 1, 135 | 4, 1, 3, 3, 1, 2, 2, 2, 1, 5, 136 | 3, 3, 1, 1, 1, 1, 4, 1, 1, 2, 137 | 1, 1, 0, 138 | } 139 | var peggyChk = [...]int{ 140 | 141 | -1000, -16, -17, -18, 24, -1, 
-11, -14, 7, -13, 142 | -15, 5, -18, -18, -17, -18, 8, 6, 21, -14, 143 | -13, -17, 8, -12, 5, -17, -2, -3, -4, -5, 144 | 5, -6, 17, 16, -7, -8, 18, 10, -15, 6, 145 | 9, -17, 22, 23, 15, -9, -5, 7, 14, -17, 146 | -17, 11, 12, 13, -17, -2, 5, -17, -17, -6, 147 | -10, 7, -6, -10, -2, -3, -6, -17, 2, 19, 148 | } 149 | var peggyDef = [...]int{ 150 | 151 | 42, -2, 7, 41, 40, 1, 0, 42, 4, 6, 152 | 0, 11, 39, 7, 3, 41, 42, 0, 0, 42, 153 | 5, 0, 42, 0, 12, 2, 8, 15, 17, 19, 154 | 11, 21, 42, 42, 24, 28, 42, 32, 33, 34, 155 | 35, 0, 10, 0, 42, 16, 18, 38, 42, 0, 156 | 0, 25, 26, 27, 0, 9, 13, 0, 0, 22, 157 | 30, 37, 23, 31, -2, 14, 20, 0, 36, 29, 158 | } 159 | var peggyTok1 = [...]int{ 160 | 161 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3, 162 | 24, 3, 3, 3, 3, 3, 3, 3, 3, 3, 163 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 164 | 3, 3, 3, 16, 3, 3, 3, 3, 17, 3, 165 | 18, 19, 11, 12, 23, 3, 10, 15, 3, 3, 166 | 3, 3, 3, 3, 3, 3, 3, 3, 14, 3, 167 | 21, 3, 22, 13, 3, 3, 3, 3, 3, 3, 168 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 169 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 170 | 3, 3, 3, 3, 20, 171 | } 172 | var peggyTok2 = [...]int{ 173 | 174 | 2, 3, 4, 5, 6, 7, 8, 9, 175 | } 176 | var peggyTok3 = [...]int{ 177 | 0, 178 | } 179 | 180 | var peggyErrorMessages = [...]struct { 181 | state int 182 | token int 183 | msg string 184 | }{} 185 | 186 | //line yaccpar:1 187 | 188 | /* parser for yacc output */ 189 | 190 | var ( 191 | peggyDebug = 0 192 | peggyErrorVerbose = false 193 | ) 194 | 195 | type peggyLexer interface { 196 | Lex(lval *peggySymType) int 197 | Error(s string) 198 | } 199 | 200 | type peggyParser interface { 201 | Parse(peggyLexer) int 202 | Lookahead() int 203 | } 204 | 205 | type peggyParserImpl struct { 206 | lval peggySymType 207 | stack [peggyInitialStackSize]peggySymType 208 | char int 209 | } 210 | 211 | func (p *peggyParserImpl) Lookahead() int { 212 | return p.char 213 | } 214 | 215 | func peggyNewParser() peggyParser { 216 | return &peggyParserImpl{} 217 | } 218 | 219 | const peggyFlag = -1000 220 | 221 | func peggyTokname(c int) string { 222 | if c >= 1 && c-1 < len(peggyToknames) { 223 | if peggyToknames[c-1] != "" { 224 | return peggyToknames[c-1] 225 | } 226 | } 227 | return __yyfmt__.Sprintf("tok-%v", c) 228 | } 229 | 230 | func peggyStatname(s int) string { 231 | if s >= 0 && s < len(peggyStatenames) { 232 | if peggyStatenames[s] != "" { 233 | return peggyStatenames[s] 234 | } 235 | } 236 | return __yyfmt__.Sprintf("state-%v", s) 237 | } 238 | 239 | func peggyErrorMessage(state, lookAhead int) string { 240 | const TOKSTART = 4 241 | 242 | if !peggyErrorVerbose { 243 | return "syntax error" 244 | } 245 | 246 | for _, e := range peggyErrorMessages { 247 | if e.state == state && e.token == lookAhead { 248 | return "syntax error: " + e.msg 249 | } 250 | } 251 | 252 | res := "syntax error: unexpected " + peggyTokname(lookAhead) 253 | 254 | // To match Bison, suggest at most four expected tokens. 255 | expected := make([]int, 0, 4) 256 | 257 | // Look for shiftable tokens. 258 | base := peggyPact[state] 259 | for tok := TOKSTART; tok-1 < len(peggyToknames); tok++ { 260 | if n := base + tok; n >= 0 && n < peggyLast && peggyChk[peggyAct[n]] == tok { 261 | if len(expected) == cap(expected) { 262 | return res 263 | } 264 | expected = append(expected, tok) 265 | } 266 | } 267 | 268 | if peggyDef[state] == -2 { 269 | i := 0 270 | for peggyExca[i] != -1 || peggyExca[i+1] != state { 271 | i += 2 272 | } 273 | 274 | // Look for tokens that we accept or reduce. 
275 | for i += 2; peggyExca[i] >= 0; i += 2 { 276 | tok := peggyExca[i] 277 | if tok < TOKSTART || peggyExca[i+1] == 0 { 278 | continue 279 | } 280 | if len(expected) == cap(expected) { 281 | return res 282 | } 283 | expected = append(expected, tok) 284 | } 285 | 286 | // If the default action is to accept or reduce, give up. 287 | if peggyExca[i+1] != 0 { 288 | return res 289 | } 290 | } 291 | 292 | for i, tok := range expected { 293 | if i == 0 { 294 | res += ", expecting " 295 | } else { 296 | res += " or " 297 | } 298 | res += peggyTokname(tok) 299 | } 300 | return res 301 | } 302 | 303 | func peggylex1(lex peggyLexer, lval *peggySymType) (char, token int) { 304 | token = 0 305 | char = lex.Lex(lval) 306 | if char <= 0 { 307 | token = peggyTok1[0] 308 | goto out 309 | } 310 | if char < len(peggyTok1) { 311 | token = peggyTok1[char] 312 | goto out 313 | } 314 | if char >= peggyPrivate { 315 | if char < peggyPrivate+len(peggyTok2) { 316 | token = peggyTok2[char-peggyPrivate] 317 | goto out 318 | } 319 | } 320 | for i := 0; i < len(peggyTok3); i += 2 { 321 | token = peggyTok3[i+0] 322 | if token == char { 323 | token = peggyTok3[i+1] 324 | goto out 325 | } 326 | } 327 | 328 | out: 329 | if token == 0 { 330 | token = peggyTok2[1] /* unknown char */ 331 | } 332 | if peggyDebug >= 3 { 333 | __yyfmt__.Printf("lex %s(%d)\n", peggyTokname(token), uint(char)) 334 | } 335 | return char, token 336 | } 337 | 338 | func peggyParse(peggylex peggyLexer) int { 339 | return peggyNewParser().Parse(peggylex) 340 | } 341 | 342 | func (peggyrcvr *peggyParserImpl) Parse(peggylex peggyLexer) int { 343 | var peggyn int 344 | var peggyVAL peggySymType 345 | var peggyDollar []peggySymType 346 | _ = peggyDollar // silence set and not used 347 | peggyS := peggyrcvr.stack[:] 348 | 349 | Nerrs := 0 /* number of errors */ 350 | Errflag := 0 /* error recovery flag */ 351 | peggystate := 0 352 | peggyrcvr.char = -1 353 | peggytoken := -1 // peggyrcvr.char translated into internal numbering 354 | defer func() { 355 | // Make sure we report no lookahead when not parsing. 
356 | peggystate = -1 357 | peggyrcvr.char = -1 358 | peggytoken = -1 359 | }() 360 | peggyp := -1 361 | goto peggystack 362 | 363 | ret0: 364 | return 0 365 | 366 | ret1: 367 | return 1 368 | 369 | peggystack: 370 | /* put a state and value onto the stack */ 371 | if peggyDebug >= 4 { 372 | __yyfmt__.Printf("char %v in %v\n", peggyTokname(peggytoken), peggyStatname(peggystate)) 373 | } 374 | 375 | peggyp++ 376 | if peggyp >= len(peggyS) { 377 | nyys := make([]peggySymType, len(peggyS)*2) 378 | copy(nyys, peggyS) 379 | peggyS = nyys 380 | } 381 | peggyS[peggyp] = peggyVAL 382 | peggyS[peggyp].yys = peggystate 383 | 384 | peggynewstate: 385 | peggyn = peggyPact[peggystate] 386 | if peggyn <= peggyFlag { 387 | goto peggydefault /* simple state */ 388 | } 389 | if peggyrcvr.char < 0 { 390 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval) 391 | } 392 | peggyn += peggytoken 393 | if peggyn < 0 || peggyn >= peggyLast { 394 | goto peggydefault 395 | } 396 | peggyn = peggyAct[peggyn] 397 | if peggyChk[peggyn] == peggytoken { /* valid shift */ 398 | peggyrcvr.char = -1 399 | peggytoken = -1 400 | peggyVAL = peggyrcvr.lval 401 | peggystate = peggyn 402 | if Errflag > 0 { 403 | Errflag-- 404 | } 405 | goto peggystack 406 | } 407 | 408 | peggydefault: 409 | /* default state action */ 410 | peggyn = peggyDef[peggystate] 411 | if peggyn == -2 { 412 | if peggyrcvr.char < 0 { 413 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval) 414 | } 415 | 416 | /* look through exception table */ 417 | xi := 0 418 | for { 419 | if peggyExca[xi+0] == -1 && peggyExca[xi+1] == peggystate { 420 | break 421 | } 422 | xi += 2 423 | } 424 | for xi += 2; ; xi += 2 { 425 | peggyn = peggyExca[xi+0] 426 | if peggyn < 0 || peggyn == peggytoken { 427 | break 428 | } 429 | } 430 | peggyn = peggyExca[xi+1] 431 | if peggyn < 0 { 432 | goto ret0 433 | } 434 | } 435 | if peggyn == 0 { 436 | /* error ... attempt to resume parsing */ 437 | switch Errflag { 438 | case 0: /* brand new error */ 439 | peggylex.Error(peggyErrorMessage(peggystate, peggytoken)) 440 | Nerrs++ 441 | if peggyDebug >= 1 { 442 | __yyfmt__.Printf("%s", peggyStatname(peggystate)) 443 | __yyfmt__.Printf(" saw %s\n", peggyTokname(peggytoken)) 444 | } 445 | fallthrough 446 | 447 | case 1, 2: /* incompletely recovered error ... try again */ 448 | Errflag = 3 449 | 450 | /* find a state where "error" is a legal shift action */ 451 | for peggyp >= 0 { 452 | peggyn = peggyPact[peggyS[peggyp].yys] + peggyErrCode 453 | if peggyn >= 0 && peggyn < peggyLast { 454 | peggystate = peggyAct[peggyn] /* simulate a shift of "error" */ 455 | if peggyChk[peggystate] == peggyErrCode { 456 | goto peggystack 457 | } 458 | } 459 | 460 | /* the current p has no shift on "error", pop stack */ 461 | if peggyDebug >= 2 { 462 | __yyfmt__.Printf("error recovery pops state %d\n", peggyS[peggyp].yys) 463 | } 464 | peggyp-- 465 | } 466 | /* there is no state on the stack with an error shift ... 
abort */ 467 | goto ret1 468 | 469 | case 3: /* no shift yet; clobber input char */ 470 | if peggyDebug >= 2 { 471 | __yyfmt__.Printf("error recovery discards %s\n", peggyTokname(peggytoken)) 472 | } 473 | if peggytoken == peggyEofCode { 474 | goto ret1 475 | } 476 | peggyrcvr.char = -1 477 | peggytoken = -1 478 | goto peggynewstate /* try again in the same state */ 479 | } 480 | } 481 | 482 | /* reduction by production peggyn */ 483 | if peggyDebug >= 2 { 484 | __yyfmt__.Printf("reduce %v in:\n\t%v\n", peggyn, peggyStatname(peggystate)) 485 | } 486 | 487 | peggynt := peggyn 488 | peggypt := peggyp 489 | _ = peggypt // guard against "declared and not used" 490 | 491 | peggyp -= peggyR2[peggyn] 492 | // peggyp is now the index of $0. Perform the default action. Iff the 493 | // reduced production is ε, $1 is possibly out of range. 494 | if peggyp+1 >= len(peggyS) { 495 | nyys := make([]peggySymType, len(peggyS)*2) 496 | copy(nyys, peggyS) 497 | peggyS = nyys 498 | } 499 | peggyVAL = peggyS[peggyp+1] 500 | 501 | /* consult goto table to find next state */ 502 | peggyn = peggyR1[peggyn] 503 | peggyg := peggyPgo[peggyn] 504 | peggyj := peggyg + peggyS[peggyp].yys + 1 505 | 506 | if peggyj >= peggyLast { 507 | peggystate = peggyAct[peggyg] 508 | } else { 509 | peggystate = peggyAct[peggyj] 510 | if peggyChk[peggystate] != -peggyn { 511 | peggystate = peggyAct[peggyg] 512 | } 513 | } 514 | // dummy call; replaced with literal code 515 | switch peggynt { 516 | 517 | case 1: 518 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 519 | //line grammar.y:43 520 | { 521 | peggylex.(*lexer).result = peggyDollar[2].grammar 522 | } 523 | case 2: 524 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 525 | //line grammar.y:46 526 | { 527 | peggyVAL.grammar = Grammar{Prelude: peggyDollar[1].text, Rules: peggyDollar[3].rules} 528 | } 529 | case 3: 530 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 531 | //line grammar.y:47 532 | { 533 | peggyVAL.grammar = Grammar{Rules: peggyDollar[1].rules} 534 | } 535 | case 4: 536 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 537 | //line grammar.y:51 538 | { 539 | loc := peggyDollar[1].text.Begin() 540 | loc.Col++ // skip the open {. 
541 | err := ParseGoFile(loc, peggyDollar[1].text.String()) 542 | if err != nil { 543 | peggylex.(*lexer).err = err 544 | } 545 | peggyVAL.text = peggyDollar[1].text 546 | } 547 | case 5: 548 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 549 | //line grammar.y:62 550 | { 551 | peggyVAL.rules = append(peggyDollar[1].rules, peggyDollar[3].rule) 552 | } 553 | case 6: 554 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 555 | //line grammar.y:63 556 | { 557 | peggyVAL.rules = []Rule{peggyDollar[1].rule} 558 | } 559 | case 7: 560 | peggyDollar = peggyS[peggypt-0 : peggypt+1] 561 | //line grammar.y:67 562 | { 563 | peggyVAL.rules = nil 564 | } 565 | case 8: 566 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 567 | //line grammar.y:70 568 | { 569 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, Expr: peggyDollar[4].expr} 570 | } 571 | case 9: 572 | peggyDollar = peggyS[peggypt-5 : peggypt+1] 573 | //line grammar.y:73 574 | { 575 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, ErrorName: peggyDollar[2].text, Expr: peggyDollar[5].expr} 576 | } 577 | case 10: 578 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 579 | //line grammar.y:78 580 | { 581 | peggyVAL.name = Name{Name: peggyDollar[1].text, Args: peggyDollar[3].texts} 582 | } 583 | case 11: 584 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 585 | //line grammar.y:79 586 | { 587 | peggyVAL.name = Name{Name: peggyDollar[1].text} 588 | } 589 | case 12: 590 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 591 | //line grammar.y:82 592 | { 593 | peggyVAL.texts = []Text{peggyDollar[1].text} 594 | } 595 | case 13: 596 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 597 | //line grammar.y:83 598 | { 599 | peggyVAL.texts = append(peggyDollar[1].texts, peggyDollar[3].text) 600 | } 601 | case 14: 602 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 603 | //line grammar.y:87 604 | { 605 | e, ok := peggyDollar[1].expr.(*Choice) 606 | if !ok { 607 | e = &Choice{Exprs: []Expr{peggyDollar[1].expr}} 608 | } 609 | e.Exprs = append(e.Exprs, peggyDollar[4].expr) 610 | peggyVAL.expr = e 611 | } 612 | case 15: 613 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 614 | //line grammar.y:95 615 | { 616 | peggyVAL.expr = peggyDollar[1].expr 617 | } 618 | case 16: 619 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 620 | //line grammar.y:99 621 | { 622 | peggyDollar[2].action.Expr = peggyDollar[1].expr 623 | peggyVAL.expr = peggyDollar[2].action 624 | } 625 | case 17: 626 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 627 | //line grammar.y:103 628 | { 629 | peggyVAL.expr = peggyDollar[1].expr 630 | } 631 | case 18: 632 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 633 | //line grammar.y:107 634 | { 635 | e, ok := peggyDollar[1].expr.(*Sequence) 636 | if !ok { 637 | e = &Sequence{Exprs: []Expr{peggyDollar[1].expr}} 638 | } 639 | e.Exprs = append(e.Exprs, peggyDollar[2].expr) 640 | peggyVAL.expr = e 641 | } 642 | case 19: 643 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 644 | //line grammar.y:115 645 | { 646 | peggyVAL.expr = peggyDollar[1].expr 647 | } 648 | case 20: 649 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 650 | //line grammar.y:118 651 | { 652 | peggyVAL.expr = &LabelExpr{Label: peggyDollar[1].text, Expr: peggyDollar[4].expr} 653 | } 654 | case 21: 655 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 656 | //line grammar.y:119 657 | { 658 | peggyVAL.expr = peggyDollar[1].expr 659 | } 660 | case 22: 661 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 662 | //line grammar.y:122 663 | { 664 | peggyVAL.expr = &PredExpr{Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc} 665 | } 666 | 
case 23: 667 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 668 | //line grammar.y:123 669 | { 670 | peggyVAL.expr = &PredExpr{Neg: true, Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc} 671 | } 672 | case 24: 673 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 674 | //line grammar.y:124 675 | { 676 | peggyVAL.expr = peggyDollar[1].expr 677 | } 678 | case 25: 679 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 680 | //line grammar.y:127 681 | { 682 | peggyVAL.expr = &RepExpr{Op: '*', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 683 | } 684 | case 26: 685 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 686 | //line grammar.y:128 687 | { 688 | peggyVAL.expr = &RepExpr{Op: '+', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 689 | } 690 | case 27: 691 | peggyDollar = peggyS[peggypt-2 : peggypt+1] 692 | //line grammar.y:129 693 | { 694 | peggyVAL.expr = &OptExpr{Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc} 695 | } 696 | case 28: 697 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 698 | //line grammar.y:130 699 | { 700 | peggyVAL.expr = peggyDollar[1].expr 701 | } 702 | case 29: 703 | peggyDollar = peggyS[peggypt-5 : peggypt+1] 704 | //line grammar.y:133 705 | { 706 | peggyVAL.expr = &SubExpr{Expr: peggyDollar[3].expr, Open: peggyDollar[1].loc, Close: peggyDollar[5].loc} 707 | } 708 | case 30: 709 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 710 | //line grammar.y:134 711 | { 712 | peggyVAL.expr = &PredCode{Code: peggyDollar[3].text, Loc: peggyDollar[1].loc} 713 | } 714 | case 31: 715 | peggyDollar = peggyS[peggypt-3 : peggypt+1] 716 | //line grammar.y:135 717 | { 718 | peggyVAL.expr = &PredCode{Neg: true, Code: peggyDollar[3].text, Loc: peggyDollar[1].loc} 719 | } 720 | case 32: 721 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 722 | //line grammar.y:136 723 | { 724 | peggyVAL.expr = &Any{Loc: peggyDollar[1].loc} 725 | } 726 | case 33: 727 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 728 | //line grammar.y:137 729 | { 730 | peggyVAL.expr = &Ident{Name: peggyDollar[1].name} 731 | } 732 | case 34: 733 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 734 | //line grammar.y:138 735 | { 736 | peggyVAL.expr = &Literal{Text: peggyDollar[1].text} 737 | } 738 | case 35: 739 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 740 | //line grammar.y:139 741 | { 742 | peggyVAL.expr = peggyDollar[1].cclass 743 | } 744 | case 36: 745 | peggyDollar = peggyS[peggypt-4 : peggypt+1] 746 | //line grammar.y:140 747 | { 748 | peggylex.Error("unexpected end of file") 749 | } 750 | case 37: 751 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 752 | //line grammar.y:144 753 | { 754 | loc := peggyDollar[1].text.Begin() 755 | loc.Col++ // skip the open {. 756 | err := ParseGoExpr(loc, peggyDollar[1].text.String()) 757 | if err != nil { 758 | peggylex.(*lexer).err = err 759 | } 760 | peggyVAL.text = peggyDollar[1].text 761 | } 762 | case 38: 763 | peggyDollar = peggyS[peggypt-1 : peggypt+1] 764 | //line grammar.y:156 765 | { 766 | loc := peggyDollar[1].text.Begin() 767 | loc.Col++ // skip the open {. 
768 | typ, err := ParseGoBody(loc, peggyDollar[1].text.String()) 769 | if err != nil { 770 | peggylex.(*lexer).err = err 771 | } 772 | peggyVAL.action = &Action{Code: peggyDollar[1].text, ReturnType: typ} 773 | } 774 | } 775 | goto peggystack /* stack new state and value */ 776 | } 777 | -------------------------------------------------------------------------------- /grammar.y: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | %{ 8 | package main 9 | 10 | import "io" 11 | %} 12 | 13 | %union{ 14 | text text 15 | cclass *CharClass 16 | loc Loc 17 | expr Expr 18 | action *Action 19 | rule Rule 20 | rules []Rule 21 | texts []Text 22 | name Name 23 | grammar Grammar 24 | } 25 | 26 | %type Grammar 27 | %type Expr, ActExpr, SeqExpr, LabelExpr, PredExpr, RepExpr, Operand 28 | %type GoAction 29 | %type GoPred Prelude 30 | %type Args 31 | %type Rule 32 | %type Rules 33 | %type Name 34 | 35 | %token _ERROR 36 | %token _IDENT _STRING _CODE _ARROW 37 | %token _CHARCLASS 38 | %token '.', '*', '+', '?', ':', '/', '!', '&', '(', ')', '^', '<', '>', ',' 39 | 40 | %% 41 | 42 | Top: 43 | Nl Grammar { peggylex.(*lexer).result = $2 } 44 | 45 | Grammar: 46 | Prelude NewLine Rules Nl { $$ = Grammar{ Prelude: $1, Rules: $3 } } 47 | | Rules Nl { $$ = Grammar{ Rules: $1 } } 48 | 49 | Prelude: 50 | _CODE 51 | { 52 | loc := $1.Begin() 53 | loc.Col++ // skip the open {. 54 | err := ParseGoFile(loc, $1.String()) 55 | if err != nil { 56 | peggylex.(*lexer).err = err 57 | } 58 | $$ = $1 59 | } 60 | 61 | Rules: 62 | Rules NewLine Rule { $$ = append($1, $3) } 63 | | Rule { $$ = []Rule{ $1 } } 64 | // The following production adds a shift/reduce conflict: 65 | // reduce the empty string or shift into a Rule? 66 | // Yacc always prefers shift in the case of both, which is the desired behavior. 67 | | { $$ = nil } 68 | 69 | Rule: 70 | Name _ARROW Nl Expr { 71 | $$ = Rule{ Name: $1, Expr: $4 } 72 | } 73 | | Name _STRING _ARROW Nl Expr { 74 | $$ = Rule{ Name: $1, ErrorName: $2, Expr: $5 } 75 | } 76 | 77 | Name: 78 | _IDENT '<' Args '>' { $$ = Name{ Name: $1, Args: $3 } } 79 | | _IDENT { $$ = Name{ Name: $1 } } 80 | 81 | Args: 82 | _IDENT { $$ = []Text{$1} } 83 | | Args ',' _IDENT { $$ = append($1, $3) } 84 | 85 | Expr: 86 | Expr '/' Nl ActExpr 87 | { 88 | e, ok := $1.(*Choice) 89 | if !ok { 90 | e = &Choice{ Exprs: []Expr{$1} } 91 | } 92 | e.Exprs = append(e.Exprs, $4) 93 | $$ = e 94 | } 95 | | ActExpr { $$ = $1 } 96 | 97 | ActExpr: 98 | SeqExpr GoAction 99 | { 100 | $2.Expr = $1 101 | $$ = $2 102 | } 103 | | SeqExpr { $$ = $1 } 104 | 105 | SeqExpr: 106 | SeqExpr LabelExpr 107 | { 108 | e, ok := $1.(*Sequence) 109 | if !ok { 110 | e = &Sequence{ Exprs: []Expr{$1} } 111 | } 112 | e.Exprs = append(e.Exprs, $2) 113 | $$ = e 114 | } 115 | | LabelExpr { $$ = $1 } 116 | 117 | LabelExpr: 118 | _IDENT ':' Nl PredExpr { $$ = &LabelExpr{ Label: $1, Expr: $4 } } 119 | | PredExpr { $$ = $1 } 120 | 121 | PredExpr: 122 | '&' Nl PredExpr { $$ = &PredExpr{ Expr: $3, Loc: $1 } } 123 | | '!' Nl PredExpr { $$ = &PredExpr{ Neg: true, Expr: $3, Loc: $1 } } 124 | | RepExpr { $$ = $1 } 125 | 126 | RepExpr: 127 | RepExpr '*' { $$ = &RepExpr{ Op: '*', Expr: $1, Loc: $2 } } 128 | | RepExpr '+' { $$ = &RepExpr{ Op: '+', Expr: $1, Loc: $2 } } 129 | | RepExpr '?' 
{ $$ = &OptExpr{ Expr: $1, Loc: $2 } } 130 | | Operand { $$ = $1 } 131 | 132 | Operand: 133 | '(' Nl Expr Nl ')' { $$ = &SubExpr{ Expr: $3, Open: $1, Close: $5 } } 134 | | '&' Nl GoPred { $$ = &PredCode{ Code: $3, Loc: $1 } } 135 | | '!' Nl GoPred { $$ = &PredCode{ Neg: true, Code: $3, Loc: $1 } } 136 | | '.' { $$ = &Any{ Loc: $1 } } 137 | | Name { $$ = &Ident{ Name: $1 } } 138 | | _STRING { $$ = &Literal{ Text: $1 } } 139 | | _CHARCLASS { $$ =$1 } 140 | | '(' Nl Expr error { peggylex.Error("unexpected end of file") } 141 | 142 | GoPred: 143 | _CODE 144 | { 145 | loc := $1.Begin() 146 | loc.Col++ // skip the open {. 147 | err := ParseGoExpr(loc, $1.String()) 148 | if err != nil { 149 | peggylex.(*lexer).err = err 150 | } 151 | $$ = $1 152 | } 153 | 154 | GoAction: 155 | _CODE 156 | { 157 | loc := $1.Begin() 158 | loc.Col++ // skip the open {. 159 | typ, err := ParseGoBody(loc, $1.String()) 160 | if err != nil { 161 | peggylex.(*lexer).err = err 162 | } 163 | $$ = &Action{ Code: $1, ReturnType: typ } 164 | } 165 | 166 | NewLine: 167 | '\n' NewLine 168 | | '\n' 169 | 170 | Nl: 171 | NewLine 172 | | 173 | 174 | %% 175 | 176 | // Parse parses a Peggy input file, and returns the Grammar. 177 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) { 178 | x := &lexer{ 179 | in: in, 180 | file: fileName, 181 | line: 1, 182 | } 183 | peggyParse(x) 184 | if x.err != nil { 185 | return nil, x.err 186 | } 187 | return &x.result, nil 188 | } 189 | -------------------------------------------------------------------------------- /lex.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "errors" 11 | "fmt" 12 | "io" 13 | "unicode" 14 | ) 15 | 16 | const eof = -1 17 | 18 | type text struct { 19 | str string 20 | begin, end Loc 21 | } 22 | 23 | func (t text) PrettyPrint() string { 24 | return fmt.Sprintf(`Text{%d:%d-%d:%d: "%s"}`, 25 | t.begin.Line, t.begin.Col, 26 | t.end.Line, t.end.Col, 27 | t.str) 28 | } 29 | 30 | func (t text) String() string { return t.str } 31 | func (t text) Begin() Loc { return t.begin } 32 | func (t text) End() Loc { return t.end } 33 | 34 | type lexer struct { 35 | in io.RuneScanner 36 | file string 37 | n, line, lineStart, prevLineStart int 38 | eof bool 39 | 40 | // prevBegin is the beginning of the most-recently scanned token. 41 | // prevEnd is the end of the most-recently scanned token. 42 | // These are used for error reporting. 43 | prevBegin, prevEnd Loc 44 | 45 | // err is non-nil if there was an error during parsing. 46 | err error 47 | // result contains the Grammar resulting from a successful parse. 48 | result Grammar 49 | } 50 | 51 | // Begin returns the begin location of the last returned token. 52 | func (x *lexer) Begin() Loc { return x.prevBegin } 53 | 54 | // End returns the end location of the last returned token. 
55 | func (x *lexer) End() Loc { return x.prevEnd } 56 | 57 | func (x *lexer) loc() Loc { 58 | return Loc{ 59 | File: x.file, 60 | Line: x.line, 61 | Col: x.n - x.lineStart + 1, 62 | } 63 | } 64 | 65 | func (x *lexer) next() (rune, error) { 66 | if x.eof { 67 | return eof, nil 68 | } 69 | r, _, err := x.in.ReadRune() 70 | if err == io.EOF { 71 | x.eof = true 72 | return eof, nil 73 | } 74 | x.n++ 75 | if r == '\n' { 76 | x.prevLineStart = x.lineStart 77 | x.lineStart = x.n 78 | x.line++ 79 | } 80 | return r, err 81 | } 82 | 83 | func (x *lexer) back() error { 84 | if x.eof { 85 | return nil 86 | } 87 | if x.lineStart == x.n { 88 | x.lineStart = x.prevLineStart 89 | x.line-- 90 | } 91 | x.n-- 92 | return x.in.UnreadRune() 93 | } 94 | 95 | func (x *lexer) Error(s string) { 96 | if x.err != nil { 97 | return 98 | } 99 | x.err = Err(x, s) 100 | } 101 | 102 | func (x *lexer) Lex(lval *peggySymType) (v int) { 103 | defer func() { x.prevEnd = x.loc() }() 104 | for { 105 | x.prevBegin = x.loc() 106 | lval.text.begin = x.loc() 107 | lval.loc = x.loc() 108 | r, err := x.next() 109 | 110 | switch { 111 | case err != nil: 112 | break 113 | 114 | case r == '#': 115 | if err = comment(x); err != nil { 116 | break 117 | } 118 | return '\n' 119 | 120 | case unicode.IsLetter(r) || r == '_': 121 | if lval.text.str, err = ident(x); err != nil { 122 | break 123 | } 124 | lval.text.str = string([]rune{r}) + lval.text.str 125 | lval.text.end = x.loc() 126 | return _IDENT 127 | 128 | case r == '<': 129 | b := x.loc() 130 | if r, err = x.next(); err != nil { 131 | break 132 | } 133 | lval.text.str = string([]rune{'<', r}) 134 | lval.text.end = x.loc() 135 | if r != '-' { 136 | x.back() 137 | x.prevBegin = b 138 | return int('<') 139 | } 140 | return _ARROW 141 | 142 | case r == '{': 143 | if lval.text.str, err = code(x); err != nil { 144 | break 145 | } 146 | lval.text.end = x.loc() 147 | return _CODE 148 | 149 | case r == '[': 150 | if err = x.back(); err != nil { 151 | break 152 | } 153 | if lval.cclass, err = charClass(x); err != nil { 154 | x.err = err 155 | return _ERROR 156 | } 157 | return _CHARCLASS 158 | 159 | case r == '\'' || r == '"': 160 | if lval.text.str, err = delimited(x, r); err != nil { 161 | break 162 | } 163 | lval.text.end = x.loc() 164 | return _STRING 165 | 166 | case unicode.IsSpace(r) && r != '\n': 167 | continue 168 | 169 | default: 170 | return int(r) 171 | } 172 | x.prevEnd = x.loc() 173 | x.Error(err.Error()) 174 | return _ERROR 175 | } 176 | } 177 | 178 | func delimited(x *lexer, d rune) (string, error) { 179 | var rs []rune 180 | for { 181 | r, esc, err := x.nextUnesc(d) 182 | switch { 183 | case err != nil: 184 | return "", err 185 | case r == eof: 186 | return "", errors.New("unclosed " + string([]rune{d})) 187 | case r == d && !esc: 188 | return string(rs), nil 189 | } 190 | rs = append(rs, r) 191 | } 192 | } 193 | 194 | func ident(x *lexer) (string, error) { 195 | var rs []rune 196 | for { 197 | r, err := x.next() 198 | if err != nil { 199 | return "", err 200 | } 201 | if !isIdentRune(r) { 202 | return string(rs), x.back() 203 | } 204 | rs = append(rs, r) 205 | } 206 | } 207 | 208 | func isIdentRune(r rune) bool { 209 | return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_' 210 | } 211 | 212 | func code(x *lexer) (string, error) { 213 | var rs []rune 214 | var n int 215 | for { 216 | r, err := x.next() 217 | if err != nil { 218 | return "", err 219 | } 220 | if r == eof { 221 | return "", errors.New("unclosed {") 222 | } 223 | if r == '{' { 224 | n++ 225 | } 226 | if r 
== '}' {
227 | if n == 0 {
228 | break
229 | }
230 | n--
231 | }
232 | rs = append(rs, r)
233 | }
234 | return string(rs), nil
235 | }
236 |
237 | func comment(x *lexer) error {
238 | for {
239 | r, err := x.next()
240 | if err != nil {
241 | return err
242 | }
243 | if r == '\n' || r == eof {
244 | return nil
245 | }
246 | }
247 | }
248 |
249 | func charClass(x *lexer) (*CharClass, error) {
250 | c := &CharClass{Open: x.loc()}
251 | if r, err := x.next(); err != nil {
252 | return nil, Err(c.Open, err.Error())
253 | } else if r != '[' {
254 | panic("impossible, no [")
255 | }
256 |
257 | var prev rune
258 | var hasPrev, span bool
259 |
260 | // last is the Loc just before last read rune.
261 | var last Loc
262 |
263 | // spanLoc is the location of the current span.
264 | // (We use type text to borrow that it implements Located.
265 | // However we ignore the str field.)
266 | var spanLoc text
267 | loop:
268 | for {
269 | last = x.loc()
270 | if !span && !hasPrev {
271 | spanLoc.begin = x.loc()
272 | }
273 | r, esc, err := x.nextUnesc(']')
274 | switch {
275 | case err != nil:
276 | return nil, err
277 |
278 | case r == eof:
279 | c.Close = x.loc()
280 | return nil, Err(c, "unclosed [")
281 |
282 | case r == ']' && !esc:
283 | c.Close = x.loc()
284 | break loop
285 |
286 | case span:
287 | spanLoc.end = x.loc()
288 | if !hasPrev {
289 | return nil, Err(spanLoc, "bad span")
290 | }
291 | if prev >= r {
292 | return nil, Err(spanLoc, "bad span")
293 | }
294 | c.Spans = append(c.Spans, [2]rune{prev, r})
295 | hasPrev, span = false, false
296 | spanLoc.begin = spanLoc.end
297 |
298 | case r == '-' && !esc:
299 | span = true
300 |
301 | default:
302 | if r == '^' && !esc && !c.Neg && len(c.Spans) == 0 && !hasPrev {
303 | c.Neg = true
304 | continue
305 | }
306 | if hasPrev {
307 | c.Spans = append(c.Spans, [2]rune{prev, prev})
308 | spanLoc.begin = last // in case current rune starts a span.
309 | }
310 | prev, hasPrev = r, true
311 | }
312 | }
313 | if span {
314 | spanLoc.end = last // just before closing ]
315 | return nil, Err(spanLoc, "bad span")
316 | }
317 | if hasPrev {
318 | c.Spans = append(c.Spans, [2]rune{prev, prev})
319 | }
320 | if len(c.Spans) == 0 {
321 | return nil, Err(c, "bad char class: empty")
322 | }
323 | return c, nil
324 | }
325 |
326 | var errUnknownEsc = errors.New("unknown escape sequence")
327 |
328 | // Like next, but unescapes an escaped rune according to Go's unescaping rules.
329 | // The second return value is whether the rune was escaped.
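// The escape forms accepted below are, in summary:
//	\a \b \f \n \r \t \v \\        single-character escapes
//	\ooo                           three octal digits, at most \377 (255)
//	\xhh \uhhhh \Uhhhhhhhh         hex escapes; values above 0x10FFFF are rejected
//	\<delim>                       the delimiter itself (e.g. \" inside a string)
//	\- and \^                      only when delim is ']', i.e. inside a character class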
330 | func (x *lexer) nextUnesc(delim rune) (rune, bool, error) { 331 | switch r, err := x.next(); { 332 | case err != nil: 333 | return 0, false, err 334 | case r == delim: 335 | return r, false, nil 336 | case r == '\\': 337 | r, err = x.next() 338 | if err != nil { 339 | return 0, true, err 340 | } 341 | switch r { 342 | case eof: 343 | return eof, true, nil 344 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\': 345 | switch r { 346 | case 'a': 347 | r = '\a' 348 | case 'b': 349 | r = '\b' 350 | case 'f': 351 | r = '\f' 352 | case 'n': 353 | r = '\n' 354 | case 'r': 355 | r = '\r' 356 | case 't': 357 | r = '\t' 358 | case 'v': 359 | r = '\v' 360 | case '\\': 361 | r = '\\' 362 | } 363 | return r, true, nil 364 | case '0', '1', '2', '3', '4', '5', '6', '7': 365 | v, _ := oct(r) 366 | for i := 1; i < 3; i++ { 367 | r, err := x.next() 368 | if err != nil { 369 | return 0, false, err 370 | } 371 | d, ok := oct(r) 372 | if !ok { 373 | return 0, false, errUnknownEsc 374 | } 375 | v = (v << 3) | d 376 | } 377 | if v > 255 { 378 | return 0, false, errors.New("octal escape >255") 379 | } 380 | return v, true, nil 381 | case 'x', 'u', 'U': 382 | var n int 383 | switch r { 384 | case 'x': 385 | n = 2 386 | case 'u': 387 | n = 4 388 | case 'U': 389 | n = 8 390 | } 391 | var v int32 392 | for i := 0; i < n; i++ { 393 | r, err := x.next() 394 | if err != nil { 395 | return 0, false, err 396 | } 397 | d, ok := hex(r) 398 | if !ok { 399 | return 0, false, errUnknownEsc 400 | } 401 | v = (v << 4) | d 402 | } 403 | // TODO: surrogate halves are also illegal — whatever that is. 404 | if v > 0x10FFFF { 405 | return 0, false, errors.New("hex escape >0x10FFFF") 406 | } 407 | return v, true, nil 408 | default: 409 | if r == delim { 410 | return r, true, nil 411 | } 412 | // For character classes, allow \- as - and \^ as ^. 413 | if delim == ']' && (r == '-' || r == '^') { 414 | return r, true, nil 415 | } 416 | return 0, false, errUnknownEsc 417 | } 418 | default: 419 | return r, false, nil 420 | } 421 | } 422 | 423 | func oct(r rune) (int32, bool) { 424 | if '0' <= r && r <= '7' { 425 | return int32(r) - '0', true 426 | } 427 | return 0, false 428 | } 429 | 430 | func hex(r rune) (int32, bool) { 431 | if '0' <= r && r <= '9' { 432 | return int32(r) - '0', true 433 | } 434 | if 'a' <= r && r <= 'f' { 435 | return int32(r) - 'a' + 10, true 436 | } 437 | if 'A' <= r && r <= 'F' { 438 | return int32(r) - 'A' + 10, true 439 | } 440 | return 0, false 441 | } 442 | -------------------------------------------------------------------------------- /main.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
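// Command usage, as a sketch only (the binary name and the grammar file names
// below are illustrative, not taken from this repository):
//
//	peggy -o parser.go my_grammar.peggy    write the generated parser to parser.go
//	peggy -pretty my_grammar.peggy         print the grammar with labels and actions stripped
//	peggy < my_grammar.peggy               read the grammar from standard input
//
// See the flag definitions below for the full set of options.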
6 | 7 | package main 8 | 9 | import ( 10 | "bufio" 11 | "flag" 12 | "fmt" 13 | "io" 14 | "os" 15 | ) 16 | 17 | //go:generate goyacc -o grammar.go -p "peggy" grammar.y 18 | 19 | var ( 20 | out = flag.String("o", "", "output file path") 21 | prefix = flag.String("p", "_", "identifier prefix") 22 | genActions = flag.Bool("a", true, "generate action parsing") 23 | genParseTree = flag.Bool("t", true, "generate parse tree parsing") 24 | prettyPrint = flag.Bool("pretty", false, "don't check or generate, write the grammar without labels or actions") 25 | ) 26 | 27 | func main() { 28 | flag.Parse() 29 | args := flag.Args() 30 | 31 | in := bufio.NewReader(os.Stdin) 32 | file := "" 33 | if len(args) > 0 { 34 | f, err := os.Open(args[0]) 35 | if err != nil { 36 | fmt.Println(err) 37 | os.Exit(1) 38 | } 39 | in = bufio.NewReader(f) 40 | file = args[0] 41 | } 42 | 43 | g, err := Parse(in, file) 44 | if err != nil { 45 | fmt.Println(err) 46 | os.Exit(1) 47 | } 48 | 49 | var w io.Writer = os.Stdout 50 | if *out != "" { 51 | f, err := os.Create(*out) 52 | if err != nil { 53 | fmt.Println(err) 54 | os.Exit(1) 55 | } 56 | defer func() { 57 | if err := f.Close(); err != nil { 58 | fmt.Println(err) 59 | } 60 | }() 61 | w = f 62 | } 63 | if *prettyPrint { 64 | for i := range g.Rules { 65 | r := &g.Rules[i] 66 | if _, err := io.WriteString(w, r.String()+"\n"); err != nil { 67 | fmt.Println(err) 68 | os.Exit(1) 69 | } 70 | } 71 | os.Exit(0) 72 | } 73 | if err := Check(g); err != nil { 74 | fmt.Println(err) 75 | os.Exit(1) 76 | } 77 | 78 | cfg := Config{Prefix: *prefix} 79 | if err := cfg.Generate(w, file, g); err != nil { 80 | fmt.Println(err) 81 | os.Exit(1) 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /parse_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "errors" 11 | "io" 12 | "regexp" 13 | "strings" 14 | "testing" 15 | 16 | "github.com/eaburns/pretty" 17 | ) 18 | 19 | // A ParserTest is a Peggy input-file parser test 20 | // with a given input and expected string formats. 21 | type ParserTest struct { 22 | Name string 23 | Input string 24 | // FullString is the expected fully parenthesized string. 25 | FullString string 26 | // String is the expected regular String string. 27 | // This is the same as Input, but without 28 | // comments and unnecessary whitespace, 29 | // except for a single space, " ", 30 | // separating sub-exprsessions of a sequence, 31 | // and on either side of <-. 32 | String string 33 | // Prelude is the expected file prelude text. 34 | Prelude string 35 | // Error is a regexp string that matches an expected parse error. 36 | Error string 37 | } 38 | 39 | // ParseTests is a set of tests matching 40 | // FullString and String outputs with expected outputs for successful parses, 41 | // and expected parse errors for failed parses. 42 | // If Input contains a ☹ rune, the io.RuneScanner returns an error on that rune. 
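// As an illustration, the "choice associativity" case below has
//	Input:      "A <- B/C/D"
//	FullString: "A <- (((B)/(C))/(D))"
//	String:     "A <- B/C/D"
// FullString parenthesizes every subexpression, while String reproduces the
// input with comments and extra whitespace removed.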
43 | var ParseTests = []ParserTest{ 44 | { 45 | Name: "empty", 46 | Input: "", 47 | FullString: "", 48 | String: "", 49 | }, 50 | { 51 | Name: "only whitespace", 52 | Input: " \n\n\t ", 53 | FullString: "", 54 | String: "", 55 | }, 56 | { 57 | Name: "simple rule", 58 | Input: "A <- B", 59 | FullString: "A <- (B)", 60 | String: "A <- B", 61 | }, 62 | { 63 | Name: "named rule", 64 | Input: `A "name" <- B`, 65 | FullString: `A "name" <- (B)`, 66 | String: `A "name" <- B`, 67 | }, 68 | { 69 | Name: "named rule, single quotes", 70 | Input: `A 'name' <- B`, 71 | FullString: `A "name" <- (B)`, 72 | String: `A "name" <- B`, 73 | }, 74 | { 75 | Name: "named rule, empty name", 76 | Input: `A "" <- B`, 77 | FullString: `A "" <- (B)`, 78 | String: `A "" <- B`, 79 | }, 80 | { 81 | Name: "named rule, escapes", 82 | Input: `A "\t\nabc" <- B`, 83 | FullString: `A "\t\nabc" <- (B)`, 84 | String: `A "\t\nabc" <- B`, 85 | }, 86 | { 87 | Name: "prelude and simple rule", 88 | Input: `{ 89 | package main 90 | 91 | import "fmt" 92 | 93 | func main() { fmt.Println("Hello, World") } 94 | } 95 | A <- B`, 96 | FullString: "A <- (B)", 97 | String: "A <- B", 98 | Prelude: ` 99 | package main 100 | 101 | import "fmt" 102 | 103 | func main() { fmt.Println("Hello, World") } 104 | `, 105 | }, 106 | { 107 | Name: "multiple simple rules", 108 | Input: "A <- B\nC <- D", 109 | FullString: "A <- (B)\nC <- (D)", 110 | String: "A <- B\nC <- D", 111 | }, 112 | { 113 | Name: "multiple simple rules", 114 | Input: "A <- B\nC <- D", 115 | FullString: "A <- (B)\nC <- (D)", 116 | String: "A <- B\nC <- D", 117 | }, 118 | { 119 | Name: "whitespace", 120 | Input: "\tA <- B\n \n\n C <- D\t ", 121 | FullString: "A <- (B)\nC <- (D)", 122 | String: "A <- B\nC <- D", 123 | }, 124 | { 125 | Name: "comments", 126 | Input: "# comment\nA <- B # comment\n# comment", 127 | FullString: "A <- (B)", 128 | String: "A <- B", 129 | }, 130 | 131 | // Operands. 132 | { 133 | Name: "& pred code", 134 | Input: "A <- &{pred}", 135 | FullString: "A <- (&{pred})", 136 | String: "A <- &{…}", 137 | }, 138 | { 139 | Name: "! 
pred code", 140 | Input: "A <- !{pred}", 141 | FullString: "A <- (!{pred})", 142 | String: "A <- !{…}", 143 | }, 144 | { 145 | Name: "any", 146 | Input: "A <- .", 147 | FullString: "A <- (.)", 148 | String: "A <- .", 149 | }, 150 | { 151 | Name: "identifier", 152 | Input: "A <- BCD", 153 | FullString: "A <- (BCD)", 154 | String: "A <- BCD", 155 | }, 156 | { 157 | Name: "non-ASCII identifier", 158 | Input: "Â <- _αβξ", 159 | FullString: "Â <- (_αβξ)", 160 | String: "Â <- _αβξ", 161 | }, 162 | { 163 | Name: "double-quote string", 164 | Input: `A <- "BCD☺"`, 165 | FullString: `A <- ("BCD☺")`, 166 | String: `A <- "BCD☺"`, 167 | }, 168 | { 169 | Name: "single-quote string", 170 | Input: `A <- 'BCD☺'`, 171 | FullString: `A <- ("BCD☺")`, 172 | String: `A <- "BCD☺"`, 173 | }, 174 | { 175 | Name: "character class", 176 | Input: `A <- [abc\nxyzαβξ1-9A-Z\-]`, 177 | FullString: `A <- ([abc\nxyzαβξ1-9A-Z\-])`, 178 | String: `A <- [abc\nxyzαβξ1-9A-Z\-]`, 179 | }, 180 | { 181 | Name: "^ character class", 182 | Input: `A <- [^^abc\nxyzαβξ]`, 183 | FullString: `A <- ([^\^abc\nxyzαβξ])`, 184 | String: `A <- [^\^abc\nxyzαβξ]`, 185 | }, 186 | { 187 | Name: "character class, delimiters", 188 | Input: `A <- [[\]]`, 189 | FullString: `A <- ([[\]])`, 190 | String: `A <- [[\]]`, 191 | }, 192 | { 193 | // ^ should only negate the class if it's at the beginning 194 | Name: "character class, non-first^", 195 | Input: `A <- [abc^]`, 196 | FullString: `A <- ([abc\^])`, 197 | String: `A <- [abc\^]`, 198 | }, 199 | { 200 | Name: "character class, escaping", 201 | Input: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [\101] [\x41] [\u0041] [\U00000041] [\aa\b] [a\ab] [\^]`, 202 | FullString: `A <- ((((((((((((((((([\a]) ([\b])) ([\f])) ([\n])) ([\r])) ([\t])) ([\v])) ([\\])) ([\-])) ([\]])) ([A])) ([A])) ([A])) ([A])) ([\aa\b])) ([a\ab])) ([\^]))`, 203 | String: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [A] [A] [A] [A] [\aa\b] [a\ab] [\^]`, 204 | }, 205 | 206 | // Associativity. 207 | { 208 | Name: "choice associativity", 209 | Input: "A <- B/C/D", 210 | FullString: "A <- (((B)/(C))/(D))", 211 | String: "A <- B/C/D", 212 | }, 213 | { 214 | Name: "sequence associativity", 215 | Input: "A <- B C D", 216 | FullString: "A <- (((B) (C)) (D))", 217 | String: "A <- B C D", 218 | }, 219 | 220 | // Precedence. 
221 | { 222 | Name: "various precedences", 223 | Input: "A <- x:B*+ C?/(!D y:&E)* {return 0}/F !{p}", 224 | FullString: "A <- ((((x:(((B)*)+)) ((C)?))/((((!(D)) (y:(&(E))))*) {return 0}))/((F) (!{p})))", 225 | String: "A <- x:B*+ C?/(!D y:&E)* {…}/F !{…}", 226 | }, 227 | { 228 | Name: "action < choice", 229 | Input: "A <- B { return 0 }/C { return 0 }", 230 | FullString: "A <- (((B) { return 0 })/((C) { return 0 }))", 231 | String: "A <- B {…}/C {…}", 232 | }, 233 | { 234 | Name: "sequence < action", 235 | Input: "A <- B C { return 0 }", 236 | FullString: "A <- (((B) (C)) { return 0 })", 237 | String: "A <- B C {…}", 238 | }, 239 | { 240 | Name: "label < sequence", 241 | Input: "A <- s:A t:B", 242 | FullString: "A <- ((s:(A)) (t:(B)))", 243 | String: "A <- s:A t:B", 244 | }, 245 | { 246 | Name: "pred < label", 247 | Input: "A <- s:!A t:&B", 248 | FullString: "A <- ((s:(!(A))) (t:(&(B))))", 249 | String: "A <- s:!A t:&B", 250 | }, 251 | { 252 | Name: "rep < pred", 253 | Input: "A <- !A* &B+ !C?", 254 | FullString: "A <- (((!((A)*)) (&((B)+))) (!((C)?)))", 255 | String: "A <- !A* &B+ !C?", 256 | }, 257 | { 258 | Name: "operand < rep", 259 | Input: `A <- (a/b c)* 260 | B <- &{pred}* 261 | C <- !{pred}* 262 | D <- .* 263 | E <- Z* 264 | F <- "cde"* 265 | G <- [fgh]*`, 266 | FullString: `A <- (((a)/((b) (c)))*) 267 | B <- ((&{pred})*) 268 | C <- ((!{pred})*) 269 | D <- ((.)*) 270 | E <- ((Z)*) 271 | F <- (("cde")*) 272 | G <- (([fgh])*)`, 273 | String: `A <- (a/b c)* 274 | B <- &{…}* 275 | C <- !{…}* 276 | D <- .* 277 | E <- Z* 278 | F <- "cde"* 279 | G <- [fgh]*`, 280 | }, 281 | 282 | // Templates 283 | { 284 | Name: "1-ary template rule", 285 | Input: `A <- x`, 286 | FullString: `A <- (x)`, 287 | String: `A <- x`, 288 | }, 289 | { 290 | Name: "3-ary template rule", 291 | Input: `A <- x y z`, 292 | FullString: `A <- (((x) (y)) (z))`, 293 | String: `A <- x y z`, 294 | }, 295 | { 296 | Name: "1-ary template invocation", 297 | Input: `A <- B C`, 298 | FullString: `A <- ((B) (C))`, 299 | String: `A <- B C`, 300 | }, 301 | { 302 | Name: "3-ary template invocation", 303 | Input: `A <- B C`, 304 | FullString: `A <- ((B) (C))`, 305 | String: `A <- B C`, 306 | }, 307 | 308 | // Rune escaping 309 | { 310 | Name: `escape \a`, 311 | Input: `A <- "\a"`, 312 | FullString: `A <- ("\a")`, 313 | String: `A <- "\a"`, 314 | }, 315 | { 316 | Name: `escape \b`, 317 | Input: `A <- "\b"`, 318 | FullString: `A <- ("\b")`, 319 | String: `A <- "\b"`, 320 | }, 321 | { 322 | Name: `escape \f`, 323 | Input: `A <- "\f"`, 324 | FullString: `A <- ("\f")`, 325 | String: `A <- "\f"`, 326 | }, 327 | { 328 | Name: `escape \n`, 329 | Input: `A <- "\n"`, 330 | FullString: `A <- ("\n")`, 331 | String: `A <- "\n"`, 332 | }, 333 | { 334 | Name: `escape \r`, 335 | Input: `A <- "\r"`, 336 | FullString: `A <- ("\r")`, 337 | String: `A <- "\r"`, 338 | }, 339 | { 340 | Name: `escape \t`, 341 | Input: `A <- "\t"`, 342 | FullString: `A <- ("\t")`, 343 | String: `A <- "\t"`, 344 | }, 345 | { 346 | Name: `escape \v`, 347 | Input: `A <- "\v"`, 348 | FullString: `A <- ("\v")`, 349 | String: `A <- "\v"`, 350 | }, 351 | { 352 | Name: `escape \\`, 353 | Input: `A <- "\\"`, 354 | FullString: `A <- ("\\")`, 355 | String: `A <- "\\"`, 356 | }, 357 | { 358 | Name: `escape \"`, 359 | Input: `A <- "\""`, 360 | FullString: `A <- ("\"")`, 361 | String: `A <- "\""`, 362 | }, 363 | { 364 | Name: `escape \'`, 365 | Input: `A <- '\''`, 366 | FullString: `A <- ("'")`, 367 | String: `A <- "'"`, 368 | }, 369 | { 370 | Name: `escape \000`, 371 | Input: `A <- 
"\000"`, 372 | FullString: `A <- ("\x00")`, 373 | String: `A <- "\x00"`, 374 | }, 375 | { 376 | Name: `escape \101 (A)`, 377 | Input: `A <- "\101"`, 378 | FullString: `A <- ("A")`, 379 | String: `A <- "A"`, 380 | }, 381 | { 382 | Name: `escape \101BCD`, 383 | Input: `A <- "\101BCD"`, 384 | FullString: `A <- ("ABCD")`, 385 | String: `A <- "ABCD"`, 386 | }, 387 | { 388 | Name: `escape \377 (255)`, 389 | Input: `A <- "\377"`, 390 | FullString: `A <- ("ÿ")`, // \xFF 391 | String: `A <- "ÿ"`, 392 | }, 393 | { 394 | Name: `escape \400 (256)`, 395 | Input: `A <- "\400"`, 396 | Error: "^test.file:1.6,1.11:.*>255", 397 | }, 398 | { 399 | Name: `escape \400 (256)`, 400 | Input: `A <- "xyz\400"`, 401 | // TODO: report the correct error location. 402 | Error: "^test.file:1.6,1.14:.*>255", 403 | }, 404 | { 405 | Name: `escape \4`, 406 | Input: `A <- "\4"`, 407 | Error: "^test.file:1.6,1.10: unknown escape sequence", 408 | }, 409 | { 410 | Name: `escape \40`, 411 | Input: `A <- "\40"`, 412 | Error: "^test.file:1.6,1.11: unknown escape sequence", 413 | }, 414 | { 415 | Name: `escape \x00`, 416 | Input: `A <- "\x00"`, 417 | FullString: `A <- ("\x00")`, 418 | String: `A <- "\x00"`, 419 | }, 420 | { 421 | Name: `escape \x41 (A)`, 422 | Input: `A <- "\x41"`, 423 | FullString: `A <- ("A")`, 424 | String: `A <- "A"`, 425 | }, 426 | { 427 | Name: `escape \x41BCD`, 428 | Input: `A <- "\x41BCD"`, 429 | FullString: `A <- ("ABCD")`, 430 | String: `A <- "ABCD"`, 431 | }, 432 | { 433 | Name: `escape \xFF`, 434 | Input: `A <- "\xFF"`, 435 | FullString: `A <- ("ÿ")`, // \xFF 436 | String: `A <- "ÿ"`, 437 | }, 438 | { 439 | Name: `escape \xF`, 440 | Input: `A <- "\xF"`, 441 | Error: "^test.file:1.6,1.11: unknown escape sequence", 442 | }, 443 | { 444 | Name: `escape \u0000`, 445 | Input: `A <- "\u0000"`, 446 | FullString: `A <- ("\x00")`, 447 | String: `A <- "\x00"`, 448 | }, 449 | { 450 | Name: `escape \u0041 (A)`, 451 | Input: `A <- "\u0041"`, 452 | FullString: `A <- ("A")`, 453 | String: `A <- "A"`, 454 | }, 455 | { 456 | Name: `escape \u0041BCD`, 457 | Input: `A <- "\u0041BCD"`, 458 | FullString: `A <- ("ABCD")`, 459 | String: `A <- "ABCD"`, 460 | }, 461 | { 462 | Name: `escape \u263A (☺)`, 463 | Input: `A <- "\u263A"`, 464 | FullString: `A <- ("☺")`, 465 | String: `A <- "☺"`, 466 | }, 467 | { 468 | Name: `escape \u263a (☺)`, 469 | Input: `A <- "\u263a"`, 470 | FullString: `A <- ("☺")`, 471 | String: `A <- "☺"`, 472 | }, 473 | { 474 | Name: `escape \uF`, 475 | Input: `A <- "\xF"`, 476 | Error: "^test.file:1.6,1.11: unknown escape sequence", 477 | }, 478 | { 479 | Name: `escape \uFF`, 480 | Input: `A <- "\uFF"`, 481 | Error: "^test.file:1.6,1.12: unknown escape sequence", 482 | }, 483 | { 484 | Name: `escape \uFFF`, 485 | Input: `A <- "\uFFF"`, 486 | Error: "^test.file:1.6,1.13: unknown escape sequence", 487 | }, 488 | { 489 | Name: `escape \U00000000`, 490 | Input: `A <- "\U00000000"`, 491 | FullString: `A <- ("\x00")`, 492 | String: `A <- "\x00"`, 493 | }, 494 | { 495 | Name: `escape \U00000041 (A)`, 496 | Input: `A <- "\U00000041"`, 497 | FullString: `A <- ("A")`, 498 | String: `A <- "A"`, 499 | }, 500 | { 501 | Name: `escape \U00000041BCD`, 502 | Input: `A <- "\U00000041BCD"`, 503 | FullString: `A <- ("ABCD")`, 504 | String: `A <- "ABCD"`, 505 | }, 506 | { 507 | Name: `escape \U0000263A (☺)`, 508 | Input: `A <- "\U0000263A"`, 509 | FullString: `A <- ("☺")`, 510 | String: `A <- "☺"`, 511 | }, 512 | { 513 | Name: `escape \U0000263a (☺)`, 514 | Input: `A <- "\U0000263a"`, 515 | FullString: `A <- ("☺")`, 516 | 
String: `A <- "☺"`, 517 | }, 518 | { 519 | Name: `escape \U0010FFFF`, 520 | Input: `A <- "\U0010FFFF"`, 521 | FullString: `A <- ("\U0010ffff")`, 522 | String: `A <- "\U0010ffff"`, 523 | }, 524 | { 525 | Name: `escape \U00110000`, 526 | Input: `A <- "\U00110000"`, 527 | Error: "^test.file:1.6,1.17:.*>0x10FFFF", 528 | }, 529 | { 530 | Name: `escape \UF`, 531 | Input: `A <- "\UF"`, 532 | Error: "^test.file:1.6,1.11: unknown escape sequence", 533 | }, 534 | { 535 | Name: `escape \UFF`, 536 | Input: `A <- "\UFF"`, 537 | Error: "^test.file:1.6,1.12: unknown escape sequence", 538 | }, 539 | { 540 | Name: `escape \UFFF`, 541 | Input: `A <- "\UFFF"`, 542 | Error: "^test.file:1.6,1.13: unknown escape sequence", 543 | }, 544 | { 545 | Name: `escape \UFFFF`, 546 | Input: `A <- "\UFFFF"`, 547 | Error: "^test.file:1.6,1.14: unknown escape sequence", 548 | }, 549 | { 550 | Name: `escape \UFFFFF`, 551 | Input: `A <- "\UFFFFF"`, 552 | Error: "^test.file:1.6,1.15: unknown escape sequence", 553 | }, 554 | { 555 | Name: `escape \UFFFFFF`, 556 | Input: `A <- "\UFFFFFF"`, 557 | Error: "^test.file:1.6,1.16: unknown escape sequence", 558 | }, 559 | { 560 | Name: `escape \UFFFFFFF`, 561 | Input: `A <- "\UFFFFFFF"`, 562 | Error: "^test.file:1.6,1.17: unknown escape sequence", 563 | }, 564 | { 565 | Name: `string with multiple escapes`, 566 | Input: `A <- "x\a\b\f\n\r\t\v\\\"\000\x00\u0000\U00000000☺"`, 567 | FullString: `A <- ("x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺")`, 568 | String: `A <- "x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺"`, 569 | }, 570 | { 571 | Name: `unknown escape`, 572 | Input: `A <- "\z"`, 573 | Error: "^test.file:1.6,1.9: unknown escape sequence", 574 | }, 575 | { 576 | Name: `escape eof`, 577 | Input: `A <- "\`, 578 | Error: `^test.file:1.6,1.8: unclosed "`, 579 | }, 580 | 581 | // Whitespace. 582 | // BUG: The current YACC grammar 583 | // doesn't allow whitespace between all tokens, 584 | // but only particular tokens. 585 | // Specifically whitespace can only appear after 586 | // delimiters after which a new rule cannot begin. 587 | // This is because, in order to remain LALR(1), 588 | // a newline terminates a sequence expression, 589 | // denoting that the next identifier is a rule name. 590 | { 591 | Name: `after <-`, 592 | Input: `A <- 593 | "a" 594 | 595 | B <- #comment 596 | "b" 597 | 598 | C "c" <- 599 | "c" 600 | 601 | D "d" <- #comment 602 | "d"`, 603 | FullString: `A <- ("a") 604 | B <- ("b") 605 | C "c" <- ("c") 606 | D "d" <- ("d")`, 607 | String: `A <- "a" 608 | B <- "b" 609 | C "c" <- "c" 610 | D "d" <- "d"`, 611 | }, 612 | { 613 | Name: `after /`, 614 | Input: `A <- B / 615 | C / # comment 616 | D`, 617 | FullString: `A <- (((B)/(C))/(D))`, 618 | String: `A <- B/C/D`, 619 | }, 620 | { 621 | Name: `after : label`, 622 | Input: `A <- l: 623 | B m: #comment 624 | C`, 625 | FullString: `A <- ((l:(B)) (m:(C)))`, 626 | String: `A <- l:B m:C`, 627 | }, 628 | { 629 | Name: `after & predicate`, 630 | Input: `A <- & 631 | B & #comment 632 | C`, 633 | FullString: `A <- ((&(B)) (&(C)))`, 634 | String: `A <- &B &C`, 635 | }, 636 | { 637 | Name: `after ! predicate`, 638 | Input: `A <- ! 639 | B ! 
#comment 640 | C`, 641 | FullString: `A <- ((!(B)) (!(C)))`, 642 | String: `A <- !B !C`, 643 | }, 644 | { 645 | Name: `after (`, 646 | Input: `A <- ( 647 | B ( #comment 648 | C))`, 649 | FullString: `A <- ((B) (C))`, 650 | String: `A <- (B (C))`, 651 | }, 652 | { 653 | Name: `before )`, 654 | Input: `A <- (B (C 655 | ) #comment 656 | )`, 657 | FullString: `A <- ((B) (C))`, 658 | String: `A <- (B (C))`, 659 | }, 660 | { 661 | Name: `after & code`, 662 | Input: `A <- & 663 | {code} & #comment 664 | {CODE}`, 665 | FullString: `A <- ((&{code}) (&{CODE}))`, 666 | String: `A <- &{…} &{…}`, 667 | }, 668 | { 669 | Name: `after ! code`, 670 | Input: `A <- ! 671 | {code} ! #comment 672 | {CODE}`, 673 | FullString: `A <- ((!{code}) (!{CODE}))`, 674 | String: `A <- !{…} !{…}`, 675 | }, 676 | 677 | // Systax errors. 678 | { 679 | Name: "bad rule name", 680 | Input: "\n\t\t&", 681 | Error: "^test.file:2.3,2.4:", 682 | }, 683 | { 684 | Name: "missing <-", 685 | Input: "\nA B", 686 | Error: "^test.file:2.3,2.4:", 687 | }, 688 | { 689 | Name: "bad <-", 690 | Input: "\nA <~ C", 691 | Error: "^test.file:2.4,2.5:", 692 | }, 693 | { 694 | Name: "missing expr", 695 | Input: "\nA <-", 696 | Error: "^test.file:2.5:", 697 | }, 698 | { 699 | Name: "unexpected rune", 700 | Input: "\nA <- C ☺", 701 | Error: "^test.file:2.8,2.9:", 702 | }, 703 | { 704 | Name: "unclosed (", 705 | Input: "\nA <- (B", 706 | Error: "^test.file:2.8:", 707 | }, 708 | { 709 | Name: "unclosed '", 710 | Input: "\nA <- 'B", 711 | Error: "^test.file:2.6,2.8: unclosed '", 712 | }, 713 | { 714 | Name: `unclosed "`, 715 | Input: "\nA <- \"B", 716 | Error: "^test.file:2.6,2.8: unclosed \"", 717 | }, 718 | { 719 | Name: `unclosed {`, 720 | Input: "\nA <- B { code", 721 | Error: "^test.file:2.8,2.14: unclosed {", 722 | }, 723 | { 724 | Name: `unclosed spans lines`, 725 | Input: "\nA <- \"B\n\nC", 726 | Error: "^test.file:2.6,4.2: unclosed \"", 727 | }, 728 | { 729 | Name: "unclosed [", 730 | Input: "\nA <- [B", 731 | Error: "^test.file:2.6,2.8: unclosed [[]", 732 | }, 733 | { 734 | Name: "character class empty", 735 | Input: "\nA <- []", 736 | Error: "^test.file:2.6,2.8: bad char class: empty", 737 | }, 738 | { 739 | Name: "character class starts with span", 740 | Input: "\nA <- [-9]", 741 | Error: "^test.file:2.7,2.9: bad span", 742 | }, 743 | { 744 | Name: "character class no span start", 745 | Input: "\nA <- [1-3-9]", 746 | Error: "^test.file:2.10,2.12: bad span", 747 | }, 748 | { 749 | Name: "character class ends with span", 750 | Input: "\nA <- [0-]", 751 | Error: "^test.file:2.7,2.9: bad span", 752 | }, 753 | { 754 | Name: "character class inverted span", 755 | Input: "\nA <- [9-0]", 756 | Error: "^test.file:2.7,2.10: bad span", 757 | }, 758 | { 759 | Name: "character class span after span", 760 | Input: "\nA <- [^0-9abcA-Zz-a]", 761 | Error: "^test.file:2.17,2.20: bad span", 762 | }, 763 | { 764 | Name: "character class bad span after rune", 765 | Input: "\nA <- [^0-9abcZ-A]", 766 | Error: "^test.file:2.14,2.17: bad span", 767 | }, 768 | 769 | // Go syntax errors. 770 | { 771 | Name: `bad prelude`, 772 | Input: "{ not package line }\nA <- B", 773 | Error: "^test.file:1.3", 774 | }, 775 | { 776 | Name: `bad multi-line prelude`, 777 | Input: `{ 778 | package main 779 | 780 | import "fmt" 781 | 782 | // Missing open paren. 783 | func main() { fmt.Println"Hello, World") } 784 | } 785 | A <- B`, 786 | Error: "^test.file:7.26", 787 | }, 788 | { 789 | Name: `bad bool expression`, 790 | // = instead of ==. 
791 | Input: "\nA <- &{ x = z}", 792 | Error: "^test.file:2.11", 793 | }, 794 | { 795 | Name: `bad multi-line bool expression`, 796 | // Missing the closed paren on p(. 797 | Input: "\nA <- &{ x == \n p(y, z, h}", 798 | Error: "^test.file:3.11", 799 | }, 800 | { 801 | Name: `bad action`, 802 | Input: "A <- B { if ( }", 803 | Error: "^test.file:1.15", 804 | }, 805 | { 806 | Name: `bad multi-line action`, 807 | Input: "\nA <- B {\n if ( }", 808 | Error: "^test.file:3.7", 809 | }, 810 | { 811 | Name: `bad action: invalid nested func def`, 812 | Input: "\nA <- B { func f() int { return 1 } }", 813 | Error: "^test.file:2.15", 814 | }, 815 | { 816 | Name: `action with nested return`, 817 | Input: "A <- B { if true { return 0 } else { return 1 } }", 818 | FullString: "A <- ((B) { if true { return 0 } else { return 1 } })", 819 | String: "A <- B {…}", 820 | }, 821 | { 822 | Name: `missing return`, 823 | Input: "A <- B { }", 824 | Error: "^test.file:1.9: no return statement", 825 | }, 826 | { 827 | Name: `multi-value return`, 828 | Input: "A <- B { return 1, 2, 3 }", 829 | Error: "^test.file:1.9: must return exactly one value", 830 | }, 831 | { 832 | Name: `non-conversion multi-ary function return`, 833 | Input: "A <- B { return f(a, b, c) }", 834 | Error: "^test.file:1.9: cannot infer type", 835 | }, 836 | { 837 | Name: `non-conversion nil-ary function return`, 838 | Input: "A <- B { return f() }", 839 | Error: "^test.file:1.9: cannot infer type", 840 | }, 841 | { 842 | Name: `non-conversion function return`, 843 | Input: "A <- B { return f(a, b, c) }", 844 | Error: "^test.file:1.9: cannot infer type", 845 | }, 846 | 847 | // I/O errors. 848 | { 849 | Name: "only I/O error", 850 | Input: "☹", 851 | Error: testIOError, 852 | }, 853 | { 854 | Name: "comment I/O error", 855 | Input: "#☹", 856 | Error: testIOError, 857 | }, 858 | { 859 | Name: "ident I/O error", 860 | Input: "A☹", 861 | Error: testIOError, 862 | }, 863 | { 864 | Name: "arrow I/O error", 865 | Input: "A <☹", 866 | Error: testIOError, 867 | }, 868 | { 869 | Name: "code I/O error", 870 | Input: "A <- B { ☹", 871 | Error: testIOError, 872 | }, 873 | { 874 | Name: "char class I/O error", 875 | Input: "A <- [☹", 876 | Error: testIOError, 877 | }, 878 | { 879 | Name: "double-quoted string I/O error", 880 | Input: "A <- \"☹", 881 | Error: testIOError, 882 | }, 883 | { 884 | Name: "single-quoted string I/O error", 885 | Input: "A <- '☹", 886 | Error: testIOError, 887 | }, 888 | } 889 | 890 | func TestParse(t *testing.T) { 891 | for _, test := range ParseTests { 892 | test := test 893 | t.Run(test.Name, func(t *testing.T) { 894 | t.Parallel() 895 | in := testRuneScanner{strings.NewReader(test.Input)} 896 | g, err := Parse(in, "test.file") 897 | 898 | if test.Error != "" { 899 | if err == nil { 900 | t.Log(pretty.String(g.Rules)) 901 | t.Errorf("Parse(%q) ok, but expected error matching %q", 902 | test.Input, test.Error) 903 | return 904 | } 905 | re := regexp.MustCompile(test.Error) 906 | if !re.MatchString(err.Error()) { 907 | t.Errorf("Parse(%q) err=%q, but expected to match %q", 908 | test.Input, err.Error(), test.Error) 909 | return 910 | } 911 | return 912 | } 913 | 914 | if err != nil { 915 | t.Errorf("Parse(%q) failed: %s", test.Input, err) 916 | return 917 | } 918 | var pre string 919 | if g.Prelude != nil { 920 | pre = g.Prelude.String() 921 | } 922 | if pre != test.Prelude { 923 | t.Errorf("Parse(%q).Prelude=\n%s\nwant:\n%s", 924 | test.Input, pre, test.Prelude) 925 | return 926 | } 927 | if s := FullString(g.Rules); s != 
test.FullString { 928 | t.Errorf("Parse(%q)\nfull string:\n%q\nwant:\n%q", 929 | test.Input, s, test.FullString) 930 | return 931 | } 932 | if s := String(g.Rules); s != test.String { 933 | t.Errorf("Parse(%q)\nstring:\n%q\nwant:\n%q", 934 | test.Input, s, test.String) 935 | return 936 | } 937 | }) 938 | } 939 | } 940 | 941 | // testRuneScanner implements io.RuneScanner, wrapping another RuneScanner, 942 | // however, whenever the original scanner would've returned a ☹ rune, 943 | // testRuneScanner instead returns an error. 944 | type testRuneScanner struct { 945 | io.RuneScanner 946 | } 947 | 948 | const testIOError = "test I/O error" 949 | 950 | func (rs testRuneScanner) ReadRune() (rune, int, error) { 951 | r, n, err := rs.RuneScanner.ReadRune() 952 | if r == '☹' { 953 | return 0, 0, errors.New(testIOError) 954 | } 955 | return r, n, err 956 | } 957 | -------------------------------------------------------------------------------- /peg/fail.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "fmt" 10 | 11 | // SimpleError returns an error with a basic error message 12 | // that describes what was expected at all of the leaf fails 13 | // with the greatest position in the tree. 14 | // 15 | // The FilePath field of the returned Error is the empty string. 16 | // The caller can set this field if to prefix the location 17 | // with the path to an input file. 18 | func SimpleError(text string, node *Fail) Error { 19 | leaves := LeafFails(node) 20 | 21 | var want string 22 | for i, l := range leaves { 23 | switch { 24 | case i == len(leaves)-1 && i == 1: 25 | want += " or " 26 | case i == len(leaves)-1 && len(want) > 1: 27 | want += ", or " 28 | case i > 0: 29 | want += ", " 30 | } 31 | want += l.Want 32 | } 33 | 34 | got := "EOF" 35 | pos := leaves[0].Pos 36 | if pos < len(text) { 37 | end := pos + 10 38 | if end > len(text) { 39 | end = len(text) 40 | } 41 | got = "'" + text[pos:end] + "'" 42 | } 43 | 44 | return Error{ 45 | Loc: Location(text, pos), 46 | Message: fmt.Sprintf("want %s; got %s", want, got), 47 | } 48 | } 49 | 50 | // Error implements error, prefixing an error message 51 | // with location information for the error. 52 | type Error struct { 53 | // FilePath is the path of the input file containing the error. 54 | FilePath string 55 | // Loc is the location of the error. 56 | Loc Loc 57 | // Message is the error message. 58 | Message string 59 | } 60 | 61 | func (err Error) Error() string { 62 | return fmt.Sprintf("%s:%d.%d: %s", 63 | err.FilePath, err.Loc.Line, err.Loc.Column, err.Message) 64 | } 65 | 66 | // LeafFails returns all fails in the tree with the greatest Pos. 
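// For example, given leaf fails at Pos 10, 15, and 20, only the fails at
// Pos 20 are returned; SimpleError relies on this to describe what was
// expected at the furthest point the parse reached.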
67 | func LeafFails(node *Fail) []*Fail { 68 | pos := -1 69 | var fails []*Fail 70 | seen := make(map[*Fail]bool) 71 | var walk func(*Fail) 72 | walk = func(n *Fail) { 73 | if seen[n] { 74 | return 75 | } 76 | seen[n] = true 77 | if len(n.Kids) == 0 { 78 | switch { 79 | case n.Pos > pos: 80 | pos = n.Pos 81 | fails = append(fails[:0], n) 82 | case n.Pos == pos: 83 | fails = append(fails, n) 84 | } 85 | return 86 | } 87 | for _, k := range n.Kids { 88 | walk(k) 89 | } 90 | } 91 | walk(node) 92 | return fails 93 | } 94 | 95 | // DedupFails removes duplicate fail branches from the tree, 96 | // keeping only the first occurrence of each. 97 | // This is useful for example before printing the Fail tree, 98 | // because the non-deduped Fail tree can be exponential 99 | // in the input size. 100 | func DedupFails(node *Fail) { 101 | seen := make(map[*Fail]bool) 102 | var walk func(*Fail) bool 103 | walk = func(n *Fail) bool { 104 | if seen[n] { 105 | return false 106 | } 107 | seen[n] = true 108 | var kids []*Fail 109 | for _, k := range n.Kids { 110 | if walk(k) { 111 | kids = append(kids, k) 112 | } 113 | } 114 | n.Kids = kids 115 | return true 116 | } 117 | walk(node) 118 | } 119 | -------------------------------------------------------------------------------- /peg/fail_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import ( 10 | "reflect" 11 | "testing" 12 | 13 | "github.com/eaburns/pretty" 14 | ) 15 | 16 | func TestDedupFails(t *testing.T) { 17 | x := &Fail{Name: "x"} 18 | z := &Fail{Name: "z"} 19 | y := &Fail{Name: "y", Kids: []*Fail{z, z}} 20 | root := &Fail{ 21 | Kids: []*Fail{ 22 | x, 23 | &Fail{ 24 | Kids: []*Fail{ 25 | y, 26 | y, 27 | }, 28 | }, 29 | x, 30 | }, 31 | } 32 | DedupFails(root) 33 | want := &Fail{ 34 | Kids: []*Fail{ 35 | &Fail{Name: "x"}, 36 | &Fail{ 37 | Kids: []*Fail{ 38 | &Fail{ 39 | Name: "y", 40 | Kids: []*Fail{ 41 | &Fail{Name: "z"}, 42 | }, 43 | }, 44 | }, 45 | }, 46 | }, 47 | } 48 | if !reflect.DeepEqual(root, want) { 49 | t.Errorf("DedupFails()=%v, want %v", 50 | pretty.String(root), pretty.String(want)) 51 | } 52 | } 53 | 54 | func TestLeafFails(t *testing.T) { 55 | x0 := &Fail{Name: "x0", Pos: 10} 56 | x1 := &Fail{Name: "x1", Pos: 10} 57 | y0 := &Fail{Name: "y0", Pos: 15} 58 | y1 := &Fail{Name: "y1", Pos: 15} 59 | z0 := &Fail{Name: "z0", Pos: 20} 60 | z1 := &Fail{Name: "z1", Pos: 20} 61 | 62 | root := &Fail{ 63 | Kids: []*Fail{ 64 | x0, 65 | y0, 66 | z0, 67 | &Fail{ 68 | Kids: []*Fail{ 69 | x1, 70 | y1, 71 | z1, 72 | z0, 73 | }, 74 | }, 75 | z1, 76 | x0, 77 | y1, 78 | }, 79 | } 80 | 81 | got := LeafFails(root) 82 | want := []*Fail{z0, z1} 83 | if !reflect.DeepEqual(got, want) { 84 | t.Errorf("LeafFails()=%s, want %s", 85 | pretty.String(got), pretty.String(want)) 86 | } 87 | } 88 | 89 | func TestSimpleError_1(t *testing.T) { 90 | text := "123456789\nabcdefg" 91 | root := &Fail{ 92 | Kids: []*Fail{ 93 | &Fail{Pos: 10, Want: "A"}, 94 | }, 95 | } 96 | err := SimpleError(text, root) 97 | want := ":2.1: want A; got 'abcdefg'" 98 | if err.Error() != want { 99 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 100 | } 101 | } 102 | 103 | func TestSimpleError_2(t *testing.T) { 104 | text := "123456789\nabcdefg" 105 | root := &Fail{ 106 | Kids: []*Fail{ 107 | &Fail{Pos: 
10, Want: "A"}, 108 | &Fail{Pos: 10, Want: "B"}, 109 | }, 110 | } 111 | err := SimpleError(text, root) 112 | want := ":2.1: want A or B; got 'abcdefg'" 113 | if err.Error() != want { 114 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 115 | } 116 | } 117 | 118 | func TestSimpleError_3(t *testing.T) { 119 | text := "123456789\nabcdefg" 120 | root := &Fail{ 121 | Kids: []*Fail{ 122 | &Fail{Pos: 10, Want: "A"}, 123 | &Fail{Pos: 10, Want: "B"}, 124 | &Fail{Pos: 10, Want: "C"}, 125 | }, 126 | } 127 | err := SimpleError(text, root) 128 | want := ":2.1: want A, B, or C; got 'abcdefg'" 129 | if err.Error() != want { 130 | t.Errorf("err.Error()=%q, want %q", err.Error(), want) 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /peg/loc.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "unicode/utf8" 10 | 11 | // A Loc is a location in the input text. 12 | type Loc struct { 13 | Byte int 14 | Rune int 15 | Line int 16 | Column int 17 | } 18 | 19 | // Location returns the Loc at the corresponding byte offset in the text. 20 | func Location(text string, byte int) Loc { 21 | var loc Loc 22 | loc.Line = 1 23 | loc.Column = 1 24 | for byte > loc.Byte { 25 | r, w := utf8.DecodeRuneInString(text[loc.Byte:]) 26 | loc.Byte += w 27 | loc.Rune++ 28 | loc.Column++ 29 | if r == '\n' { 30 | loc.Line++ 31 | loc.Column = 1 32 | } 33 | } 34 | return loc 35 | } 36 | -------------------------------------------------------------------------------- /peg/loc_test.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 
6 | 7 | package peg 8 | 9 | import ( 10 | "strings" 11 | "testing" 12 | ) 13 | 14 | func TestLocation(t *testing.T) { 15 | tests := []struct { 16 | in string 17 | want Loc 18 | }{ 19 | { 20 | in: "*", 21 | want: Loc{Byte: 0, Rune: 0, Line: 1, Column: 1}, 22 | }, 23 | { 24 | in: "abc*", 25 | want: Loc{Byte: 3, Rune: 3, Line: 1, Column: 4}, 26 | }, 27 | { 28 | in: "ab\n*", 29 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1}, 30 | }, 31 | { 32 | in: "ab\n*", 33 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1}, 34 | }, 35 | { 36 | in: "ab\nabc\nxyz*", 37 | want: Loc{Byte: 10, Rune: 10, Line: 3, Column: 4}, 38 | }, 39 | { 40 | in: "☺*", 41 | want: Loc{Byte: len("☺"), Rune: 1, Line: 1, Column: 2}, 42 | }, 43 | { 44 | in: "☺☺☺*", 45 | want: Loc{Byte: 3 * len("☺"), Rune: 3, Line: 1, Column: 4}, 46 | }, 47 | { 48 | in: "☺☺\n☺*", 49 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2}, 50 | }, 51 | { 52 | in: "☺☺\n☺*☹☹☹", 53 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2}, 54 | }, 55 | } 56 | for _, test := range tests { 57 | b := strings.Index(test.in, "*") 58 | if b < 0 { 59 | panic("no *") 60 | } 61 | got := Location(test.in, b) 62 | if got != test.want { 63 | t.Errorf("Location(%q, %d)=%v, want %v", test.in, b, got, test.want) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /peg/peg.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import "unicode/utf8" 10 | 11 | // A Node is a node in a Peggy parse tree. 12 | type Node struct { 13 | // Name is the name of the Rule associated with the node, 14 | // or the empty string for anonymous Nodes 15 | // that are not associated with any Rule. 16 | Name string 17 | 18 | // Text is the input text of the Node's subtree. 19 | Text string 20 | 21 | // Kids are the immediate successors of this node. 22 | Kids []*Node 23 | } 24 | 25 | // A Fail is a node in a failed-parse tree. 26 | // A failed-parse tree contains all paths in a failed parse 27 | // that lead to the furthest error location in the input text. 28 | // There are two types of nodes: named and unnamed. 29 | // Named nodes represent grammar rules that failed to parse. 30 | // Unnamed nodes represent terminal expressions that failed to parse. 31 | type Fail struct { 32 | // Name is the name of the Rule associated with the node, 33 | // or the empty string if the Fail is a terminal expression failure. 34 | Name string 35 | 36 | // Pos is the byte offset into the input of the Fail. 37 | Pos int 38 | 39 | // Kids are the immediate succors of this Fail. 40 | // Kids is only non-nil for named Fail nodes. 41 | Kids []*Fail 42 | 43 | // Want is a string describing what was expected at the error position. 44 | // It is only non-empty for unnamed Fail nodes. 45 | // 46 | // It can be of one of the following forms: 47 | // "…" indicating a failed literal match, where the text between the quotes is the expected literal using Go escaping. 48 | // . indicating a failed . match. 49 | // […] indicating a failed character class match, where the text between the [ and ] is the character class. 50 | // !… where the text after ! is the string representation of a failed predicate subexpression. 
51 | // &… where the text after & is the string representation of a failed predicate subexpression. 52 | // … the error-name of a rule. 53 | // For example, "int" in rule: Integer "int" <- [0-9]. 54 | Want string 55 | } 56 | 57 | // DecodeRuneInString is utf8.DecodeRuneInString. 58 | // It's here so parsers can just include peg, and not also need unicode/utf8. 59 | func DecodeRuneInString(s string) (rune, int) { 60 | return utf8.DecodeRuneInString(s) 61 | } 62 | -------------------------------------------------------------------------------- /peg/pretty.go: -------------------------------------------------------------------------------- 1 | // Copyright 2018 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package peg 8 | 9 | import ( 10 | "bytes" 11 | "io" 12 | "strconv" 13 | ) 14 | 15 | type nodeOrFail interface { 16 | name() string 17 | numKids() int 18 | kid(int) nodeOrFail 19 | text() string 20 | } 21 | 22 | func (f *Node) name() string { return f.Name } 23 | func (f *Node) numKids() int { return len(f.Kids) } 24 | func (f *Node) kid(i int) nodeOrFail { return f.Kids[i] } 25 | func (f *Node) text() string { return f.Text } 26 | func (f *Fail) name() string { return f.Name } 27 | func (f *Fail) numKids() int { return len(f.Kids) } 28 | func (f *Fail) kid(i int) nodeOrFail { return f.Kids[i] } 29 | func (f *Fail) text() string { return f.Want } 30 | 31 | // Pretty returns a human-readable string of a Node or Fail 32 | // and the subtree beneath it. 33 | // The output looks like: 34 | // { 35 | // , 36 | // , 37 | // … 38 | // , 39 | // } 40 | func Pretty(n nodeOrFail) string { 41 | b := bytes.NewBuffer(nil) 42 | PrettyWrite(b, n) 43 | return b.String() 44 | } 45 | 46 | // PrettyWrite is like Pretty but outputs to an io.Writer. 
47 | func PrettyWrite(w io.Writer, n nodeOrFail) error { 48 | return prettyWrite(w, "", n) 49 | } 50 | 51 | func prettyWrite(w io.Writer, tab string, n nodeOrFail) error { 52 | if _, err := io.WriteString(w, tab); err != nil { 53 | return err 54 | } 55 | if n.numKids() == 0 { 56 | if n.name() != "" { 57 | if _, err := io.WriteString(w, n.name()+"("); err != nil { 58 | return err 59 | } 60 | } 61 | if _, err := io.WriteString(w, `"`+n.text()+`"`); err != nil { 62 | return err 63 | } 64 | if n.name() != "" { 65 | if _, err := io.WriteString(w, ")"); err != nil { 66 | return err 67 | } 68 | } 69 | return nil 70 | } 71 | if _, err := io.WriteString(w, n.name()); err != nil { 72 | return err 73 | } 74 | if f, ok := n.(*Fail); ok { 75 | pos := "[" + strconv.Itoa(f.Pos) + "]" 76 | if _, err := io.WriteString(w, pos); err != nil { 77 | return err 78 | } 79 | } 80 | if n.numKids() == 0 { 81 | if n.name() == "" { 82 | if _, err := io.WriteString(w, "{}"); err != nil { 83 | return err 84 | } 85 | } 86 | return nil 87 | } 88 | if _, err := io.WriteString(w, "{"); err != nil { 89 | return err 90 | } 91 | if n.numKids() == 1 && n.kid(0).numKids() == 0 { 92 | if err := prettyWrite(w, "", n.kid(0)); err != nil { 93 | return err 94 | } 95 | if _, err := io.WriteString(w, "}"); err != nil { 96 | return err 97 | } 98 | return nil 99 | } 100 | for i := 0; i < n.numKids(); i++ { 101 | if _, err := io.WriteString(w, "\n"); err != nil { 102 | return err 103 | } 104 | if err := prettyWrite(w, tab+"\t", n.kid(i)); err != nil { 105 | return err 106 | } 107 | if _, err := io.WriteString(w, ","); err != nil { 108 | return err 109 | } 110 | } 111 | if _, err := io.WriteString(w, "\n"+tab+"}"); err != nil { 112 | return err 113 | } 114 | return nil 115 | } 116 | -------------------------------------------------------------------------------- /rule.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import "fmt" 10 | 11 | // Grammar is a PEG grammar. 12 | type Grammar struct { 13 | // Prelude is custom code added to the beginning of the generated output. 14 | Prelude Text 15 | 16 | // Rules are the rules of the grammar. 17 | Rules []Rule 18 | 19 | // CheckedRules are the rules successfully checked by the Check pass. 20 | // It contains all non-template rules and all expanded templates. 21 | CheckedRules []*Rule 22 | } 23 | 24 | // A Rule defines a production in a PEG grammar. 25 | type Rule struct { 26 | Name 27 | 28 | // ErrorName, if non-nil, indicates that this is a named rule. 29 | // Errors beneath a named rule are collapsed, 30 | // reporting the error position as the start of the rule's parse 31 | // with the "want" message set to ErrorName. 32 | // 33 | // If nil, the rule is unnamed and does not collapse errors. 34 | ErrorName Text 35 | 36 | // Expr is the PEG expression matched by the rule. 37 | Expr Expr 38 | 39 | // N is the rule's unique integer within its containing Grammar. 40 | // It is a small integer that may be used as an array index. 41 | N int 42 | 43 | // typ is the type of the rule in the action pass. 44 | // typ is nil before the checkLeft pass add non-nil after. 45 | typ *string 46 | 47 | // epsilon indicates whether the rule can match the empty string. 
48 | epsilon bool 49 | 50 | // Labels is the set of all label names in the rule's expression. 51 | Labels []*LabelExpr 52 | } 53 | 54 | func (r *Rule) Begin() Loc { return r.Name.Begin() } 55 | func (r *Rule) End() Loc { return r.Expr.End() } 56 | func (r Rule) Type() string { return *r.typ } 57 | 58 | // A Name is the name of a rule template. 59 | type Name struct { 60 | // Name is the name of the template. 61 | Name Text 62 | 63 | // Args are the arguments or parameters of the template. 64 | Args []Text 65 | } 66 | 67 | func (n Name) Begin() Loc { return n.Name.Begin() } 68 | func (n Name) End() Loc { 69 | if len(n.Args) == 0 { 70 | return n.Name.End() 71 | } 72 | return n.Args[len(n.Args)-1].End() 73 | } 74 | 75 | // Text is a string of text located along with its location in the input. 76 | type Text interface { 77 | Located 78 | // String is the text string. 79 | String() string 80 | } 81 | 82 | // Loc identifies a location in a file by its line and column numbers. 83 | type Loc struct { 84 | // File is the name of the input file. 85 | File string 86 | // Line is line number of the location. 87 | // The first line of input is line number 1. 88 | Line int 89 | // Col is the Loc's rune offset into the line. 90 | // Col 0 is before the first rune on the line. 91 | Col int 92 | } 93 | 94 | // Less returns whether the receiver is earlier in the input than the argument. 95 | func (l Loc) Less(j Loc) bool { 96 | if l.Line == j.Line { 97 | return l.Col < j.Col 98 | } 99 | return l.Line < j.Line 100 | } 101 | 102 | // PrettyPrint implements the pretty.PrettyPrinter interface, 103 | // returning a simpler, one-line string form of the Loc. 104 | func (l Loc) PrettyPrint() string { return fmt.Sprintf("Loc{%d, %d}", l.Line, l.Col) } 105 | 106 | // Begin returns the Loc. 107 | func (l Loc) Begin() Loc { return l } 108 | 109 | // End returns the Loc. 110 | func (l Loc) End() Loc { return l } 111 | 112 | // Expr is PEG expression that matches a sequence of input runes. 113 | type Expr interface { 114 | Located 115 | String() string 116 | 117 | // fullString returns the fully parenthesized string representation. 118 | fullString() string 119 | 120 | // Walk calls a function for each expression in the tree. 121 | // Walk stops early if the function returns false. 122 | Walk(func(Expr) bool) bool 123 | 124 | // substitute returns a clone of the expression 125 | // with all occurrences of identifiers that are keys of sub 126 | // substituted with the corresponding value. 127 | // substitute must not be called after Check, 128 | // because it does not update bookkeeping fields 129 | // that are set by the Check pass. 130 | substitute(sub map[string]string) Expr 131 | 132 | // Type returns the type of the expression in the Action Tree. 133 | // This is the Go type associated with the expression. 134 | Type() string 135 | 136 | // epsilon returns whether the rule can match the empty string. 137 | epsilon() bool 138 | 139 | // CanFail returns whether the node can ever fail to parse. 140 | // Nodes like * or ?, for example, can never fail. 141 | // Parents of never-fail nodes needn't emit a failure branch, 142 | // as it will never be called. 143 | CanFail() bool 144 | 145 | // checkLeft checks for left-recursion and sets rule types. 146 | checkLeft(rules map[string]*Rule, p path, errs *Errors) 147 | 148 | // check checks for undefined identifiers, 149 | // linking defined identifiers to rules; 150 | // and checks for type mismatches. 
151 | check(ctx ctx, valueUsed bool, errs *Errors) 152 | } 153 | 154 | // A Choice is an ordered choice between expressions. 155 | type Choice struct{ Exprs []Expr } 156 | 157 | func (e *Choice) Begin() Loc { return e.Exprs[0].Begin() } 158 | func (e *Choice) End() Loc { return e.Exprs[len(e.Exprs)-1].End() } 159 | 160 | func (e *Choice) Walk(f func(Expr) bool) bool { 161 | if !f(e) { 162 | return false 163 | } 164 | for _, kid := range e.Exprs { 165 | if !kid.Walk(f) { 166 | return false 167 | } 168 | } 169 | return true 170 | } 171 | 172 | func (e *Choice) substitute(sub map[string]string) Expr { 173 | substitute := *e 174 | substitute.Exprs = make([]Expr, len(e.Exprs)) 175 | for i, kid := range e.Exprs { 176 | substitute.Exprs[i] = kid.substitute(sub) 177 | } 178 | return &substitute 179 | } 180 | 181 | // Type returns the type of a choice expression, 182 | // which is the type of its first branch. 183 | // All other branches must have the same type; 184 | // this is verified during the Check pass. 185 | func (e *Choice) Type() string { return e.Exprs[0].Type() } 186 | 187 | func (e *Choice) epsilon() bool { 188 | for _, e := range e.Exprs { 189 | if e.epsilon() { 190 | return true 191 | } 192 | } 193 | return false 194 | } 195 | 196 | func (e *Choice) CanFail() bool { 197 | // A choice node can only fail if all of its branches can fail. 198 | // If there is a non-failing branch, it will always accept. 199 | for _, s := range e.Exprs { 200 | if !s.CanFail() { 201 | return false 202 | } 203 | } 204 | return true 205 | } 206 | 207 | // An Action is an action expression: 208 | // a subexpression and code to run if matched. 209 | type Action struct { 210 | Expr Expr 211 | // Code is the Go code to execute if the subexpression is matched. 212 | // The Begin and End locations of Code includes the { } delimiters, 213 | // but the string does not. 214 | // 215 | // TODO: specify the environment under which the code is run. 216 | Code Text 217 | 218 | // ReturnType is the go type of the value returned by the action. 219 | ReturnType string 220 | 221 | // Labels are the labels that are in scope of this action. 222 | Labels []*LabelExpr 223 | } 224 | 225 | func (e *Action) Begin() Loc { return e.Expr.Begin() } 226 | func (e *Action) End() Loc { return e.Code.End() } 227 | func (e *Action) Type() string { return e.ReturnType } 228 | func (e *Action) epsilon() bool { return e.Expr.epsilon() } 229 | func (e *Action) CanFail() bool { return e.Expr.CanFail() } 230 | 231 | func (e *Action) Walk(f func(Expr) bool) bool { 232 | return f(e) && e.Expr.Walk(f) 233 | } 234 | 235 | func (e *Action) substitute(sub map[string]string) Expr { 236 | substitute := *e 237 | substitute.Expr = e.Expr.substitute(sub) 238 | substitute.Labels = nil 239 | return &substitute 240 | } 241 | 242 | // A Sequence is a sequence of expressions.
243 | type Sequence struct{ Exprs []Expr } 244 | 245 | func (e *Sequence) Begin() Loc { return e.Exprs[0].Begin() } 246 | func (e *Sequence) End() Loc { return e.Exprs[len(e.Exprs)-1].End() } 247 | 248 | func (e *Sequence) Walk(f func(Expr) bool) bool { 249 | if !f(e) { 250 | return false 251 | } 252 | for _, kid := range e.Exprs { 253 | if !kid.Walk(f) { 254 | return false 255 | } 256 | } 257 | return true 258 | } 259 | 260 | func (e *Sequence) substitute(sub map[string]string) Expr { 261 | substitute := *e 262 | substitute.Exprs = make([]Expr, len(e.Exprs)) 263 | for i, kid := range e.Exprs { 264 | substitute.Exprs[i] = kid.substitute(sub) 265 | } 266 | return &substitute 267 | } 268 | 269 | // Type returns the type of a sequence expression, 270 | // which is based on the type of its first sub-expression. 271 | // All other sub-expressions must have the same type; 272 | // this is verified during the Check pass. 273 | // 274 | // If the first sub-expression is a string, 275 | // the type of the entire sequence is a string. 276 | // The value is the concatenation of all sub-expressions. 277 | // 278 | // Otherwise, the type is a slice of the first sub-expression type. 279 | // The value is the slice of all sub-expression values. 280 | func (e *Sequence) Type() string { 281 | t := e.Exprs[0].Type() 282 | switch t { 283 | case "": 284 | return "" 285 | case "string": 286 | return "string" 287 | default: 288 | return "[]" + t 289 | } 290 | } 291 | 292 | func (e *Sequence) epsilon() bool { 293 | for _, e := range e.Exprs { 294 | if !e.epsilon() { 295 | return false 296 | } 297 | } 298 | return true 299 | } 300 | 301 | func (e *Sequence) CanFail() bool { 302 | for _, s := range e.Exprs { 303 | if s.CanFail() { 304 | return true 305 | } 306 | } 307 | return false 308 | } 309 | 310 | // A LabelExpr is a labeled subexpression. 311 | // The label can be used in actions to refer to the result of the subexpression. 312 | type LabelExpr struct { 313 | // Label is the text of the label, not including the :. 314 | Label Text 315 | Expr Expr 316 | // N is a small integer assigned to this label 317 | // that is unique within the containing Rule. 318 | // It is a small integer that may be used as an array index. 319 | N int 320 | } 321 | 322 | func (e *LabelExpr) Begin() Loc { return e.Label.Begin() } 323 | func (e *LabelExpr) End() Loc { return e.Expr.End() } 324 | func (e *LabelExpr) Type() string { return e.Expr.Type() } 325 | func (e *LabelExpr) epsilon() bool { return e.Expr.epsilon() } 326 | func (e *LabelExpr) CanFail() bool { return e.Expr.CanFail() } 327 | 328 | func (e *LabelExpr) Walk(f func(Expr) bool) bool { 329 | return f(e) && e.Expr.Walk(f) 330 | } 331 | 332 | func (e *LabelExpr) substitute(sub map[string]string) Expr { 333 | substitute := *e 334 | substitute.Expr = e.Expr.substitute(sub) 335 | return &substitute 336 | } 337 | 338 | // A PredExpr is a non-consuming predicate expression: 339 | // If it succeeds (or fails, in the case of Neg), 340 | // return success and consume no input. 341 | // If it fails (or succeeds, in the case of Neg), 342 | // return failure and consume no input. 343 | // Predicate expressions allow a powerful form of lookahead. 344 | type PredExpr struct { 345 | Expr Expr 346 | // Neg indicates that the result of the predicate is negated. 347 | Neg bool 348 | // Loc is the location of the operator, & or !.
349 | Loc Loc 350 | } 351 | 352 | func (e *PredExpr) Begin() Loc { return e.Loc } 353 | func (e *PredExpr) End() Loc { return e.Expr.End() } 354 | 355 | // Type returns the type of the predicate expression, 356 | // which is a string; the value is always the empty string. 357 | func (e *PredExpr) Type() string { return "string" } 358 | 359 | func (e *PredExpr) epsilon() bool { return true } 360 | func (e *PredExpr) CanFail() bool { return e.Expr.CanFail() } 361 | 362 | func (e *PredExpr) Walk(f func(Expr) bool) bool { 363 | return f(e) && e.Expr.Walk(f) 364 | } 365 | 366 | func (e *PredExpr) substitute(sub map[string]string) Expr { 367 | substitute := *e 368 | substitute.Expr = e.Expr.substitute(sub) 369 | return &substitute 370 | } 371 | 372 | // A RepExpr is a repetition expression, specifying whether the sub-expression 373 | // should be matched any number of times (*) or one or more times (+). 374 | type RepExpr struct { 375 | // Op is one of * or +. 376 | Op rune 377 | Expr Expr 378 | // Loc is the location of the operator, * or +. 379 | Loc Loc 380 | } 381 | 382 | func (e *RepExpr) Begin() Loc { return e.Expr.Begin() } 383 | func (e *RepExpr) End() Loc { return e.Loc } 384 | 385 | // Type returns the type of the repetition expression, 386 | // which is based on the type of its sub-expression. 387 | // 388 | // If the sub-expression type is string, 389 | // the repetition expression type is a string. 390 | // The value is the concatenation of all matches, 391 | // or the empty string if nothing matches. 392 | // 393 | // Otherwise, the type is a slice of the sub-expression type. 394 | // The value contains an element for each match 395 | // of the sub-expression. 396 | func (e *RepExpr) Type() string { 397 | switch t := e.Expr.Type(); t { 398 | case "": 399 | return "" 400 | case "string": 401 | return t 402 | default: 403 | return "[]" + t 404 | } 405 | } 406 | 407 | func (e *RepExpr) epsilon() bool { return e.Op == '*' } 408 | func (e *RepExpr) CanFail() bool { return e.Op == '+' && e.Expr.CanFail() } 409 | 410 | func (e *RepExpr) Walk(f func(Expr) bool) bool { 411 | return f(e) && e.Expr.Walk(f) 412 | } 413 | 414 | func (e *RepExpr) substitute(sub map[string]string) Expr { 415 | substitute := *e 416 | substitute.Expr = e.Expr.substitute(sub) 417 | return &substitute 418 | } 419 | 420 | // An OptExpr is an optional expression, which may or may not be matched. 421 | type OptExpr struct { 422 | Expr Expr 423 | // Loc is the location of the ?. 424 | Loc Loc 425 | } 426 | 427 | func (e *OptExpr) Begin() Loc { return e.Expr.Begin() } 428 | func (e *OptExpr) End() Loc { return e.Loc } 429 | 430 | // Type returns the type of the optional expression, 431 | // which is based on the type of its sub-expression. 432 | // 433 | // If the sub-expression type is string, 434 | // the optional expression type is a string. 435 | // The value is the value of the sub-expression if it matched, 436 | // or the empty string if it did not match. 437 | // 438 | // Otherwise, the type is a pointer to the type of the sub-expression. 439 | // The value is a pointer to the sub-expression's value if it matched, 440 | // or a nil pointer if it did not match.
441 | func (e *OptExpr) Type() string { 442 | switch t := e.Expr.Type(); { 443 | case t == "": 444 | return "" 445 | case t == "string": 446 | return t 447 | default: 448 | return "*" + e.Expr.Type() 449 | } 450 | } 451 | 452 | func (e *OptExpr) epsilon() bool { return true } 453 | func (e *OptExpr) CanFail() bool { return false } 454 | 455 | func (e *OptExpr) Walk(f func(Expr) bool) bool { 456 | return f(e) && e.Expr.Walk(f) 457 | } 458 | 459 | func (e *OptExpr) substitute(sub map[string]string) Expr { 460 | substitute := *e 461 | substitute.Expr = e.Expr.substitute(sub) 462 | return &substitute 463 | } 464 | 465 | // An Ident is an identifier referring to the name of another rule, 466 | // indicating to match that rule's expression. 467 | type Ident struct { 468 | Name 469 | 470 | // rule is the rule referred to by this identifier. 471 | // It is set during check. 472 | rule *Rule 473 | } 474 | 475 | func (e *Ident) Begin() Loc { return e.Name.Begin() } 476 | func (e *Ident) End() Loc { return e.Name.End() } 477 | func (e *Ident) CanFail() bool { return true } 478 | func (e *Ident) Walk(f func(Expr) bool) bool { return f(e) } 479 | 480 | // Type returns the type of the identifier expression, 481 | // which is the type of its corresponding rule. 482 | func (e *Ident) Type() string { 483 | if e.rule == nil { 484 | return "" 485 | } 486 | return e.rule.Type() 487 | } 488 | 489 | func (e *Ident) epsilon() bool { 490 | if e.rule == nil { 491 | return false 492 | } 493 | return e.rule.epsilon 494 | } 495 | 496 | func (e *Ident) substitute(sub map[string]string) Expr { 497 | substitute := *e 498 | if s, ok := sub[e.Name.String()]; ok { 499 | substitute.Name = Name{ 500 | Name: text{ 501 | str: s, 502 | begin: e.Name.Begin(), 503 | end: e.Name.End(), 504 | }, 505 | } 506 | } 507 | substitute.Args = make([]Text, len(e.Args)) 508 | for i, a := range e.Args { 509 | if s, ok := sub[a.String()]; !ok { 510 | substitute.Args[i] = e.Args[i] 511 | } else { 512 | substitute.Args[i] = text{ 513 | str: s, 514 | begin: a.Begin(), 515 | end: a.End(), 516 | } 517 | } 518 | } 519 | return &substitute 520 | } 521 | 522 | // A SubExpr simply wraps an expression. 523 | // It holds no extra information beyond tracking parentheses. 524 | // Its purpose is to allow easily re-inserting the parentheses 525 | // when stringifying an expression, without the need 526 | // to compute precedence inversion for each subexpression. 527 | type SubExpr struct { 528 | Expr 529 | // Open is the location of the open parenthesis. 530 | // Close is the location of the close parenthesis. 531 | Open, Close Loc 532 | } 533 | 534 | func (e *SubExpr) Begin() Loc { return e.Open } 535 | func (e *SubExpr) End() Loc { return e.Close } 536 | func (e *SubExpr) Type() string { return e.Expr.Type() } 537 | func (e *SubExpr) epsilon() bool { return e.Expr.epsilon() } 538 | func (e *SubExpr) CanFail() bool { return e.Expr.CanFail() } 539 | 540 | func (e *SubExpr) Walk(f func(Expr) bool) bool { 541 | return f(e) && e.Expr.Walk(f) 542 | } 543 | 544 | func (e *SubExpr) substitute(sub map[string]string) Expr { 545 | substitute := *e 546 | substitute.Expr = e.Expr.substitute(sub) 547 | return &substitute 548 | } 549 | 550 | // A PredCode is a predicate code expression, 551 | // allowing predication using a Go boolean expression. 552 | // 553 | // TODO: Specify the conditions under which the expression is evaluated. 554 | type PredCode struct { 555 | // Code is a Go boolean expression.
556 | // The Begin and End locations of Code includes the { } delimiters, 557 | // but the string does not. 558 | Code Text 559 | // Neg indicates that the result of the predicate is negated. 560 | Neg bool 561 | // Loc is the location of the operator, & or !. 562 | Loc Loc 563 | 564 | // Labels are the labels that are in scope of this action. 565 | Labels []*LabelExpr 566 | } 567 | 568 | func (e *PredCode) Begin() Loc { return e.Loc } 569 | func (e *PredCode) End() Loc { return e.Code.End() } 570 | 571 | // Type returns the type of the predicate code expression, 572 | // which is a string; the value is always the empty string. 573 | func (e *PredCode) Type() string { return "string" } 574 | 575 | func (e *PredCode) epsilon() bool { return true } 576 | func (e *PredCode) CanFail() bool { return true } 577 | func (e *PredCode) Walk(f func(Expr) bool) bool { return f(e) } 578 | 579 | func (e *PredCode) substitute(sub map[string]string) Expr { 580 | substitute := *e 581 | substitute.Labels = nil 582 | return &substitute 583 | } 584 | 585 | // A Literal matches a literal text string. 586 | type Literal struct { 587 | // Text is the text to match. 588 | // The Begin and End locations of Text includes the ' or " delimiters, 589 | // but the string does not. 590 | Text Text 591 | } 592 | 593 | func (e *Literal) Begin() Loc { return e.Text.Begin() } 594 | func (e *Literal) End() Loc { return e.Text.End() } 595 | func (e *Literal) Type() string { return "string" } 596 | func (e *Literal) epsilon() bool { return false } 597 | func (e *Literal) CanFail() bool { return true } 598 | func (e *Literal) Walk(f func(Expr) bool) bool { return f(e) } 599 | 600 | func (e *Literal) substitute(sub map[string]string) Expr { 601 | substitute := *e 602 | return &substitute 603 | } 604 | 605 | // A CharClass matches a single rune from a set of acceptable 606 | // (or unacceptable if Neg) runes. 607 | type CharClass struct { 608 | // Spans are rune spans accepted (or rejected) by the character class. 609 | // The 0th rune is always ≤ the 1st. 610 | // Single rune matches are a span of both the same rune. 611 | Spans [][2]rune 612 | 613 | // Neg indicates that the input must not match any in the set. 614 | Neg bool 615 | 616 | // Open and Close are the Loc of [ and ] respectively. 617 | Open, Close Loc 618 | } 619 | 620 | func (e *CharClass) Begin() Loc { return e.Open } 621 | func (e *CharClass) End() Loc { return e.Close } 622 | func (e *CharClass) Type() string { return "string" } 623 | func (e *CharClass) epsilon() bool { return false } 624 | func (e *CharClass) CanFail() bool { return true } 625 | func (e *CharClass) Walk(f func(Expr) bool) bool { return f(e) } 626 | 627 | func (e *CharClass) substitute(sub map[string]string) Expr { 628 | substitute := *e 629 | return &substitute 630 | } 631 | 632 | // Any matches any rune. 633 | type Any struct { 634 | // Loc is the location of the . symbol. 
635 | Loc Loc 636 | } 637 | 638 | func (e *Any) Begin() Loc { return e.Loc } 639 | func (e *Any) End() Loc { return Loc{Line: e.Loc.Line, Col: e.Loc.Col + 1} } 640 | func (e *Any) Type() string { return "string" } 641 | func (e *Any) epsilon() bool { return false } 642 | func (e *Any) CanFail() bool { return true } 643 | func (e *Any) Walk(f func(Expr) bool) bool { return f(e) } 644 | 645 | func (e *Any) substitute(sub map[string]string) Expr { 646 | substitute := *e 647 | return &substitute 648 | } 649 | -------------------------------------------------------------------------------- /string.go: -------------------------------------------------------------------------------- 1 | // Copyright 2017 The Peggy Authors 2 | // 3 | // Use of this source code is governed by a BSD-style 4 | // license that can be found in the LICENSE file or at 5 | // https://developers.google.com/open-source/licenses/bsd. 6 | 7 | package main 8 | 9 | import ( 10 | "fmt" 11 | "strconv" 12 | "strings" 13 | ) 14 | 15 | // String returns the string representation of the rules. 16 | // The output contains no comments or whitespace, 17 | // except for a single space, " ", 18 | // separating sub-exprsessions of a sequence, 19 | // and on either side of <-. 20 | func String(rules []Rule) string { 21 | var s string 22 | for _, r := range rules { 23 | if s != "" { 24 | s += "\n" 25 | } 26 | s += r.String() 27 | } 28 | return s 29 | } 30 | 31 | // String returns the string representation of a rule. 32 | // The output contains no comments or whitespace, 33 | // except for a single space, " ", 34 | // separating sub-exprsessions of a sequence, 35 | // and on either side of <-. 36 | func (r *Rule) String() string { 37 | var name string 38 | if r.ErrorName != nil { 39 | name = " " + strconv.Quote(r.ErrorName.String()) 40 | } 41 | return r.Name.String() + name + " <- " + r.Expr.String() 42 | } 43 | 44 | func (n Name) String() string { 45 | if len(n.Args) == 0 { 46 | return n.Name.String() 47 | } 48 | s := n.Name.String() + "<" 49 | for i, a := range n.Args { 50 | if i > 0 { 51 | s += ", " 52 | } 53 | s += a.String() 54 | } 55 | return s + ">" 56 | } 57 | 58 | // Ident returns a Go identifier for the name. 59 | func (n Name) Ident() string { 60 | if len(n.Args) == 0 { 61 | return n.Name.String() 62 | } 63 | s := n.Name.String() + "__" 64 | for i, a := range n.Args { 65 | if i > 0 { 66 | s += "__" 67 | } 68 | s += a.String() 69 | } 70 | return s 71 | } 72 | 73 | func (e *Choice) String() string { 74 | s := e.Exprs[0].String() 75 | for _, sub := range e.Exprs[1:] { 76 | s += "/" + sub.String() 77 | } 78 | return s 79 | } 80 | 81 | func (e *Action) String() string { 82 | if *prettyPrint { 83 | return e.Expr.String() 84 | } 85 | return e.Expr.String() + " {…}" 86 | } 87 | 88 | func (e *Sequence) String() string { 89 | s := e.Exprs[0].String() 90 | for _, sub := range e.Exprs[1:] { 91 | s += " " + sub.String() 92 | } 93 | return s 94 | } 95 | 96 | func (e *LabelExpr) String() string { 97 | if *prettyPrint { 98 | return e.Expr.String() 99 | } 100 | return e.Label.String() + ":" + e.Expr.String() 101 | } 102 | 103 | func (e *PredExpr) String() string { 104 | s := "&" 105 | if e.Neg { 106 | s = "!" 107 | } 108 | return s + e.Expr.String() 109 | } 110 | 111 | func (e *RepExpr) String() string { 112 | return e.Expr.String() + string([]rune{e.Op}) 113 | } 114 | 115 | func (e *OptExpr) String() string { 116 | return e.Expr.String() + "?" 
117 | } 118 | 119 | func (e *SubExpr) String() string { 120 | return "(" + e.Expr.String() + ")" 121 | } 122 | 123 | func (e *Ident) String() string { 124 | return e.Name.String() 125 | } 126 | 127 | func (e *PredCode) String() string { 128 | s := "&{" 129 | if e.Neg { 130 | s = "!{" 131 | } 132 | return s + "…}" 133 | } 134 | 135 | func (e *Literal) String() string { 136 | s := strconv.QuoteToGraphic(e.Text.String()) 137 | // Replace some combining characters with their escaped version. 138 | for _, sub := range []string{ 139 | "\u0301", 140 | "\u0304", 141 | "\u030C", 142 | "\u0306", 143 | "\u0309", 144 | "\u0302", 145 | "\u0300", 146 | "\u0303", 147 | } { 148 | q := strconv.QuoteToASCII(sub) 149 | s = strings.Replace(s, sub, q[1:len(q)-1], -1) 150 | } 151 | return s 152 | } 153 | 154 | func (e *CharClass) String() string { 155 | s := "[" 156 | if e.Neg { 157 | s += "^" 158 | } 159 | for _, sp := range e.Spans { 160 | if sp[0] == sp[1] { 161 | s += charClassEsc(sp[0]) 162 | } else { 163 | s += charClassEsc(sp[0]) + "-" + charClassEsc(sp[1]) 164 | } 165 | } 166 | return s + "]" 167 | } 168 | 169 | func charClassEsc(r rune) string { 170 | switch r { 171 | case '^': 172 | return `\^` 173 | case '-': 174 | return `\-` 175 | case ']': 176 | return `\]` 177 | } 178 | s := strconv.QuoteRuneToGraphic(r) 179 | return strings.TrimPrefix(strings.TrimSuffix(s, "'"), "'") 180 | } 181 | 182 | func (e *Any) String() string { return "." } 183 | 184 | // FullString returns the fully parenthesized string representation of the rules. 185 | // The output contains no comments or whitespace, 186 | // except for a single space, " ", 187 | // separating sub-exprsessions of a sequence, 188 | // and on either side of <-. 189 | func FullString(rules []Rule) string { 190 | var s string 191 | for _, r := range rules { 192 | if s != "" { 193 | s += "\n" 194 | } 195 | 196 | var name string 197 | if r.ErrorName != nil { 198 | name = " " + strconv.Quote(r.ErrorName.String()) 199 | } 200 | s += fmt.Sprintf("%s%s <- %s", r.Name, name, r.Expr.fullString()) 201 | } 202 | return s 203 | } 204 | 205 | func (e *Choice) fullString() string { 206 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString() 207 | for _, sub := range e.Exprs[1:] { 208 | s += "/" + sub.fullString() + ")" 209 | } 210 | return s 211 | } 212 | 213 | func (e *Action) fullString() string { 214 | return "(" + e.Expr.fullString() + " {" + e.Code.String() + "})" 215 | } 216 | 217 | func (e *Sequence) fullString() string { 218 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString() 219 | for _, sub := range e.Exprs[1:] { 220 | s += " " + sub.fullString() + ")" 221 | } 222 | return s 223 | } 224 | 225 | func (e *LabelExpr) fullString() string { 226 | return fmt.Sprintf("(%s:%s)", e.Label.String(), e.Expr.fullString()) 227 | } 228 | 229 | func (e *PredExpr) fullString() string { 230 | if e.Neg { 231 | return fmt.Sprintf("(!%s)", e.Expr.fullString()) 232 | } 233 | return fmt.Sprintf("(&%s)", e.Expr.fullString()) 234 | } 235 | 236 | func (e *RepExpr) fullString() string { 237 | return fmt.Sprintf("(%s%c)", e.Expr.fullString(), e.Op) 238 | } 239 | 240 | func (e *OptExpr) fullString() string { 241 | return "(" + e.Expr.fullString() + "?)" 242 | } 243 | 244 | func (e *Ident) fullString() string { return "(" + e.String() + ")" } 245 | 246 | func (e *PredCode) fullString() string { 247 | s := "(&{" 248 | if e.Neg { 249 | s = "(!{" 250 | } 251 | return s + e.Code.String() + "})" 252 | } 253 | 254 | func (e *Literal) fullString() string { return 
"(" + e.String() + ")" } 255 | 256 | func (e *CharClass) fullString() string { return "(" + e.String() + ")" } 257 | 258 | func (e *Any) fullString() string { return "(" + e.String() + ")" } 259 | --------------------------------------------------------------------------------