├── .travis.yml
├── AUTHORS
├── CONTRIBUTING.md
├── LICENSE
├── README.md
├── action_test.go
├── check.go
├── check_test.go
├── error.go
├── example
│   ├── calc
│   │   ├── calc.go
│   │   └── calc.peggy
│   └── label_names
│       ├── label_names.go
│       └── label_names.peggy
├── gen.go
├── gen_test.go
├── go.go
├── go.mod
├── go.sum
├── gok.sh
├── grammar.go
├── grammar.y
├── lex.go
├── main.go
├── parse_test.go
├── peg
│   ├── fail.go
│   ├── fail_test.go
│   ├── loc.go
│   ├── loc_test.go
│   ├── peg.go
│   └── pretty.go
├── rule.go
└── string.go
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: go
2 |
3 | go: 1.12
4 |
5 | notifications:
6 | email: false
7 |
8 | env:
9 | - PATH=$HOME/gopath/bin:$PATH
10 |
11 | install:
12 | - go get golang.org/x/tools/cmd/goyacc
13 | - go get golang.org/x/lint/golint
14 | - go get -t -v ./... && go build -v ./...
15 |
16 | script:
17 | - ./gok.sh
18 |
--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | # This is the list of Peggy authors for copyright purposes.
2 | #
3 | # This does not necessarily list everyone who has contributed code, since in
4 | # some cases, their employer may be the copyright holder. To see the full list
5 | # of contributors, see the revision history in source control.
6 | Google Inc.
7 |
--------------------------------------------------------------------------------
/CONTRIBUTING.md:
--------------------------------------------------------------------------------
1 | # How to Contribute
2 |
3 | We'd love to accept your patches and contributions to this project. There are
4 | just a few small guidelines you need to follow.
5 |
6 | ## Contributor License Agreement
7 |
8 | Contributions to this project must be accompanied by a Contributor License
9 | Agreement. You (or your employer) retain the copyright to your contribution,
10 | this simply gives us permission to use and redistribute your contributions as
11 | part of the project. Head over to <https://cla.developers.google.com/> to see
12 | your current agreements on file or to sign a new one.
13 |
14 | You generally only need to submit a CLA once, so if you've already submitted one
15 | (even if it was for a different project), you probably don't need to do it
16 | again.
17 |
18 | ## Code reviews
19 |
20 | All submissions, including submissions by project members, require review. We
21 | use GitHub pull requests for this purpose. Consult
22 | [GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
23 | information on using pull requests.
24 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Copyright 2017, The Peggy Authors
2 |
3 | Redistribution and use in source and binary forms, with or without
4 | modification, are permitted provided that the following conditions are
5 | met:
6 |
7 | * Redistributions of source code must retain the above copyright
8 | notice, this list of conditions and the following disclaimer.
9 |
10 | * Redistributions in binary form must reproduce the above
11 | copyright notice, this list of conditions and the following disclaimer
12 | in the documentation and/or other materials provided with the
13 | distribution.
14 |
15 | * Neither the name of Google Inc. nor the names of its
16 | contributors may be used to endorse or promote products derived from
17 | this software without specific prior written permission.
18 |
19 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20 | "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21 | LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22 | A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23 | OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25 | LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26 | DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27 | THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28 | (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [![Build Status](https://travis-ci.org/eaburns/peggy.svg?branch=master)](https://travis-ci.org/eaburns/peggy)
2 |
3 | # Introduction
4 |
5 | Peggy is a Parsing Expression Grammar
6 | ([PEG](https://en.wikipedia.org/wiki/Parsing_expression_grammar))
7 | parser generator.
8 |
9 | The generated parser is a
10 | [packrat parser](https://en.wikipedia.org/wiki/Parsing_expression_grammar#Implementing_parsers_from_parsing_expression_grammars).
11 | However, the implementation is somewhat novel (I believe).
12 |
13 | # Background
14 |
15 | Packrat parsers work by doing a recursive descent on the grammar rules,
16 | backtracking when a rule fails to accept.
17 | To prevent exponential backtracking, a memo table remembers
18 | the parse result for each rule, for each point in the input.
19 | This way, when backtracking encounters a grammar subtree that has already been tried,
20 | it can compute the result in constant time by looking it up in the memo table
21 | instead of computing the parse again.
22 |
23 | Because of the memo table, packrat parsers for PEG grammars
24 | parse in time linear in the size of the input
25 | and use memory linear in the size of the input too.
26 | (Note that other common parser generators,
27 | such as yacc for LALR(1) grammars,
28 | are linear time in the size of the input
29 | and linear space in the _depth of the parse_,
30 | which can be smaller than the input size.)
31 |
32 | A common way to implement the memo table is to use a hash table.
33 | The key is a pair of the grammar rule plus the input position,
34 | and the value is the result (result of any parser actions)
35 | of parsing the keyed rule at the keyed input position
36 | and the number of runes consumed, or whether the parse failed.
37 |
38 | A problem that I've found with this approach is that
39 | for grammars that tend to do a lot of backtracking,
40 | a significant amount of time is spent modifying and accessing the memo table.
41 | Hash table lookups and inserts are expected constant time,
42 | but in the face of much backtracking, the constant time can add up.
43 |
44 | In addition, hash tables tend to be implemented with linked structures
45 | which take up additional memory to hold pointers and allocation overhead.
46 | Finally, as they grow large, linked structures take more time
47 | for a garbage collector to scan.
48 |
49 | I originally implemented Peggy to parse the constructed language
50 | [Lojban](https://mw.lojban.org/papri/Lojban)
51 | (see [johaus](http://github.com/eaburns/johaus)).
52 | My initial hash-table-based implementation performed very poorly on large texts
53 | because of the issues described above:
54 | profiling showed a significant amount of time spent
55 | on map accesses and garbage collection scanning,
56 | and memory use was too high to parse some texts (4KB)
57 | on my laptop (8GB of RAM).
58 |
59 | I noticed similar issues with the JavaScript- and Java-based PEG parsers for Lojban.
60 |
61 | Peggy takes a different approach that was tuned for this use-case.
62 |
63 | ## Peggy's approach
64 |
65 | Peggy computes the result of a parse in two passes instead of one.
66 | The first pass determines whether the grammar accepts,
67 | and builds a table that tracks, for each rule tried at each position:
68 | whether the rule accepted and, if so, how much input was consumed,
69 | or, if it failed, how much input was consumed before the deepest failure.
70 | These values can be stored in an array using only integers.
71 |
72 | If the first pass accepts the input, a second pass can quickly follow the table
73 | to try only rules that accept and compute the result of the actions of the rule.
74 |
75 | If the first pass fails to accept, another pass can follow the table
76 | and compute a tree tracking which rules failed at the deepest point of the parse.
77 | This tree can be used to build precise syntax error messages.
78 |
79 | The advantage of Peggy's approach is that
80 | the first pass only performs a single allocation: the table — an array of integers.
81 | Accessing the table is just indexing into an array of integers,
82 | which is cheaper than most hash table lookups.
83 | Since the array only contains integers and no pointers,
84 | it needn't be scanned by the garbage collector.
85 | And finally, whenever a hash table would be relatively densely populated,
86 | an array can be more memory efficient.
87 |
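To make this concrete, here is a minimal sketch of such a table (illustrative Go with made-up names, not the code Peggy emits): one int32 entry per (rule, input position) pair records how many bytes the rule consumed there, a failure marker, or that the entry is not yet computed.

```
const unset = -2 // entry not yet computed; -1 means the rule failed here

type table struct {
	numRules int
	entries  []int32 // one entry per (rule, position); no pointers to scan
}

func newTable(numRules, inputLen int) *table {
	t := &table{numRules: numRules, entries: make([]int32, numRules*(inputLen+1))}
	for i := range t.entries {
		t.entries[i] = unset
	}
	return t
}

// get returns the memoized width of rule at pos:
// >= 0 bytes consumed, -1 for failure, or unset.
func (t *table) get(rule, pos int) int32 {
	return t.entries[pos*t.numRules+rule]
}

// set records the width of rule at pos.
func (t *table) set(rule, pos int, width int32) {
	t.entries[pos*t.numRules+rule] = width
}
```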
88 | For the Lojban grammar, this made the difference
89 | in being able to parse full texts at all
90 | (a 4KB text that needed >8GB of memory was reduced to needing only 2GB,
91 | and multiple minutes were reduced to mere seconds).
92 |
93 | ## Disadvantages
94 |
95 | There are disadvantages to the Peggy approach:
96 |
97 | 1) The interface is not as simple to use.
98 | However, I hope that you will not find it too difficult.
99 | See the example in the next section for a fairly short wrapper
100 | that wraps the Peggy calls into a single, more typical Go function call.
101 |
102 | 2) For grammars that do not rely as heavily on the memo table,
103 | a hash table could be much more memory efficient.
104 |
105 | I would like to expand this list, so please send pull requests
106 | if you have other disadvantages of this approach that should be here.
107 |
108 | Now, let's see how to use it.
109 |
110 | # Input file format
111 |
112 | A Peggy input file is UTF-8 encoded.
113 |
114 | A Peggy grammar file consists of a _prelude_ followed by a set of _rules_.
115 | The prelude is valid Go code enclosed between { and }.
116 | This code is emitted at the beginning of the generated parser .go file.
117 | It should begin with a package statement followed by any imports used by the parser.
118 | Any other valid Go code is also permitted.
119 |
120 | After the prelude is a set of _rules_ that define the grammar.
121 | Each rule begins with an _identifier_ that is the name of the rule.
122 | After the name is an optional string giving the rule a human-readable name
123 | and marking it as a _leaf_ rule for error reporting (more below).
124 | After the optional string is the token <-.
125 | Next is the expression that defines the rule.
126 |
127 | **Example**
128 | ```
129 | A <- "Hello," _ ( "World!" / "世界" )
130 | _ <- ( p:. &{ isUnicodeSpace(p) } )+
131 | ```
132 |
133 | # Expressions
134 |
135 | Expressions define the grammar.
136 | The input to each expression is a sequence of runes.
137 | The expression either accepts or rejects the input.
138 | If the expression accepts, it consumes zero or more runes of input,
139 | and evaluates to a result (a Go value).
140 |
141 | The types of expressions, in order of precedence, are:
142 | * Choice
143 | * Action
144 | * Sequence
145 | * Label
146 | * Predicate
147 | * Repetition
148 | * Literal, Code Predicate, Identifier, and Subexpression
149 |
150 | ## Choice
151 |
152 | A choice is a sequence of expressions separated by `/`.
153 | Unlike context free grammars, choices in PEG are ordered.
154 |
155 | It is an error if the result types of the subexpressions are not all the same.
156 |
157 | **Accepts:**
158 | A choice accepts if any of its expressions accept.
159 |
160 | **Consumes:**
161 | A choice consumes the runes consumed by its first accepting subexpression
162 | from left-to-right.
163 |
164 | **Result:**
165 | The result of a choice has the type and value of its first accepting subexpression
166 | from left-to-right.
167 |
168 | **Example:**
169 | ```
170 | A / "Hello" / foo:Bar { return string(foo) }
171 | ```
172 |
173 | ## Sequences
174 |
175 | A sequence is two or more expressions separated by whitespace.
176 |
177 | **Accepts:**
178 | A sequence accepts if each of its subexpressions accepts
179 | on the input remaining after each preceding subexpression consumes.
180 |
181 | **Consumes:**
182 | The sequence consumes from the input
183 | the sum of the runes consumed by all of its subexpressions.
184 |
185 | **Result:**
186 | It is an error if the result types of the subexpressions
187 | are not all the same.
189 | If the first expression is a `string`, the type of the sequence is `string`,
190 | and the result is the concatenation of the results of the expressions.
191 |
192 | If the first expression is any non-`string` type, T,
193 | the type of the result of the sequence is `[]T`,
194 | and the result itself is the slice from
195 | `append()`ing the results of the subexpressions.
196 |
197 | **Example:**
198 | ```
199 | "Hello," Space "World" Punctiation
200 | ```
201 |
202 | ## Labels
203 |
204 | A label is an identifier followed by : followed by an expression.
205 |
206 | Labels are used to create new identifiers used by actions and code predicates.
207 |
208 | The scope of a label is its branch in the nearest containing choice expression,
209 | or in the entire rule if there is no choice expression.
210 |
211 | For example,
212 |
213 | R <- a:A / a:A / a:A / a:A
214 |
215 | All `a`s refer to different labels, as they are all scoped to different branches of the choice, `/`.
216 |
217 | Similarly, in this expression,
218 |
219 | R <- a:A / (a:A / a:A)
220 |
221 | all `a`s are different labels.
222 | However,
223 |
224 | R <- a:A / a:A a:A
225 |
226 | is an error, as `a` is re-defined in the right-hand branch of the choice, `/`.
227 |
228 | **Accepts:**
229 | A label accepts if its subexpression accepts.
230 |
231 | **Consumes:**
232 | A label consumes the runes of its subexpression.
233 |
234 | **Result:**
235 | The result type and value of a label are those of its subexpression.
236 |
237 | **Example:**
238 | ```
239 | hello:"Hello" "," Space world:( "World" / "世界" )
240 | ```
241 |
242 | ## Predicates
243 |
244 | A predicate is a & or ! operator followed by an expression.
245 |
246 | **Accepts:**
247 | A predicate with the operator & accepts if its subexpression accepts.
248 |
249 | A predicate with the operator ! accepts if its subexpression does not accept.
250 |
251 | **Consumes:**
252 | Predicates consume no runes.
253 |
254 | **Result:**
255 | The result of a predicate is the empty string.
256 |
257 | **Example:**
258 | ```
259 | !Keyword [a-zA-Z_] [a-zA-Z0-9_]*
260 | ```
261 |
262 | ## Repetition
263 |
264 | A repetition is an expression followed by either a *, +, or ? operator.
265 |
266 | **Accepts:**
267 | A repetition with an operator * or ? always accepts.
268 |
269 | A repetition with the operator + accepts if its subexpression accepts.
270 |
271 | **Consumes:**
272 | A repetition with an operator * or + consumes all matches of its subexpression.
273 |
274 | A repetition with the operator ? consumes at most one match of its subexpression.
275 |
276 | **Result:**
277 | If the type of the subexpression is `string`, the result of a repetition is `string`,
278 | and the value is the consumed runes.
279 |
280 | Otherwise, if the type of the subexpression is a type `T`:
281 | * if the operator is * or +, the type of the result is `[]T`
282 | and the value is a slice containing all `append`ed subexpression results.
283 | * if the operator is ?, the type of the result is `*T`
284 | and the value is a pointer to the subexpression result if it accepted
285 | or `nil`.
286 |
287 | **Example:**
288 | ```
289 | [a-zA-Z0-9_]* ":"?
290 | ```
291 |
292 | ## Literals
293 |
294 | Literals are String Literals, Character Classes, and Dot.
295 |
296 | ### String Literals
297 |
298 | String literals are lexically the same as
299 | [Go String Literals](https://golang.org/ref/spec#String_literals).
300 |
301 | **Accepts:**
302 | A string literal accepts if the next runes of input are exactly those of the string.
303 |
304 | **Consumes:**
305 | A string literal consumes the matching runes of input.
306 |
307 | **Result:**
308 | The result is the `string` of consumed runes.
309 |
310 | **Example:**
311 | ```
312 | "Hello\nWorld!"
313 | ```
314 |
315 | ### Character Classes
316 |
317 | A character class is a sequence of characters
318 | between [ and the next unescaped occurrence of ].
319 | Escapes are treated as in string literals.
320 |
321 | Character classes are much like those of common regular expression libraries.
322 |
323 | **Accepts:**
324 | A character class accepts if the next rune of input is within the class.
325 |
326 | If the first character after the opening [ is a ^,
327 | then the character class's acceptance is negated.
328 |
329 | A pair of characters separated by a - defines a _span_:
330 | the character class will accept any rune with a number (codepoint)
331 | between (and including) the two characters.
332 | It is an error if the first is not smaller than the last.
333 |
334 | All other characters in the class are treated as a list of accepted runes.
335 |
336 | **Consumes:**
337 | A character class consumes one rune of input.
338 |
339 | **Result:**
340 | The result is the `string` of the consumed rune.
341 |
342 | **Example:**
343 | ```
344 | [a-zA-Z0-9_]
345 | ```
346 |
347 | ### Dot
348 |
349 | The character . is an expression.
350 |
351 | **Accepts:**
352 | A dot expression accepts if the input is not empty and the next rune is valid.
353 |
354 | **Consumes:**
355 | A dot expression consumes a single rune.
356 |
357 | **Result:**
358 | The result is the `string` of the consumed rune.
359 |
360 | **Example:**
361 | ```
362 | .
363 | ```
364 |
365 | ## Code predicates
366 |
367 | A code predicate is an operator & or ! followed by a Go expression enclosed in { and }.
368 | The expression must result in a boolean value,
369 | and must be syntactically valid as the condition of an
370 | [if statement](https://golang.org/ref/spec#If_statements).
371 |
372 | Label expressions in scope of the code predicate define identifiers accessible in the Go code.
373 | The value of the identifier is a `string` of the input consumed by the labeled expression.
374 | If the labeled expression has yet to accept at the time the code predicate is evaluated, the string is empty.
375 |
376 | **Accepts:**
377 |
378 | A code predicate with the operator & accepts if the expression evaluates to `true`.
379 |
380 | A code predicate with the operator ! accepts if the expression evaluates to `false`.
381 |
382 | **Consumes:**
383 | A code predicate consumes no runes of input.
384 |
385 | **Result:**
386 | The result of a code predicate is the empty string.
387 |
388 | **Example:**
389 | ```
390 | p:. &{ isUnicodeSpace(p) }
391 | ```
392 |
393 | ## Identifiers
394 |
395 | Identifiers begin with any Unicode letter or _
396 | followed by a sequence of zero or more letters, numbers, or _.
397 | Identifiers name a rule of the grammar.
398 | It is an error if an identifier is not the name of a rule of the grammar.
399 |
400 | **Accepts:**
401 | An identifier accepts if its named rule accepts.
402 |
403 | **Consumes:**
404 | An identifier consumes the runes of its named rule.
405 |
406 | **Result:**
407 | The result of an identifier has the type and value of its named rule's result.
408 |
409 | **Example:**
410 | ```
411 | HelloWorld <- Hello "," Space World
412 | Hello <- "Hello" / "こんいちは"
413 | World <- "World" / "世界"
414 | Space <- ( p:. &{ isUnicodeSpace(p) } )+
415 | ```
416 |
417 | ## Subexpressions
418 |
419 | A subexpression is an expression enclosed between ( and ).
420 | They are primarily used for grouping.
421 |
422 | **Accepts:**
423 | A subexpression accepts if its inner expression accepts.
424 |
425 | **Consumes:**
426 | A subexpression consumes the runes of its inner expression.
427 |
428 | **Result:**
429 | The result type and value of a subexpression are those of its inner expression.
430 |
431 | **Example:**
432 | ```
433 | "Hello, " ( "World" / "世界" )
434 | ```
435 |
436 | ## Actions
437 |
438 | An action is an expression followed by Go code between { and }.
439 | The Go code must be valid as the
440 | [body of a function](https://golang.org/ref/spec#Block).
441 | The Go code must end in a
442 | [return statement](https://golang.org/ref/spec#Return_statements),
443 | and the returned value must be one of:
444 | * [a type conversion](https://golang.org/ref/spec#Conversions)
445 | * [a type assertion](https://golang.org/ref/spec#Type_assertions)
446 | * [a function literal](https://golang.org/ref/spec#Function_literals)
447 | * [a composite literal](https://golang.org/ref/spec#Composite_literals)
448 | * [an &-composite literal](https://golang.org/ref/spec#Address_operators)
449 | * [an int literal](https://golang.org/ref/spec#Integer_literals)
450 | * [a float literal](https://golang.org/ref/spec#Floating-point_literals)
451 | * [a rune literal](https://golang.org/ref/spec#Rune_literals)
452 | * [a string literal](https://golang.org/ref/spec#String_literals)
453 |
454 | Label expressions in scope of the action define identifiers accessible in the Go code.
455 | The value of the identifier is the value of the labeled expression if it accepted.
456 | If the labeled expression has yet to accept at the time the action is evaluated,
457 | the value is the zero value of the corresponding type.
458 |
459 | In addition, there are several other special identifiers accessible to the code:
460 | * `parser` is a pointer to the Peggy `Parser`.
461 | * `start` is the byte offset in the input at which this expression first accepted.
462 | * `end` is the byte offset in the input just after this expression last accepted.
463 |
464 | **Accepts:**
465 | An action accepts if its subexpression accepts.
466 |
467 | **Consumes:**
468 | An action consumes the runes of its subexpression.
469 |
470 | **Result:**
471 | The result of an action has the type of the last return statement
472 | at the end of the block of Go code.
473 | The value is the value returned by the Go code.
474 |
475 | **Example:**
476 | ```
477 | hello:("Hello" / "こんいちは") ", " world:("World" / "世界") {
478 | return HelloWorld{
479 | Hello: hello,
480 | World: world,
481 | }
482 | }
483 | ```
484 |
485 | # Generated code
486 |
487 | The output file path is specified by the `-o` command-line option.
488 |
489 | All package-level definitions in the generated file begin with a prefix, defaulting to `_`. This default makes the definitions unexported. The prefix can be overridden with the `-p` command-line option.
490 |
491 | The generated file has a `Parser` type passed to the various parser functions,
492 | and contains between 2 and 4 functions for each rule, defining
493 | several parser _passes_. The passes are:
494 | 1. the _accepts_ pass,
495 | 2. the _fail_ pass,
496 | 3. optionally the _action_ pass, and
497 | 4. optionally the _node_ pass.
498 |
499 | A typical flow to use a Peggy-generated parser is to:
500 | * Create a new instance of the `Parser` type on a given input.
501 | * Call the accepts function for the root-level grammar rule.
502 |   * If the rule did not accept, there was a syntax error:
503 |     call the fail function of the rule to get a `*peg.Fail` tree,
504 |     and pass that to `peg.SimpleError` to get an `error`
505 |     describing the syntax error.
506 |   * If the rule accepted, call the action function of the rule
507 |     to get the result of the parse (an AST, evaluation, whatever),
508 |     or call the node pass to get a `*peg.Node` of the syntax tree.
509 |
510 | Here is an example:
511 |
512 | ```
513 | // Parse returns the AST generated by the grammar rule actions.
514 | func Parse(input string) (AstNode, error) {
515 | parser, _ := _NewParser(input) // error handling elided for brevity
516 | if pos, perr := _RuleAccepts(parser, 0); pos < 0 {
517 | _, failTree := _RuleFail(parser, 0, perr)
518 | return nil, peg.SimpleError(input, failTree)
519 | }
520 | // Or, instead call _RuleNode(parser, 0)
521 | // and return a *peg.Node with the syntax tree.
522 | _, astRoot := _RuleAction(parser, 0)
523 | return astRoot, nil
524 | }
525 | ```
526 |
527 | There are a lot of steps.
528 | This allows advanced uses not described here ☺.
529 | (But see, for example,
530 | [this file](https://github.com/eaburns/johaus/blob/master/parser/error.go)
531 | that showcases how to use the `*peg.Fail` tree to construct more precise error messages).
532 |
533 | Now let's see what the generated code for each of the passes looks like in more detail.
534 |
535 | ## The Parser type
536 |
537 | The `Parser` type is mostly intended to be treated as opaque.
538 | It maintains information about the parse to communicate between the multiple passes.
539 |
540 | The `Parser` type will have a field named `data` of type `interface{}`,
541 | which is ignored by the generated code.
542 | This field may be used in code predicates or actions to store auxiliary information.
543 | Such a use is considered advanced, and is not recommended
544 | unless you have a thorough understanding of the generated parser.
545 |
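For illustration only (the rule below and the use of a `*[]string` are hypothetical, not part of Peggy): if the caller sets `p.data = &[]string{}` after calling `_NewParser` and before running the passes, an action can append to that slice through the special `parser` identifier:

```
Ident <- x:([a-zA-Z_] [a-zA-Z0-9_]*) {
	// parser.data was set by the caller before parsing;
	// collect each identifier matched during the action pass.
	if seen, ok := parser.data.(*[]string); ok {
		*seen = append(*seen, x)
	}
	return string(x)
}
```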
546 | ## Accepts pass
547 |
548 | The accepts pass generates a function for each rule of the grammar with a signature of the form:
549 | ```
550 | func Accepts(parser *Parser, start int) (deltaPos, deltaErr int)
551 | ```
552 |
553 | The function determines whether the rule accepts the input
554 | beginning from the byte-offset `start`.
555 | If it accepts, `deltaPos` is a non-negative number of bytes accepted.
556 | If it does not accept, `deltaErr` is the number of bytes from start
557 | until the last rune of input that could not be consumed.
558 |
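For example, with the default `_` prefix and a root rule named `Root` (the name is illustrative), the two results might be used like this:

```
if deltaPos, deltaErr := _RootAccepts(p, 0); deltaPos >= 0 {
	// Root accepted deltaPos bytes of input starting at offset 0.
} else {
	// Root did not accept; deltaErr is the offset from start of the
	// deepest failure, and is what the fail pass expects as its errPos.
}
```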
559 | The primary purpose of the accept pass is to determine
560 | whether the language defined by the grammar accepts the input.
561 | The `Parser` maintains state from the accept pass that enables a subsequent
562 | fail, action, or node pass to compute its result without backtracking on rules.
563 |
564 | ## Fail pass
565 |
566 | The fail pass generates a function for each rule of the grammar with a signature of the form:
567 | ```
568 | func Fail(parser *Parser, start, errPos int) (int, *peg.Fail)
569 | ```
570 |
571 | The functions of the fail pass assume that the `Parser` has already been used
572 | as the argument of a corresponding accept pass,
573 | and that the accept pass failed to accept.
574 |
575 | Each function returns the `*peg.Fail` tree of all attempted rules
576 | that failed to accept the input beginning from `start`,
577 | which failed no earlier than `errPos` bytes into the input.
578 |
579 | The description is somewhat advanced.
580 | Suffice it to say, this computes a data structure used by the `peg` package
581 | to compute a parse error string with the `peg.SimpleError` function.
582 | More advanced users can inspect the `*peg.Fail` tree
583 | to create more precise or informative parse errors.
584 |
585 | ## Action pass
586 |
587 | The action pass generates a function for each rule of the grammar with a signature of the form:
588 | ```
589 | func Action(parser *Parser, start int) (int, *T)
590 | ```
591 |
592 | The functions of the action pass assume that the `Parser` has already been used
593 | as the argument of a corresponding accept pass,
594 | and that the accept pass accepted the rule at this position.
595 |
596 | Each function returns the number of consumed runes
597 | and a pointer to a value of the rule expression's result type.
598 |
599 | ## Node pass
600 |
601 | The node pass generates a function for each rule of the grammar with a signature of the form:
602 | ```
603 | func Node(parser *Parser, start int) (int, *peg.Node)
604 | ```
605 |
606 | The functions of the node pass assume that the `Parser` has already been used
607 | as the argument of a corresponding accept pass,
608 | and that the accept pass accepted the rule at this position.
609 |
610 | Each function returns the number of consumed runes
611 | and a `*peg.Node` that is the root of the syntax tree of the parse.
612 |
613 | (Peggy is not an official Google product.)
--------------------------------------------------------------------------------
/action_test.go:
--------------------------------------------------------------------------------
1 | package main
2 |
3 | import (
4 | "encoding/json"
5 | "io"
6 | "os"
7 | "os/exec"
8 | "reflect"
9 | "testing"
10 |
11 | "github.com/eaburns/pretty"
12 | )
13 |
14 | type actionTest struct {
15 | name string
16 | grammar string
17 | cases []actionTestCase
18 | }
19 |
20 | type actionTestCase struct {
21 | input string
22 | want interface{}
23 | }
24 |
25 | var actionTests = []actionTest{
26 | {
27 | name: "literal",
28 | grammar: `A <- "abc☺XYZ"`,
29 | cases: []actionTestCase{
30 | {"abc☺XYZ", "abc☺XYZ"},
31 | },
32 | },
33 | {
34 | name: "char class",
35 | grammar: `A <- [a-zA-Z0-9☺]`,
36 | cases: []actionTestCase{
37 | {"a", "a"},
38 | {"☺", "☺"},
39 | {"Z", "Z"},
40 | {"5", "5"},
41 | },
42 | },
43 | {
44 | name: "any char",
45 | grammar: `A <- .`,
46 | cases: []actionTestCase{
47 | {"a", "a"},
48 | {"☺", "☺"},
49 | {"Z", "Z"},
50 | {"5", "5"},
51 | },
52 | },
53 | {
54 | name: "star",
55 | grammar: `A <- "abc"*`,
56 | cases: []actionTestCase{
57 | {"", ""},
58 | {"abc", "abc"},
59 | {"abcabc", "abcabc"},
60 | {"abcabcabcabc", "abcabcabcabc"},
61 | },
62 | },
63 | {
64 | name: "plus",
65 | grammar: `A <- "abc"+`,
66 | cases: []actionTestCase{
67 | {"abc", "abc"},
68 | {"abcabc", "abcabc"},
69 | {"abcabcabcabc", "abcabcabcabc"},
70 | },
71 | },
72 | {
73 | name: "question",
74 | grammar: `A <- "abc"?`,
75 | cases: []actionTestCase{
76 | {"", ""},
77 | {"abc", "abc"},
78 | },
79 | },
80 | {
81 | name: "single type sequence",
82 | grammar: `A <- "a" "b" "c"`,
83 | cases: []actionTestCase{
84 | {"abc", "abc"},
85 | },
86 | },
87 | {
88 | name: "single type choice",
89 | grammar: `A <- "abc" / "☺☹" / .`,
90 | cases: []actionTestCase{
91 | {"abc", "abc"},
92 | {"☺☹", "☺☹"},
93 | {"z", "z"},
94 | },
95 | },
96 | {
97 | name: "multi-type choice",
98 | grammar: `A <- "abc" / "x" "y" "z"`,
99 | cases: []actionTestCase{
100 | {"abc", "abc"},
101 | {"xyz", "xyz"},
102 | },
103 | },
104 | {
105 | name: "choice branch fails after submatch",
106 | grammar: `A <- "xyz"? ( "a" "b" "c" / "a" "c" "b" )`,
107 | cases: []actionTestCase{
108 | {"acb", "acb"},
109 | {"xyzacb", "xyzacb"},
110 | },
111 | },
112 | {
113 | name: "multi-type sequence",
114 | grammar: `A <- ("a" "b" "c") "xyz"`,
115 | cases: []actionTestCase{
116 | {"abcxyz", "abcxyz"},
117 | },
118 | },
119 | {
120 | name: "identifier",
121 | grammar: `
122 | A <- Abc "xyz"
123 | Abc <- "a" "b" "c"`,
124 | cases: []actionTestCase{
125 | {"abcxyz", "abcxyz"},
126 | },
127 | },
128 | {
129 | name: "true predicate",
130 | grammar: `
131 | A <- "123"? &Abc "abc"
132 | Abc <- "a" "b" "c"`,
133 | cases: []actionTestCase{
134 | {"abc", "abc"},
135 | {"123abc", "123abc"},
136 | },
137 | },
138 | {
139 | name: "false predicate",
140 | grammar: `
141 | A <- "123"? !Abc "xyz"
142 | Abc <- "a" "b" "c"`,
143 | cases: []actionTestCase{
144 | {"xyz", "xyz"},
145 | {"123xyz", "123xyz"},
146 | },
147 | },
148 | {
149 | name: "true pred code",
150 | grammar: `
151 | A <- "abc"? &{ true } "xyz"`,
152 | cases: []actionTestCase{
153 | {"xyz", "xyz"},
154 | {"abcxyz", "abcxyz"},
155 | },
156 | },
157 | {
158 | name: "false pred code",
159 | grammar: `
160 | A <- "abc"? !{ false } "xyz"`,
161 | cases: []actionTestCase{
162 | {"xyz", "xyz"},
163 | {"abcxyz", "abcxyz"},
164 | },
165 | },
166 | {
167 | name: "subexpr",
168 | grammar: `A <- ("a" "b" "c")`,
169 | cases: []actionTestCase{
170 | {"abc", "abc"},
171 | },
172 | },
173 | {
174 | name: "label",
175 | grammar: `A <- l1:"a" l2:"b" l3:"c"`,
176 | cases: []actionTestCase{
177 | {"abc", "abc"},
178 | },
179 | },
180 | {
181 | name: "action",
182 | grammar: `
183 | A <- l1:. l2:. l3:. {
184 | return map[string]string{
185 | "1": l1,
186 | "2": l2,
187 | "3": l3,
188 | }
189 | }`,
190 | cases: []actionTestCase{
191 | {"abc", map[string]interface{}{
192 | "1": "a",
193 | "2": "b",
194 | "3": "c",
195 | }},
196 | {"xyz", map[string]interface{}{
197 | "1": "x",
198 | "2": "y",
199 | "3": "z",
200 | }},
201 | },
202 | },
203 | {
204 | name: "start and end",
205 | grammar: `
206 | A <- smiley? as v:bs cs { return [2]int(v) }
207 | smiley <- '☺'
208 | as <- 'a'*
209 | bs <- 'b'* { return [2]int{start, end} }
210 | cs <- 'c'*
211 | `,
212 | cases: []actionTestCase{
213 | {"", []interface{}{0.0, 0.0}},
214 | {"aaaccc", []interface{}{3.0, 3.0}},
215 | {"aaabccc", []interface{}{3.0, 4.0}},
216 | {"bbb", []interface{}{0.0, 3.0}},
217 | {"aaabbbccc", []interface{}{3.0, 6.0}},
218 | {"☺aaabbbccc", []interface{}{float64(len("☺") + 3), float64(len("☺") + 6)}},
219 | },
220 | },
221 | {
222 | name: "type inference",
223 | grammar: `
224 | A <- convert / ptr_convert / assert / func / struct / ptr_struct / map / array / slice / int / float / rune / string
225 | convert <- x:("convert" { return int32(1) }) { return string(fmt.Sprintf("%T", x)) }
226 | ptr_convert <- x:("ptr_convert" { return (*string)(nil) }) { return string(fmt.Sprintf("%T", x)) }
227 | assert <- x:("assert" { var c interface{} = peg.Node{}; return c.(peg.Node) }) { return string(fmt.Sprintf("%T", x)) }
228 | func <- x:("func" { return func(){} }) { return string(fmt.Sprintf("%T", x)) }
229 | struct <- x:("struct" { return peg.Node{} }) { return string(fmt.Sprintf("%T", x)) }
230 | ptr_struct <- x:("ptr_struct" { return &peg.Node{} }) { return string(fmt.Sprintf("%T", x)) }
231 | map <- x:("map" { return map[string]int{} }) { return string(fmt.Sprintf("%T", x)) }
232 | array <- x:("array" { return [5]int{} }) { return string(fmt.Sprintf("%T", x)) }
233 | slice <- x:("slice" { return []int{} }) { return string(fmt.Sprintf("%T", x)) }
234 | int <- x:("int" { return 0 }) { return string(fmt.Sprintf("%T", x)) }
235 | float <- x:("float" { return 0.0 }) { return string(fmt.Sprintf("%T", x)) }
236 | rune <- x:("rune" { return 'a' }) { return string(fmt.Sprintf("%T", x)) }
237 | string <- x:("string" { return "" }) { return string(fmt.Sprintf("%T", x)) }
238 | `,
239 | cases: []actionTestCase{
240 | {"convert", "int32"},
241 | {"ptr_convert", "*string"},
242 | {"assert", "peg.Node"},
243 | {"func", "func()"},
244 | {"struct", "peg.Node"},
245 | {"ptr_struct", "*peg.Node"},
246 | {"array", "[5]int"},
247 | {"slice", "[]int"},
248 | {"int", "int"},
249 | {"float", "float64"},
250 | {"rune", "int32"},
251 | {"string", "string"},
252 | },
253 | },
254 |
255 | // A simple calculator.
256 | // BUG: The test grammar reverses the normal associativity; oops.
257 | {
258 | name: "calculator",
259 | grammar: `
260 | A <- Expr
261 | Expr <- l:Term op:(Plus / Minus) r:Expr { return int(op(l, r)) } / x:Term { return int(x) }
262 | Plus <- "+" { return func(a, b int) int { return a + b } }
263 | Minus <- "-" { return func(a, b int) int { return a - b } }
264 | Term <- l:Factor op:(Times / Divide) r:Term { return int(op(l, r)) } / x:Factor { return int(x) }
265 | Times <- "*" { return func(a, b int) int { return a * b } }
266 | Divide <- "/"{ return func(a, b int) int { return a / b } }
267 | Factor <- Number / '(' x:Expr ')' { return int(x) }
268 | Number <- x:[0-9]+ { var i int; for _, r := range x { i = i * 10 + (int(r) - '0') }; return int(i) }
269 | `,
270 | cases: []actionTestCase{
271 | {"1", 1.0},
272 | {"(5)", 5.0},
273 | {"2*3", 6.0},
274 | {"2+3", 5.0},
275 | {"10-3*2", 4.0},
276 | {"10-(6/2)*5", -5.0},
277 | },
278 | },
279 | }
280 |
281 | func TestActionGen(t *testing.T) {
282 | for _, test := range actionTests {
283 | test := test
284 | t.Run(test.name, func(t *testing.T) {
285 | t.Parallel()
286 | source := generateTest(actionPrelude, test.grammar)
287 | binary := build(source)
288 | defer rm(binary)
289 | go rm(source)
290 |
291 | for _, c := range test.cases {
292 | var got struct {
293 | T interface{}
294 | }
295 | parseJSON(binary, c.input, &got)
296 | if !reflect.DeepEqual(got.T, c.want) {
297 | t.Errorf("parse(%q)=%s (%#v), want %s",
298 | c.input, pretty.String(got.T), got.T,
299 | pretty.String(c.want))
300 | }
301 | }
302 |
303 | })
304 | }
305 | }
306 |
307 | // parseJSON runs the given parser binary on the input,
308 | // decoding the result that the binary reports
309 | // into the result argument.
310 | // The format for transmitting the result
311 | // from the parser binary to the test harness
312 | // is JSON.
313 | func parseJSON(binary, input string, result interface{}) {
314 | cmd := exec.Command(binary)
315 | cmd.Stderr = os.Stderr
316 | stdin, err := cmd.StdinPipe()
317 | if err != nil {
318 | panic(err.Error())
319 | }
320 | stdout, err := cmd.StdoutPipe()
321 | if err != nil {
322 | panic(err.Error())
323 | }
324 | if err := cmd.Start(); err != nil {
325 | panic(err.Error())
326 | }
327 | go func() {
328 | if _, err := io.WriteString(stdin, input); err != nil {
329 | panic(err.Error())
330 | }
331 | if err := stdin.Close(); err != nil {
332 | panic(err.Error())
333 | }
334 | }()
335 | if err := json.NewDecoder(stdout).Decode(result); err != nil {
336 | panic(err.Error())
337 | }
338 | if err := cmd.Wait(); err != nil {
339 | panic(err.Error())
340 | }
341 | }
342 |
343 | var actionPrelude = `{
344 | package main
345 |
346 | import (
347 | "encoding/json"
348 | "fmt"
349 | "io/ioutil"
350 | "os"
351 |
352 | "github.com/eaburns/peggy/peg"
353 | )
354 |
355 | func main() {
356 | data, err := ioutil.ReadAll(os.Stdin)
357 | if err != nil {
358 | os.Stderr.WriteString(err.Error() + "\n")
359 | os.Exit(1)
360 | }
361 | p, err := _NewParser(string(data))
362 | if err != nil {
363 | os.Stderr.WriteString(err.Error() + "\n")
364 | os.Exit(1)
365 | }
366 | if pos, _ := _AAccepts(p, 0); pos < 0 {
367 | os.Stderr.WriteString("parse failed")
368 | os.Exit(1)
369 | }
370 | var result struct {
371 | T interface{}
372 | }
373 | _, result.T = _AAction(p, 0)
374 | if err := json.NewEncoder(os.Stdout).Encode(&result); err != nil {
375 | // Hack — we need fmt imported for the type inference test.
376 | // However, if imported, it must be used.
377 | // Here we use it at least once.
378 | fmt.Fprintf(os.Stderr, err.Error() + "\n")
379 | os.Exit(1)
380 | }
381 | }
382 | }
383 | `
384 |
--------------------------------------------------------------------------------
/check.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "sort"
11 | )
12 |
13 | // Check does semantic analysis of the rules,
14 | // setting bookkeeping needed to later generate the parser,
15 | // returning any errors encountered in order of their begin location.
16 | func Check(grammar *Grammar) error {
17 | var errs Errors
18 | rules := expandTemplates(grammar.Rules, &errs)
19 | ruleMap := make(map[string]*Rule, len(rules))
20 | for i, r := range rules {
21 | r.N = i
22 | name := r.Name.String()
23 | if other := ruleMap[name]; other != nil {
24 | errs.add(r, "rule %s redefined", name)
25 | }
26 | ruleMap[name] = r
27 | }
28 |
29 | var p path
30 | for _, r := range rules {
31 | r.checkLeft(ruleMap, p, &errs)
32 | }
33 | for _, r := range rules {
34 | check(r, ruleMap, &errs)
35 | }
36 | if err := errs.ret(); err != nil {
37 | return err
38 | }
39 | grammar.CheckedRules = rules
40 | return nil
41 | }
42 |
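// expandTemplates separates template rules (those with parameters) from plain rules
// and expands each distinct template invocation reachable from the plain rules,
// returning the plain rules together with their expansions.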
43 | func expandTemplates(ruleDefs []Rule, errs *Errors) []*Rule {
44 | var expanded, todo []*Rule
45 | tmplNames := make(map[string]*Rule)
46 | for i := range ruleDefs {
47 | r := &ruleDefs[i]
48 | if len(r.Name.Args) > 0 {
49 | seenParams := make(map[string]bool)
50 | for _, param := range r.Name.Args {
51 | n := param.String()
52 | if seenParams[n] {
53 | errs.add(param, "parameter %s redefined", n)
54 | }
55 | seenParams[n] = true
56 | }
57 | tmplNames[r.Name.Name.String()] = r
58 | } else {
59 | expanded = append(expanded, r)
60 | todo = append(todo, r)
61 | }
62 | }
63 |
64 | seen := make(map[string]bool)
65 | for i := 0; i < len(todo); i++ {
66 | for _, invok := range invokedTemplates(todo[i]) {
67 | if seen[invok.Name.String()] {
68 | continue
69 | }
70 | seen[invok.Name.String()] = true
71 | tmpl := tmplNames[invok.Name.Name.String()]
72 | if tmpl == nil {
73 | continue // undefined template, error reported elsewhere
74 | }
75 | exp := expand1(tmpl, invok, errs)
76 | if exp == nil {
77 | continue // error expanding, error reported elsewhere
78 | }
79 | todo = append(todo, exp)
80 | expanded = append(expanded, exp)
81 | }
82 | }
83 | return expanded
84 | }
85 |
86 | func expand1(tmpl *Rule, invok *Ident, errs *Errors) *Rule {
87 | if len(invok.Args) != len(tmpl.Args) {
88 | errs.add(invok, "template %s argument count mismatch: got %d, expected %d",
89 | tmpl.Name, len(invok.Args), len(tmpl.Args))
90 | return nil
91 | }
92 | copy := *tmpl
93 | sub := make(map[string]string, len(tmpl.Args))
94 | for i, arg := range invok.Args {
95 | sub[tmpl.Args[i].String()] = arg.String()
96 | }
97 | copy.Args = invok.Args
98 | copy.Expr = tmpl.Expr.substitute(sub)
99 | return &copy
100 | }
101 |
102 | func invokedTemplates(r *Rule) []*Ident {
103 | var tmpls []*Ident
104 | r.Expr.Walk(func(e Expr) bool {
105 | if id, ok := e.(*Ident); ok {
106 | if len(id.Args) > 0 {
107 | tmpls = append(tmpls, id)
108 | }
109 | }
110 | return true
111 | })
112 | return tmpls
113 | }
114 |
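// path tracks the chain of rules currently being visited while checking for left-recursion.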
115 | type path struct {
116 | stack []*Rule
117 | seen map[*Rule]bool
118 | }
119 |
120 | func (p *path) push(r *Rule) bool {
121 | if p.seen == nil {
122 | p.seen = make(map[*Rule]bool)
123 | }
124 | if p.seen[r] {
125 | return false
126 | }
127 | p.stack = append(p.stack, r)
128 | p.seen[r] = true
129 | return true
130 | }
131 |
132 | func (p *path) pop() {
133 | p.stack = p.stack[:len(p.stack)-1]
134 | }
135 |
136 | func (p *path) cycle(r *Rule) []*Rule {
137 | for i := len(p.stack) - 1; i >= 0; i-- {
138 | if p.stack[i] == r {
139 | return append(p.stack[i:], r)
140 | }
141 | }
142 | panic("no cycle")
143 | }
144 |
145 | func cycleString(rules []*Rule) string {
146 | var s string
147 | for _, r := range rules {
148 | if s != "" {
149 | s += ", "
150 | }
151 | s += r.Name.String()
152 | }
153 | return s
154 | }
155 |
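// checkLeft reports left-recursion involving this rule
// and records the rule's result type and whether it can match the empty input.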
156 | func (r *Rule) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
157 | if r.typ != nil {
158 | return
159 | }
160 | if !p.push(r) {
161 | cycle := p.cycle(r)
162 | errs.add(cycle[0], "left-recursion: %s", cycleString(cycle))
163 | for _, r := range cycle {
164 | r.typ = new(string)
165 | }
166 | return
167 | }
168 | r.Expr.checkLeft(rules, p, errs)
169 | t := r.Expr.Type()
170 | r.typ = &t
171 | r.epsilon = r.Expr.epsilon()
172 | p.pop()
173 | }
174 |
175 | func (e *Choice) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
176 | for _, sub := range e.Exprs {
177 | sub.checkLeft(rules, p, errs)
178 | }
179 | }
180 |
181 | func (e *Action) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
182 | e.Expr.checkLeft(rules, p, errs)
183 | }
184 |
185 | func (e *Sequence) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
186 | for _, sub := range e.Exprs {
187 | sub.checkLeft(rules, p, errs)
188 | if !sub.epsilon() {
189 | break
190 | }
191 | }
192 | }
193 |
194 | func (e *LabelExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
195 | e.Expr.checkLeft(rules, p, errs)
196 | }
197 |
198 | func (e *PredExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
199 | e.Expr.checkLeft(rules, p, errs)
200 | }
201 |
202 | func (e *RepExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
203 | e.Expr.checkLeft(rules, p, errs)
204 | }
205 |
206 | func (e *OptExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
207 | e.Expr.checkLeft(rules, p, errs)
208 | }
209 |
210 | func (e *Ident) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
211 | if e.rule = rules[e.Name.String()]; e.rule != nil {
212 | e.rule.checkLeft(rules, p, errs)
213 | }
214 | }
215 |
216 | func (e *SubExpr) checkLeft(rules map[string]*Rule, p path, errs *Errors) {
217 | e.Expr.checkLeft(rules, p, errs)
218 | }
219 |
220 | func (e *PredCode) checkLeft(rules map[string]*Rule, p path, errs *Errors) {}
221 |
222 | func (e *Literal) checkLeft(rules map[string]*Rule, p path, errs *Errors) {}
223 |
224 | func (e *CharClass) checkLeft(rules map[string]*Rule, p path, errs *Errors) {}
225 |
226 | func (e *Any) checkLeft(rules map[string]*Rule, p path, errs *Errors) {}
227 |
228 | type ctx struct {
229 | rules map[string]*Rule
230 | allLabels *[]*LabelExpr
231 | curLabels map[string]*LabelExpr
232 | }
233 |
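// check verifies a single rule's expression, reporting undefined rule references,
// redefined labels, and result-type mismatches,
// and collects the rule's labels in the order they are defined.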
234 | func check(rule *Rule, rules map[string]*Rule, errs *Errors) {
235 | ctx := ctx{
236 | rules: rules,
237 | allLabels: &rule.Labels,
238 | curLabels: make(map[string]*LabelExpr),
239 | }
240 | rule.Expr.check(ctx, true, errs)
241 | sort.Slice(rule.Labels, func(i, j int) bool {
242 | return rule.Labels[i].N < rule.Labels[j].N
243 | })
244 | }
245 |
246 | func (e *Choice) check(ctx ctx, valueUsed bool, errs *Errors) {
247 | for _, sub := range e.Exprs {
248 | subCtx := ctx
249 | subCtx.curLabels = make(map[string]*LabelExpr)
250 | for n, l := range ctx.curLabels {
251 | subCtx.curLabels[n] = l
252 | }
253 | sub.check(subCtx, valueUsed, errs)
254 | }
255 | t := e.Exprs[0].Type()
256 | for _, sub := range e.Exprs {
257 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" {
258 | errs.add(sub, "type mismatch: got %s, expected %s", got, t)
259 | }
260 | }
261 | }
262 |
263 | func (e *Action) check(ctx ctx, valueUsed bool, errs *Errors) {
264 | e.Expr.check(ctx, false, errs)
265 | for _, l := range ctx.curLabels {
266 | e.Labels = append(e.Labels, l)
267 | }
268 | sort.Slice(e.Labels, func(i, j int) bool {
269 | return e.Labels[i].Label.String() < e.Labels[j].Label.String()
270 | })
271 | }
272 |
273 | // BUG: figure out what to do about sequence types.
274 | func (e *Sequence) check(ctx ctx, valueUsed bool, errs *Errors) {
275 | for _, sub := range e.Exprs {
276 | sub.check(ctx, valueUsed, errs)
277 | }
278 | t := e.Exprs[0].Type()
279 | for _, sub := range e.Exprs {
280 | if got := sub.Type(); *genActions && valueUsed && got != t && got != "" && t != "" {
281 | errs.add(sub, "type mismatch: got %s, expected %s", got, t)
282 | }
283 | }
284 | }
285 |
286 | func (e *LabelExpr) check(ctx ctx, valueUsed bool, errs *Errors) {
287 | e.Expr.check(ctx, true, errs)
288 | if _, ok := ctx.curLabels[e.Label.String()]; ok {
289 | errs.add(e.Label, "label %s redefined", e.Label.String())
290 | }
291 | e.N = len(*ctx.allLabels)
292 | *ctx.allLabels = append(*ctx.allLabels, e)
293 | ctx.curLabels[e.Label.String()] = e
294 | }
295 |
296 | func (e *PredExpr) check(ctx ctx, valueUsed bool, errs *Errors) {
297 | e.Expr.check(ctx, false, errs)
298 | }
299 |
300 | func (e *RepExpr) check(ctx ctx, valueUsed bool, errs *Errors) {
301 | e.Expr.check(ctx, valueUsed, errs)
302 | }
303 |
304 | func (e *OptExpr) check(ctx ctx, valueUsed bool, errs *Errors) {
305 | e.Expr.check(ctx, valueUsed, errs)
306 | }
307 |
308 | func (e *SubExpr) check(ctx ctx, valueUsed bool, errs *Errors) {
309 | e.Expr.check(ctx, valueUsed, errs)
310 | }
311 |
312 | func (e *Ident) check(ctx ctx, _ bool, errs *Errors) {
313 | r, ok := ctx.rules[e.Name.String()]
314 | if !ok {
315 | errs.add(e, "rule %s undefined", e.Name.String())
316 | } else {
317 | e.rule = r
318 | }
319 | }
320 |
321 | func (e *PredCode) check(ctx ctx, _ bool, _ *Errors) {
322 | for _, l := range ctx.curLabels {
323 | e.Labels = append(e.Labels, l)
324 | }
325 | sort.Slice(e.Labels, func(i, j int) bool {
326 | return e.Labels[i].Label.String() < e.Labels[j].Label.String()
327 | })
328 | }
329 |
330 | func (e *Literal) check(ctx, bool, *Errors) {}
331 |
332 | func (e *CharClass) check(ctx, bool, *Errors) {}
333 |
334 | func (e *Any) check(ctx, bool, *Errors) {}
335 |
--------------------------------------------------------------------------------
/check_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "regexp"
11 | "strings"
12 | "testing"
13 | )
14 |
15 | type checkTest struct {
16 | name string
17 | in string
18 | err string
19 | skipActions bool
20 | }
21 |
22 | func (test checkTest) Run(t *testing.T) {
23 | in := strings.NewReader(test.in)
24 | g, err := Parse(in, "test.file")
25 | if err != nil {
26 | t.Errorf("Parse(%q, _)=_, %v, want _,nil", test.in, err)
27 | return
28 | }
29 | err = Check(g)
30 | if test.err == "" {
31 | if err != nil {
32 | t.Errorf("Check(%q)=%v, want nil", test.in, err)
33 | }
34 | return
35 | }
36 | re := regexp.MustCompile(test.err)
37 | if err == nil || !re.MatchString(err.Error()) {
38 | var e string
39 | if err != nil {
40 | e = err.Error()
41 | }
42 | t.Errorf("Check(%q)=%v, but expected to match %q",
43 | test.in, e, test.err)
44 | return
45 | }
46 | }
47 |
48 | func TestCheck(t *testing.T) {
49 | tests := []checkTest{
50 | {
51 | name: "empty OK",
52 | in: "",
53 | err: "",
54 | },
55 | {
56 | name: "various OK",
57 | in: `A <- (G/B C)*
58 | B <- &{pred}*
59 | C <- !{pred}* { return string(act) }
60 | D <- .* !B
61 | E <- C*
62 | F <- "cde"*
63 | G <- [fgh]*`,
64 | err: "",
65 | },
66 | {
67 | name: "redefined rule",
68 | in: "A <- [x]\nA <- [y]",
69 | err: "^test.file:2.1,2.9: rule A redefined",
70 | },
71 | {
72 | name: "undefined rule",
73 | in: "A <- B",
74 | err: "^test.file:1.6,1.7: rule B undefined",
75 | },
76 | {
77 | name: "redefined label",
78 | in: "A <- a:[a] a:[a]",
79 | err: "^test.file:1.12,1.13: label a redefined",
80 | },
81 | {
82 | name: "non-redefined label with same name in different branch",
83 | in: "A <- a:[a] / (a:[a] / a:[a]) / a:[a]",
84 | err: "",
85 | },
86 | {
87 | name: "redefined label in same choice branch",
88 | in: "A <- a:[a] / a:[a] a:[a]",
89 | err: "^test.file:1.20,1.21: label a redefined",
90 | },
91 | {
92 | name: "choice first error",
93 | in: "A <- Undefined / A",
94 | err: ".+",
95 | },
96 | {
97 | name: "choice second error",
98 | in: "A <- B / Undefined\nB <- [x]",
99 | err: ".+",
100 | },
101 | {
102 | name: "seq first error",
103 | in: "A <- Undefined A",
104 | err: ".+",
105 | },
106 | {
107 | name: "sequence second error",
108 | in: "A <- B Undefined\nB <- [x]",
109 | err: ".+",
110 | },
111 | {
112 | name: "template parameter OK",
113 | in: `A <- x
114 | B <- A
115 | C <- "c"`,
116 | err: "",
117 | },
118 | {
119 | name: "template parameter redef",
120 | in: `A <- x
121 | B <- A
122 | C <- "c"`,
123 | err: "^test.file:1.6,1.7: parameter x redefined$",
124 | },
125 | {
126 | name: "template arg count mismatch",
127 | in: `A <- x
128 | B <- A
129 | C <- "c"`,
130 | err: "test.file:2.10,2.16: template A argument count mismatch: got 2, expected 1",
131 | },
132 | {
133 | name: "multiple errors",
134 | in: "A <- U1 U2\nA <- u:[x] u:[x]",
135 | err: "test.file:1.6,1.8: rule U1 undefined\n" +
136 | "test.file:1.9,1.11: rule U2 undefined\n" +
137 | "test.file:2.1,2.17: rule A redefined\n" +
138 | "test.file:2.12,2.13: label u redefined",
139 | },
140 | {
141 | name: "right recursion is OK",
142 | in: `A <- "b" B
143 | B <- A`,
144 | },
145 | {
146 | name: "direct left-recursion",
147 | in: `A <- A`,
148 | err: "^test.file:1.1,1.7: left-recursion: A, A$",
149 | },
150 | {
151 | name: "indirect left-recursion",
152 | in: `A <- C0
153 | C0 <- C1
154 | C1 <- C2
155 | C2 <- C0`,
156 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$",
157 | },
158 | {
159 | name: "choice left-recursion",
160 | in: `A <- B / C / D
161 | B <- "b"
162 | C <- "c"
163 | D <- A`,
164 | err: "^test.file:1.1,1.15: left-recursion: A, D, A$",
165 | },
166 | {
167 | name: "sequence left-recursion",
168 | in: `A <- !B C D E
169 | B <- "b"
170 | C <- !"c"
171 | D <- C # non-consuming through C
172 | E <- A`,
173 | err: "^test.file:1.1,1.14: left-recursion: A, E, A$",
174 | },
175 | {
176 | name: "various expr left-recursion",
177 | in: `Choice <- "a" / Sequence
178 | Sequence <- SubExpr "b"
179 | SubExpr <- ( PredExpr )
180 | PredExpr <- &RepExpr
181 | RepExpr <- OptExpr+
182 | OptExpr <- Action?
183 | Action <- Choice { return "" }`,
184 | err: "^test.file:1.1,1.25: left-recursion: Choice, Sequence, SubExpr, PredExpr, RepExpr, OptExpr, Action, Choice$",
185 | },
186 | {
187 | name: "templates calling templates",
188 | in: `A <- B
189 | B <- C
190 | C <- "a" D C?
191 | D <- X
192 | X <- "x"`,
193 | err: "", // this should work fine.
194 | },
195 | {
196 | name: "template left-recursion",
197 | in: `A <- C0
198 | C0 <- C1
199 | C1 <- C2
200 | C2 <- X`,
201 | err: "^test.file:2.5,2.13: left-recursion: C0, C1, C2, C0$",
202 | },
203 | {
204 | name: "multiple left-recursion errors",
205 | in: `A <- A
206 | B <- C
207 | C <- B`,
208 | err: "^test.file:1.1,1.7: left-recursion: A, A\n" +
209 | "test.file:2.5,2.11: left-recursion: B, C, B$",
210 | },
211 | {
212 | name: "right-recursion is OK",
213 | in: `A <- B C A?
214 | B <- "b" B / C
215 | C <- "c"`,
216 | err: "",
217 | },
218 |
219 | {
220 | name: "choice type mismatch",
221 | in: `A <- "a" / "b" { return 5 }`,
222 | err: "^test.file:1.12,1.28: type mismatch: got int, expected string",
223 | },
224 | {
225 | name: "sequence type mismatch",
226 | in: `A <- "a" ( "b" { return 5 } )`,
227 | err: "^test.file:1.10,1.29: type mismatch: got int, expected string",
228 | },
229 | {
230 | name: "unused choice, no mismatch",
231 | in: `A <- ( "a" / "b" { return 5 } ) { return 6 }`,
232 | err: "",
233 | },
234 | {
235 | name: "unused sequence, no mismatch",
236 | in: `A <- "a" ( "b" { return 5 } ) { return 6 }`,
237 | err: "",
238 | },
239 | {
240 | name: "&-pred subexpression is unused",
241 | in: `A <- "a" !( "b" { return 5 } )`,
242 | err: "",
243 | },
244 | {
245 | name: "!-pred subexpression is unused",
246 | in: `A <- "a" !( "b" { return 5 } )`,
247 | err: "",
248 | },
249 | {
250 | name: "multiple type errors",
251 | in: `A <- B ( "c" { return 0 } )
252 | B <- "b" / ( "c" { return 0 } )`,
253 | err: "^test.file:1.8,1.27: type mismatch: got int, expected string\n" +
254 | "test.file:2.16,2.35: type mismatch: got int, expected string$",
255 | },
256 | }
257 | for _, test := range tests {
258 | test := test
259 | t.Run(test.name, func(t *testing.T) {
260 | t.Parallel()
261 | test.Run(t)
262 | })
263 | }
264 | }
265 |
266 | func TestGenActionsFalse(t *testing.T) {
267 | // This set of tests cannot be run in parallel.
268 | *genActions = false
269 | defer func() { *genActions = true }()
270 |
271 | tests := []checkTest{
272 | {
273 | name: "choice type mismatch: no error",
274 | in: `A <- "a" / "b" { return 5 }`,
275 | },
276 | {
277 | name: "sequence type mismatch: no error",
278 | in: `A <- "a" ( "b" { return 5 } )`,
279 | },
280 | }
281 | for _, test := range tests {
282 | t.Run(test.name, test.Run)
283 | }
284 | }
285 |
--------------------------------------------------------------------------------
/error.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "fmt"
11 | "sort"
12 | )
13 |
14 | // Located is an interface representing anything located within the input stream.
15 | type Located interface {
16 | Begin() Loc
17 | End() Loc
18 | }
19 |
20 | // Errors implements error, containing multiple errors.
21 | type Errors struct {
22 | Errs []Error
23 | }
24 |
25 | func (err *Errors) ret() error {
26 | if len(err.Errs) == 0 {
27 | return nil
28 | }
29 | sort.Slice(err.Errs, func(i, j int) bool {
30 | return err.Errs[i].Begin().Less(err.Errs[j].Begin())
31 | })
32 | return err
33 | }
34 |
35 | func (err *Errors) add(loc Located, format string, args ...interface{}) {
36 | err.Errs = append(err.Errs, Err(loc, format, args...))
37 | }
38 |
39 | // Error returns the string representation of the Errors,
40 | // which is the string of each Error, one per-line.
41 | func (err Errors) Error() string {
42 | var s string
43 | for i, e := range err.Errs {
44 | if i > 0 {
45 | s += "\n"
46 | }
47 | s += e.Error()
48 | }
49 | return s
50 | }
51 |
52 | // Error is an error tied to an element of the Peggy input file.
53 | type Error struct {
54 | Located
55 | Msg string
56 | }
57 |
58 | func (err Error) Error() string {
59 | b, e := err.Begin(), err.End()
60 | l0, c0 := b.Line, b.Col
61 | l1, c1 := e.Line, e.Col
62 | switch {
63 | case l0 == l1 && c0 == c1:
64 | return fmt.Sprintf("%s:%d.%d: %s", b.File, l0, c0, err.Msg)
65 | default:
66 | return fmt.Sprintf("%s:%d.%d,%d.%d: %s", b.File, l0, c0, l1, c1, err.Msg)
67 | }
68 | }
69 |
70 | // Err returns an error containing the location and formatted message.
71 | func Err(loc Located, format string, args ...interface{}) Error {
72 | return Error{Located: loc, Msg: fmt.Sprintf(format, args...)}
73 | }
74 |
--------------------------------------------------------------------------------
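
A minimal sketch of how the types in error.go compose, assuming it lives in the same package and that the Loc type from loc.go has File, Line, and Col fields (the span type below is illustrative, not part of the package):

    // span is a hypothetical Located value covering 1.12 through 1.28.
    type span struct{ begin, end Loc }

    func (s span) Begin() Loc { return s.begin }
    func (s span) End() Loc   { return s.end }

    func reportMismatch() error {
        var errs Errors
        where := span{
            begin: Loc{File: "test.file", Line: 1, Col: 12},
            end:   Loc{File: "test.file", Line: 1, Col: 28},
        }
        errs.add(where, "type mismatch: got %s, expected %s", "int", "string")
        // ret sorts by begin position and returns nil if nothing was added.
        // Here it yields the same format checked in check_test.go:
        // "test.file:1.12,1.28: type mismatch: got int, expected string"
        return errs.ret()
    }
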
/example/calc/calc.peggy:
--------------------------------------------------------------------------------
1 | {
2 | // Calc is an example calculator program.
3 | // You can build it from calc.peggy with
4 | // peggy -o calc.go calc.peggy
5 | package main
6 |
7 | import (
8 | "bufio"
9 | "fmt"
10 | "math/big"
11 | "os"
12 | "unicode"
13 | "unicode/utf8"
14 |
15 | "github.com/eaburns/peggy/peg"
16 | )
17 |
18 | func main() {
19 | scanner := bufio.NewScanner(os.Stdin)
20 | for scanner.Scan() {
21 | line := scanner.Text()
22 | p, err := _NewParser(line)
23 | if err != nil {
24 | fmt.Println(err)
25 | os.Exit(1)
26 | }
27 | if pos, perr := _ExprAccepts(p, 0); pos < 0 {
28 | _, fail := _ExprFail(p, 0, perr)
29 | fmt.Println(peg.SimpleError(line, fail))
30 | continue
31 | }
32 | _, result := _ExprAction(p, 0)
33 | fmt.Println((*result).String())
34 | }
35 | if err := scanner.Err(); err != nil {
36 | fmt.Println(err)
37 | os.Exit(1)
38 | }
39 | }
40 |
41 | type op func(*big.Float, *big.Float, *big.Float) *big.Float
42 |
43 | type tail struct {
44 | op op
45 | r *big.Float
46 | }
47 |
48 | func evalTail(l big.Float, tail []tail) big.Float {
49 | for _, t := range tail {
50 | t.op(&l, &l, t.r)
51 | }
52 | return l
53 | }
54 |
55 | func isSpace(s string) bool {
56 | r, _ := utf8.DecodeRuneInString(s)
57 | return unicode.IsSpace(r)
58 | }
59 | }
60 |
61 | Expr <- s:Sum EOF { return (*big.Float)(&s) }
62 |
63 | Sum <- l:Product tail:SumTail* { return (big.Float)(evalTail(l, tail)) }
64 |
65 | SumTail <- op:AddOp r:Product { return tail{op, &r} }
66 |
67 | AddOp "operator" <-
68 | _ "+" { return op((*big.Float).Add) } /
69 | _ "-" { return op((*big.Float).Sub) }
70 |
71 | Product <- l:Value tail:ProductTail* { return (big.Float)(evalTail(l, tail)) }
72 |
73 | ProductTail <- op:MulOp r:Value { return tail{op, &r} }
74 |
75 | MulOp "operator" <-
76 | _ "*" { return op((*big.Float).Mul) } /
77 | _ "/" { return op((*big.Float).Quo) }
78 |
79 | Value <- Num / _ "(" e:Sum _ ")" { return (big.Float)(e) }
80 |
81 | Num "number" <- _ n:( [0-9]+ ("." [0-9]+)? ) {
82 | var f big.Float
83 | f.Parse(n, 10)
84 | return (big.Float)(f)
85 | }
86 |
87 | _ "space" <- ( s:. &{ isSpace(s) } )*
88 |
89 | EOF "end of file" <- !.
--------------------------------------------------------------------------------
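
The op and tail types in the calc prelude let evalTail fold a chain of left-associative operations. A small illustration, assuming it runs alongside the prelude above (so math/big and fmt are already imported):

    func demoEvalTail() {
        // Compute 10 - 2 + 3 as a left fold over operator/operand tails.
        l := *big.NewFloat(10)
        ts := []tail{
            {op: (*big.Float).Sub, r: big.NewFloat(2)},
            {op: (*big.Float).Add, r: big.NewFloat(3)},
        }
        sum := evalTail(l, ts)
        fmt.Println(sum.String()) // prints 11
    }
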
/example/label_names/label_names.go:
--------------------------------------------------------------------------------
1 | // Test labels with the same name but in different choice branches.
2 | // peggy -o label_names.go label_names.peggy
3 | package main
4 |
5 | import (
6 | "bufio"
7 | "fmt"
8 | "os"
9 |
10 | "github.com/eaburns/peggy/peg"
11 | )
12 |
13 | func main() {
14 | scanner := bufio.NewScanner(os.Stdin)
15 | for scanner.Scan() {
16 | line := scanner.Text()
17 | p, err := _NewParser(line)
18 | if err != nil {
19 | fmt.Println(err)
20 | os.Exit(1)
21 | }
22 | if pos, perr := _ExprAccepts(p, 0); pos < 0 {
23 | _, fail := _ExprFail(p, 0, perr)
24 | fmt.Println(peg.SimpleError(line, fail))
25 | continue
26 | }
27 | _, result := _ExprAction(p, 0)
28 | fmt.Println(*result)
29 | }
30 | if err := scanner.Err(); err != nil {
31 | fmt.Println(err)
32 | os.Exit(1)
33 | }
34 | }
35 |
36 | const (
37 | _Expr int = 0
38 |
39 | _N int = 1
40 | )
41 |
42 | type _Parser struct {
43 | text string
44 | deltaPos [][_N]int32
45 | deltaErr [][_N]int32
46 | node map[_key]*peg.Node
47 | fail map[_key]*peg.Fail
48 | act map[_key]interface{}
49 | lastFail int
50 | data interface{}
51 | }
52 |
53 | type _key struct {
54 | start int
55 | rule int
56 | }
57 |
58 | type tooBigError struct{}
59 |
60 | func (tooBigError) Error() string { return "input is too big" }
61 |
62 | func _NewParser(text string) (*_Parser, error) {
63 | n := len(text) + 1
64 | if n < 0 {
65 | return nil, tooBigError{}
66 | }
67 | p := &_Parser{
68 | text: text,
69 | deltaPos: make([][_N]int32, n),
70 | deltaErr: make([][_N]int32, n),
71 | node: make(map[_key]*peg.Node),
72 | fail: make(map[_key]*peg.Fail),
73 | act: make(map[_key]interface{}),
74 | }
75 | return p, nil
76 | }
77 |
78 | func _max(a, b int) int {
79 | if a > b {
80 | return a
81 | }
82 | return b
83 | }
84 |
85 | func _memoize(parser *_Parser, rule, start, pos, perr int) (int, int) {
86 | parser.lastFail = perr
87 | derr := perr - start
88 | parser.deltaErr[start][rule] = int32(derr + 1)
89 | if pos >= 0 {
90 | dpos := pos - start
91 | parser.deltaPos[start][rule] = int32(dpos + 1)
92 | return dpos, derr
93 | }
94 | parser.deltaPos[start][rule] = -1
95 | return -1, derr
96 | }
97 |
98 | func _memo(parser *_Parser, rule, start int) (int, int, bool) {
99 | dp := parser.deltaPos[start][rule]
100 | if dp == 0 {
101 | return 0, 0, false
102 | }
103 | if dp > 0 {
104 | dp--
105 | }
106 | de := parser.deltaErr[start][rule] - 1
107 | return int(dp), int(de), true
108 | }
109 |
110 | func _failMemo(parser *_Parser, rule, start, errPos int) (int, *peg.Fail) {
111 | if start > parser.lastFail {
112 | return -1, &peg.Fail{}
113 | }
114 | dp := parser.deltaPos[start][rule]
115 | de := parser.deltaErr[start][rule]
116 | if start+int(de-1) < errPos {
117 | if dp > 0 {
118 | return start + int(dp-1), &peg.Fail{}
119 | }
120 | return -1, &peg.Fail{}
121 | }
122 | f := parser.fail[_key{start: start, rule: rule}]
123 | if dp < 0 && f != nil {
124 | return -1, f
125 | }
126 | if dp > 0 && f != nil {
127 | return start + int(dp-1), f
128 | }
129 | return start, nil
130 | }
131 |
132 | func _accept(parser *_Parser, f func(*_Parser, int) (int, int), pos, perr *int) bool {
133 | dp, de := f(parser, *pos)
134 | *perr = _max(*perr, *pos+de)
135 | if dp < 0 {
136 | return false
137 | }
138 | *pos += dp
139 | return true
140 | }
141 |
142 | func _node(parser *_Parser, f func(*_Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool {
143 | p, kid := f(parser, *pos)
144 | if kid == nil {
145 | return false
146 | }
147 | node.Kids = append(node.Kids, kid)
148 | *pos = p
149 | return true
150 | }
151 |
152 | func _fail(parser *_Parser, f func(*_Parser, int, int) (int, *peg.Fail), errPos int, node *peg.Fail, pos *int) bool {
153 | p, kid := f(parser, *pos, errPos)
154 | if kid.Want != "" || len(kid.Kids) > 0 {
155 | node.Kids = append(node.Kids, kid)
156 | }
157 | if p < 0 {
158 | return false
159 | }
160 | *pos = p
161 | return true
162 | }
163 |
164 | func _next(parser *_Parser, pos int) (rune, int) {
165 | r, w := peg.DecodeRuneInString(parser.text[pos:])
166 | return r, w
167 | }
168 |
169 | func _sub(parser *_Parser, start, end int, kids []*peg.Node) *peg.Node {
170 | node := &peg.Node{
171 | Text: parser.text[start:end],
172 | Kids: make([]*peg.Node, len(kids)),
173 | }
174 | copy(node.Kids, kids)
175 | return node
176 | }
177 |
178 | func _leaf(parser *_Parser, start, end int) *peg.Node {
179 | return &peg.Node{Text: parser.text[start:end]}
180 | }
181 |
182 | // A no-op function to mark a variable as used.
183 | func use(interface{}) {}
184 |
185 | func _ExprAccepts(parser *_Parser, start int) (deltaPos, deltaErr int) {
186 | var labels [2]string
187 | use(labels)
188 | if dp, de, ok := _memo(parser, _Expr, start); ok {
189 | return dp, de
190 | }
191 | pos, perr := start, -1
192 | // letter:[a] {…}/letter:[b] {…}
193 | {
194 | pos3 := pos
195 | // action
196 | // letter:[a]
197 | {
198 | pos5 := pos
199 | // [a]
200 | if r, w := _next(parser, pos); r != 'a' {
201 | perr = _max(perr, pos)
202 | goto fail4
203 | } else {
204 | pos += w
205 | }
206 | labels[0] = parser.text[pos5:pos]
207 | }
208 | goto ok0
209 | fail4:
210 | pos = pos3
211 | // action
212 | // letter:[b]
213 | {
214 | pos7 := pos
215 | // [b]
216 | if r, w := _next(parser, pos); r != 'b' {
217 | perr = _max(perr, pos)
218 | goto fail6
219 | } else {
220 | pos += w
221 | }
222 | labels[1] = parser.text[pos7:pos]
223 | }
224 | goto ok0
225 | fail6:
226 | pos = pos3
227 | goto fail
228 | ok0:
229 | }
230 | return _memoize(parser, _Expr, start, pos, perr)
231 | fail:
232 | return _memoize(parser, _Expr, start, -1, perr)
233 | }
234 |
235 | func _ExprNode(parser *_Parser, start int) (int, *peg.Node) {
236 | var labels [2]string
237 | use(labels)
238 | dp := parser.deltaPos[start][_Expr]
239 | if dp < 0 {
240 | return -1, nil
241 | }
242 | key := _key{start: start, rule: _Expr}
243 | node := parser.node[key]
244 | if node != nil {
245 | return start + int(dp-1), node
246 | }
247 | pos := start
248 | node = &peg.Node{Name: "Expr"}
249 | // letter:[a] {…}/letter:[b] {…}
250 | {
251 | pos3 := pos
252 | nkids1 := len(node.Kids)
253 | // action
254 | // letter:[a]
255 | {
256 | pos5 := pos
257 | // [a]
258 | if r, w := _next(parser, pos); r != 'a' {
259 | goto fail4
260 | } else {
261 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w))
262 | pos += w
263 | }
264 | labels[0] = parser.text[pos5:pos]
265 | }
266 | goto ok0
267 | fail4:
268 | node.Kids = node.Kids[:nkids1]
269 | pos = pos3
270 | // action
271 | // letter:[b]
272 | {
273 | pos7 := pos
274 | // [b]
275 | if r, w := _next(parser, pos); r != 'b' {
276 | goto fail6
277 | } else {
278 | node.Kids = append(node.Kids, _leaf(parser, pos, pos+w))
279 | pos += w
280 | }
281 | labels[1] = parser.text[pos7:pos]
282 | }
283 | goto ok0
284 | fail6:
285 | node.Kids = node.Kids[:nkids1]
286 | pos = pos3
287 | goto fail
288 | ok0:
289 | }
290 | node.Text = parser.text[start:pos]
291 | parser.node[key] = node
292 | return pos, node
293 | fail:
294 | return -1, nil
295 | }
296 |
297 | func _ExprFail(parser *_Parser, start, errPos int) (int, *peg.Fail) {
298 | var labels [2]string
299 | use(labels)
300 | pos, failure := _failMemo(parser, _Expr, start, errPos)
301 | if failure != nil {
302 | return pos, failure
303 | }
304 | failure = &peg.Fail{
305 | Name: "Expr",
306 | Pos: int(start),
307 | }
308 | key := _key{start: start, rule: _Expr}
309 | // letter:[a] {…}/letter:[b] {…}
310 | {
311 | pos3 := pos
312 | // action
313 | // letter:[a]
314 | {
315 | pos5 := pos
316 | // [a]
317 | if r, w := _next(parser, pos); r != 'a' {
318 | if pos >= errPos {
319 | failure.Kids = append(failure.Kids, &peg.Fail{
320 | Pos: int(pos),
321 | Want: "[a]",
322 | })
323 | }
324 | goto fail4
325 | } else {
326 | pos += w
327 | }
328 | labels[0] = parser.text[pos5:pos]
329 | }
330 | goto ok0
331 | fail4:
332 | pos = pos3
333 | // action
334 | // letter:[b]
335 | {
336 | pos7 := pos
337 | // [b]
338 | if r, w := _next(parser, pos); r != 'b' {
339 | if pos >= errPos {
340 | failure.Kids = append(failure.Kids, &peg.Fail{
341 | Pos: int(pos),
342 | Want: "[b]",
343 | })
344 | }
345 | goto fail6
346 | } else {
347 | pos += w
348 | }
349 | labels[1] = parser.text[pos7:pos]
350 | }
351 | goto ok0
352 | fail6:
353 | pos = pos3
354 | goto fail
355 | ok0:
356 | }
357 | parser.fail[key] = failure
358 | return pos, failure
359 | fail:
360 | parser.fail[key] = failure
361 | return -1, failure
362 | }
363 |
364 | func _ExprAction(parser *_Parser, start int) (int, *string) {
365 | var labels [2]string
366 | use(labels)
367 | var label0 string
368 | var label1 string
369 | dp := parser.deltaPos[start][_Expr]
370 | if dp < 0 {
371 | return -1, nil
372 | }
373 | key := _key{start: start, rule: _Expr}
374 | n := parser.act[key]
375 | if n != nil {
376 | n := n.(string)
377 | return start + int(dp-1), &n
378 | }
379 | var node string
380 | pos := start
381 | // letter:[a] {…}/letter:[b] {…}
382 | {
383 | pos3 := pos
384 | var node2 string
385 | // action
386 | {
387 | start5 := pos
388 | // letter:[a]
389 | {
390 | pos6 := pos
391 | // [a]
392 | if r, w := _next(parser, pos); r != 'a' {
393 | goto fail4
394 | } else {
395 | label0 = parser.text[pos : pos+w]
396 | pos += w
397 | }
398 | labels[0] = parser.text[pos6:pos]
399 | }
400 | node = func(
401 | start, end int, letter string) string {
402 | fmt.Printf("a=[%s]\n", letter)
403 | return string(letter)
404 | }(
405 | start5, pos, label0)
406 | }
407 | goto ok0
408 | fail4:
409 | node = node2
410 | pos = pos3
411 | // action
412 | {
413 | start8 := pos
414 | // letter:[b]
415 | {
416 | pos9 := pos
417 | // [b]
418 | if r, w := _next(parser, pos); r != 'b' {
419 | goto fail7
420 | } else {
421 | label1 = parser.text[pos : pos+w]
422 | pos += w
423 | }
424 | labels[1] = parser.text[pos9:pos]
425 | }
426 | node = func(
427 | start, end int, letter string) string {
428 | fmt.Printf("b=[%s]\n", letter)
429 | return string(letter)
430 | }(
431 | start8, pos, label1)
432 | }
433 | goto ok0
434 | fail7:
435 | node = node2
436 | pos = pos3
437 | goto fail
438 | ok0:
439 | }
440 | parser.act[key] = node
441 | return pos, &node
442 | fail:
443 | return -1, nil
444 | }
445 |
--------------------------------------------------------------------------------
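
The generated memo tables above store deltas rather than absolute positions: _memoize records deltaPos+1, so the zero value of an int32 cell means "not parsed here yet" and -1 means the rule failed; _memo reverses the encoding. A small standalone illustration of that encoding (plain helper functions, not part of the generated code):

    // encode mirrors _memoize for a single (rule, start) cell.
    func encode(start, pos int) int32 {
        if pos < 0 {
            return -1 // the rule failed starting at start
        }
        return int32(pos-start) + 1 // +1 so that 0 still means "no entry"
    }

    // decode mirrors _memo: ok reports whether the cell holds a result.
    func decode(start int, cell int32) (pos int, ok bool) {
        switch {
        case cell == 0:
            return 0, false // never parsed at start
        case cell < 0:
            return -1, true // parsed and failed
        default:
            return start + int(cell-1), true // succeeded, ending at pos
        }
    }
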
/example/label_names/label_names.peggy:
--------------------------------------------------------------------------------
1 | {
2 | // Test labels with the same name but in different choice branches.
3 | // peggy -o label_names.go label_names.peggy
4 | package main
5 |
6 | import (
7 | "bufio"
8 | "fmt"
9 | "os"
10 |
11 | "github.com/eaburns/peggy/peg"
12 | )
13 |
14 | func main() {
15 | scanner := bufio.NewScanner(os.Stdin)
16 | for scanner.Scan() {
17 | line := scanner.Text()
18 | p, err := _NewParser(line)
19 | if err != nil {
20 | fmt.Println(err)
21 | os.Exit(1)
22 | }
23 | if pos, perr := _ExprAccepts(p, 0); pos < 0 {
24 | _, fail := _ExprFail(p, 0, perr)
25 | fmt.Println(peg.SimpleError(line, fail))
26 | continue
27 | }
28 | _, result := _ExprAction(p, 0)
29 | fmt.Println(*result)
30 | }
31 | if err := scanner.Err(); err != nil {
32 | fmt.Println(err)
33 | os.Exit(1)
34 | }
35 | }
36 | }
37 |
38 | Expr <-
39 | letter:[a] { fmt.Printf("a=[%s]\n", letter); return string(letter) } /
40 | letter:[b] { fmt.Printf("b=[%s]\n", letter); return string(letter) }
41 |
--------------------------------------------------------------------------------
/gen.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "bytes"
11 | "errors"
12 | "go/format"
13 | "go/parser"
14 | "go/token"
15 | "io"
16 | "os"
17 | "reflect"
18 | "strconv"
19 | "text/template"
20 | )
21 |
22 | // Generate generates a parser for the rules,
23 | // using a default Config:
24 | // Config{Prefix: "_"}
25 | func Generate(w io.Writer, file string, grammar *Grammar) error {
26 | return Config{Prefix: "_"}.Generate(w, file, grammar)
27 | }
28 |
29 | // A Config specifies code generation options.
30 | type Config struct {
31 | Prefix string
32 | }
33 |
34 | // Generate generates a parser for the rules.
35 | func (c Config) Generate(w io.Writer, file string, gr *Grammar) error {
36 | b := bytes.NewBuffer(nil)
37 | if err := writePrelude(b, file, gr); err != nil {
38 | return err
39 | }
40 | if err := writeDecls(b, c, gr); err != nil {
41 | return err
42 | }
43 | for _, r := range gr.CheckedRules {
44 | if err := writeRule(b, c, r); err != nil {
45 | return err
46 | }
47 | }
48 | return gofmt(w, b.String())
49 | }
50 |
51 | func gofmt(w io.Writer, s string) error {
52 | fset := token.NewFileSet()
53 | root, err := parser.ParseFile(fset, "", s, parser.ParseComments)
54 | if err != nil {
55 | io.WriteString(os.Stderr, s)
56 | io.WriteString(w, s)
57 | return err
58 | }
59 | if err := format.Node(w, fset, root); err != nil {
60 | io.WriteString(w, s)
61 | return err
62 | }
63 | return nil
64 | }
65 |
66 | func writePrelude(w io.Writer, file string, gr *Grammar) error {
67 | if gr.Prelude == nil {
68 | return nil
69 | }
70 | _, err := io.WriteString(w, gr.Prelude.String())
71 | return err
72 | }
73 |
74 | func writeDecls(w io.Writer, c Config, gr *Grammar) error {
75 | tmp, err := template.New("Decls").Parse(declsTemplate)
76 | if err != nil {
77 | return err
78 | }
79 | return tmp.Execute(w, map[string]interface{}{
80 | "Config": c,
81 | "Grammar": gr,
82 | })
83 | }
84 |
85 | func writeRule(w io.Writer, c Config, r *Rule) error {
86 | funcs := map[string]interface{}{
87 | "gen": gen,
88 | "quote": strconv.Quote,
89 | "makeAcceptState": func(r *Rule) state {
90 | return state{
91 | Config: c,
92 | Rule: r,
93 | n: new(int),
94 | AcceptsPass: true,
95 | }
96 | },
97 | "makeNodeState": func(r *Rule) state {
98 | return state{
99 | Config: c,
100 | Rule: r,
101 | n: new(int),
102 | NodePass: true,
103 | }
104 | },
105 | "makeFailState": func(r *Rule) state {
106 | return state{
107 | Config: c,
108 | Rule: r,
109 | n: new(int),
110 | FailPass: true,
111 | }
112 | },
113 | "makeActionState": func(r *Rule) state {
114 | return state{
115 | Config: c,
116 | Rule: r,
117 | n: new(int),
118 | ActionPass: true,
119 | }
120 | },
121 | }
122 | data := map[string]interface{}{
123 | "Config": c,
124 | "Rule": r,
125 | "GenActions": *genActions,
126 | "GenParseTree": *genParseTree,
127 | }
128 | tmp, err := template.New("rule").Parse(ruleTemplate)
129 | if err != nil {
130 | return err
131 | }
132 | for _, ts := range [][2]string{
133 | {"ruleAccepts", ruleAccepts},
134 | {"ruleNode", ruleNode},
135 | {"ruleFail", ruleFail},
136 | {"stringLabels", stringLabels},
137 | {"ruleAction", ruleAction},
138 | } {
139 | name, text := ts[0], ts[1]
140 | tmp, err = tmp.New(name).Funcs(funcs).Parse(text)
141 | if err != nil {
142 | return err
143 | }
144 | }
145 | return tmp.ExecuteTemplate(w, "rule", data)
146 | }
147 |
148 | type state struct {
149 | Config
150 | Rule *Rule
151 | Expr Expr
152 | Fail string
153 | // Node is the ident into which to assign the action-pass value, or "".
154 | Node string
155 | n *int
156 | // AcceptsPass indicates whether to generate the accepts pass.
157 | AcceptsPass bool
158 | // NodePass indicates whether to generate the node pass.
159 | NodePass bool
160 | // FailPass indicates whether to generate the error pass.
161 | FailPass bool
162 | // ActionPass indicates whether to generate the action pass.
163 | ActionPass bool
164 | }
165 |
166 | func (s state) id(str string) string {
167 | (*s.n)++
168 | return str + strconv.Itoa(*s.n-1)
169 | }
170 |
171 | func gen(parentState state, expr Expr, node, fail string) (string, error) {
172 | t := reflect.TypeOf(expr)
173 | tmpString, ok := templates[reflect.TypeOf(expr)]
174 | if !ok {
175 | return "", errors.New("gen not found: " + t.String())
176 | }
177 | funcs := map[string]interface{}{
178 | "quote": strconv.Quote,
179 | "quoteRune": strconv.QuoteRune,
180 | "id": parentState.id,
181 | "gen": gen,
182 | "last": func(i int, exprs []Expr) bool { return i == len(exprs)-1 },
183 | }
184 | tmp, err := template.New(t.String()).Funcs(funcs).Parse(tmpString)
185 | if err != nil {
186 | return "", err
187 | }
188 | if err := addGlobalTemplates(tmp); err != nil {
189 | return "", err
190 | }
191 | b := bytes.NewBuffer(nil)
192 | state := parentState
193 | state.Expr = expr
194 | state.Fail = fail
195 | state.Node = node
196 | err = tmp.Execute(b, state)
197 | return b.String(), err
198 | }
199 |
200 | var globalTemplates = [][2]string{
201 | {"charClassCondition", charClassCondition},
202 | }
203 |
204 | func addGlobalTemplates(tmp *template.Template) error {
205 | for _, p := range globalTemplates {
206 | var err error
207 | if tmp, err = tmp.New(p[0]).Parse(p[1]); err != nil {
208 | return err
209 | }
210 | }
211 | return nil
212 | }
213 |
214 | // A note on formatting in Expr templates
215 | //
216 | // gofmt properly fixes any horizontal spacing issues.
217 | // However, while it collapses runs of duplicate empty lines,
218 | // it does not remove empty lines entirely.
219 | // For example, it will convert a sequence of 2 or more empty lines
220 | // into a single empty line, but it will not remove that remaining line.
221 | // So it's important to handle newlines properly
222 | // to maintain nice, consistent formatting.
223 | //
224 | // There are two rules:
225 | // 1) Templates must end with a newline, or the codegen will be invalid.
226 | // 2) Templates should not begin with a newline, or the codegen will be ugly.
227 |
228 | var declsTemplate = `
229 | {{$pre := $.Config.Prefix -}}
230 |
231 | const (
232 | {{range $r := $.Grammar.CheckedRules -}}
233 | {{$pre}}{{$r.Name.Ident}} int = {{$r.N}}
234 | {{end}}
235 | {{$pre}}N int = {{len $.Grammar.CheckedRules}}
236 | )
237 |
238 | type {{$pre}}Parser struct {
239 | text string
240 | deltaPos [][{{$pre}}N]int32
241 | deltaErr [][{{$pre}}N]int32
242 | node map[{{$pre}}key]*peg.Node
243 | fail map[{{$pre}}key]*peg.Fail
244 | act map[{{$pre}}key]interface{}
245 | lastFail int
246 | data interface{}
247 | }
248 |
249 | type {{$pre}}key struct {
250 | start int
251 | rule int
252 | }
253 |
254 | type tooBigError struct{}
255 | func (tooBigError) Error() string { return "input is too big" }
256 |
257 | func {{$pre}}NewParser(text string) (*{{$pre}}Parser, error) {
258 | n := len(text)+1
259 | if n < 0 {
260 | return nil, tooBigError{}
261 | }
262 | p := &{{$pre}}Parser{
263 | text: text,
264 | deltaPos: make([][{{$pre}}N]int32, n),
265 | deltaErr: make([][{{$pre}}N]int32, n),
266 | node: make(map[{{$pre}}key]*peg.Node),
267 | fail: make(map[{{$pre}}key]*peg.Fail),
268 | act: make(map[{{$pre}}key]interface{}),
269 | }
270 | return p, nil
271 | }
272 |
273 | func {{$pre}}max(a, b int) int {
274 | if a > b {
275 | return a
276 | }
277 | return b
278 | }
279 |
280 | func {{$pre}}memoize(parser *{{$pre}}Parser, rule, start, pos, perr int) (int, int) {
281 | parser.lastFail = perr
282 | derr := perr - start
283 | parser.deltaErr[start][rule] = int32(derr+1)
284 | if pos >= 0 {
285 | dpos := pos - start
286 | parser.deltaPos[start][rule] = int32(dpos + 1)
287 | return dpos, derr
288 | }
289 | parser.deltaPos[start][rule] = -1
290 | return -1, derr
291 | }
292 |
293 | func {{$pre}}memo(parser *{{$pre}}Parser, rule, start int) (int, int, bool) {
294 | dp := parser.deltaPos[start][rule]
295 | if dp == 0 {
296 | return 0, 0, false
297 | }
298 | if dp > 0 {
299 | dp--
300 | }
301 | de := parser.deltaErr[start][rule] - 1
302 | return int(dp), int(de), true
303 | }
304 |
305 | func {{$pre}}failMemo(parser *{{$pre}}Parser, rule, start, errPos int) (int, *peg.Fail) {
306 | if start > parser.lastFail {
307 | return -1, &peg.Fail{}
308 | }
309 | dp := parser.deltaPos[start][rule]
310 | de := parser.deltaErr[start][rule]
311 | if start+int(de-1) < errPos {
312 | if dp > 0 {
313 | return start + int(dp-1), &peg.Fail{}
314 | }
315 | return -1, &peg.Fail{}
316 | }
317 | f := parser.fail[_key{start: start, rule: rule}]
318 | if dp < 0 && f != nil {
319 | return -1, f
320 | }
321 | if dp > 0 && f != nil {
322 | return start + int(dp-1), f
323 | }
324 | return start, nil
325 | }
326 |
327 | func {{$pre}}accept(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, int), pos, perr *int) bool {
328 | dp, de := f(parser, *pos)
329 | *perr = _max(*perr, *pos+de)
330 | if dp < 0 {
331 | return false
332 | }
333 | *pos += dp
334 | return true
335 | }
336 |
337 | func {{$pre}}node(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int) (int, *peg.Node), node *peg.Node, pos *int) bool {
338 | p, kid := f(parser, *pos)
339 | if kid == nil {
340 | return false
341 | }
342 | node.Kids = append(node.Kids, kid)
343 | *pos = p
344 | return true
345 | }
346 |
347 | func {{$pre}}fail(parser *{{$pre}}Parser, f func(*{{$pre}}Parser, int, int) (int, *peg.Fail), errPos int, node *peg.Fail, pos *int) bool {
348 | p, kid := f(parser, *pos, errPos)
349 | if kid.Want != "" || len(kid.Kids) > 0 {
350 | node.Kids = append(node.Kids, kid)
351 | }
352 | if p < 0 {
353 | return false
354 | }
355 | *pos = p
356 | return true
357 | }
358 |
359 | func {{$pre}}next(parser *{{$pre}}Parser, pos int) (rune, int) {
360 | r, w := peg.DecodeRuneInString(parser.text[pos:])
361 | return r, w
362 | }
363 |
364 | func {{$pre}}sub(parser *{{$pre}}Parser, start, end int, kids []*peg.Node) *peg.Node {
365 | node := &peg.Node{
366 | Text: parser.text[start:end],
367 | Kids: make([]*peg.Node, len(kids)),
368 | }
369 | copy(node.Kids, kids)
370 | return node
371 | }
372 |
373 | func {{$pre}}leaf(parser *{{$pre}}Parser, start, end int) *peg.Node {
374 | return &peg.Node{Text: parser.text[start:end]}
375 | }
376 |
377 | // A no-op function to mark a variable as used.
378 | func use(interface{}) {}
379 | `
380 |
381 | // templates contains a mapping from Expr types to their templates.
382 | // These templates parse the input text and compute
383 | // for each rule and input-position pair encountered by the parse,
384 | // the position immediately following the text accepted by the rule,
385 | // or the position of the furthest error encountered by the rule.
386 | //
387 | // When generating the parse tree pass,
388 | // the templates also add peg.Nodes to the kids slice.
389 | //
390 | // Variables for use by the templates:
391 | // parser is the *Parser.
392 | // parser.text is the input text.
393 | // pos is the byte offset into parser.text of where to begin parsing.
394 | // If the Expr fails to parse, pos must be set to the position of the error.
395 | // If the Expr parses successfully, pos must be set
396 | // to the position just after the accepted text.
397 | //
398 | // On the accepts pass these variables are also defined:
399 | // perr is the furthest error position found so far.
400 | // It is only defined if Rule.Expr.CanFail.
401 | // It is initialized to -1 at the beginning of the parse.
402 | // It is updated by Choice nodes when branches fail,
403 | // and by rules when their entire parse fails.
404 | // ok is a scratch boolean variable.
405 | // It may be either true or false before and after each Expr template.
406 | // Each template that wants to use ok must set it before using it.
407 | //
408 | // On the node tree pass these variables are also defined:
409 | // node is the *peg.Node of the Rule being parsed.
410 | //
411 | // On the action tree pass these variables are also defined:
412 | // node is an interface{} containing the current action tree node value.
413 | //
414 | // On the fail tree pass these variables are also defined:
415 | // failure is the *peg.Fail of the Rule being parsed.
416 | // errPos is the position before which Fail nodes are not generated.
417 | var templates = map[reflect.Type]string{
418 | reflect.TypeOf(&Choice{}): choiceTemplate,
419 | reflect.TypeOf(&Action{}): actionTemplate,
420 | reflect.TypeOf(&Sequence{}): sequenceTemplate,
421 | reflect.TypeOf(&LabelExpr{}): labelExprTemplate,
422 | reflect.TypeOf(&PredExpr{}): predExprTemplate,
423 | reflect.TypeOf(&RepExpr{}): repExprTemplate,
424 | reflect.TypeOf(&OptExpr{}): optExprTemplate,
425 | reflect.TypeOf(&SubExpr{}): subExprTemplate,
426 | reflect.TypeOf(&PredCode{}): predCodeTemplate,
427 | reflect.TypeOf(&Ident{}): identTemplate,
428 | reflect.TypeOf(&Literal{}): literalTemplate,
429 | reflect.TypeOf(&Any{}): anyTemplate,
430 | reflect.TypeOf(&CharClass{}): charClassTemplate,
431 | }
432 |
433 | var ruleTemplate = `
434 | {{template "ruleAccepts" $}}
435 | {{if $.GenParseTree -}}
436 | {{template "ruleNode" $}}
437 | {{end -}}
438 | {{template "ruleFail" $}}
439 | {{if $.GenActions -}}
440 | {{template "ruleAction" $}}
441 | {{end -}}
442 | `
443 |
444 | var stringLabels = `
445 | {{- if $.Rule.Labels -}}
446 | var labels [{{len $.Rule.Labels}}]string
447 | use(labels)
448 | {{- end -}}
449 | `
450 |
451 | var ruleAccepts = `
452 | {{$pre := $.Config.Prefix -}}
453 | {{- $id := $.Rule.Name.Ident -}}
454 | func {{$pre}}{{$id}}Accepts(parser *{{$pre}}Parser, start int) (deltaPos, deltaErr int) {
455 | {{- template "stringLabels" $}}
456 | if dp, de, ok := {{$pre}}memo(parser, {{$pre}}{{$id}}, start); ok {
457 | return dp, de
458 | }
459 | pos, perr := start, -1
460 | {{gen (makeAcceptState $.Rule) $.Rule.Expr "" "fail" -}}
461 |
462 | {{if $.Rule.ErrorName -}}
463 | perr = start
464 | {{end -}}
465 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, pos, perr)
466 | {{if $.Rule.Expr.CanFail -}}
467 | fail:
468 | return {{$pre}}memoize(parser, {{$pre}}{{$id}}, start, -1, perr)
469 | {{end -}}
470 | }
471 | `
472 |
473 | var ruleNode = `
474 | {{$pre := $.Config.Prefix -}}
475 | {{- $id := $.Rule.Name.Ident -}}
476 | {{- $name := $.Rule.Name.String -}}
477 | func {{$pre}}{{$id}}Node(parser *{{$pre}}Parser, start int) (int, *peg.Node) {
478 | {{- template "stringLabels" $}}
479 | dp := parser.deltaPos[start][{{$pre}}{{$id}}]
480 | if dp < 0 {
481 | return -1, nil
482 | }
483 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}}
484 | node := parser.node[key]
485 | if node != nil {
486 | return start + int(dp - 1), node
487 | }
488 | pos := start
489 | node = &peg.Node{Name: {{quote $name}}}
490 | {{gen (makeNodeState $.Rule) $.Rule.Expr "" "fail" -}}
491 |
492 | node.Text = parser.text[start:pos]
493 | parser.node[key] = node
494 | return pos, node
495 | {{if $.Rule.Expr.CanFail -}}
496 | fail:
497 | return -1, nil
498 | {{end -}}
499 | }
500 | `
501 |
502 | var ruleFail = `
503 | {{$pre := $.Config.Prefix -}}
504 | {{- $id := $.Rule.Name.Ident -}}
505 | func {{$pre}}{{$id}}Fail(parser *{{$pre}}Parser, start, errPos int) (int, *peg.Fail) {
506 | {{- template "stringLabels" $}}
507 | pos, failure := {{$pre}}failMemo(parser, {{$pre}}{{$id}}, start, errPos)
508 | if failure != nil {
509 | return pos, failure
510 | }
511 | failure = &peg.Fail{
512 | Name: {{quote $id}},
513 | Pos: int(start),
514 | }
515 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}}
516 | {{gen (makeFailState $.Rule) $.Rule.Expr "" "fail" -}}
517 |
518 | {{if $.Rule.ErrorName -}}
519 | failure.Kids = nil
520 | {{end -}}
521 | parser.fail[key] = failure
522 | return pos, failure
523 | {{if $.Rule.Expr.CanFail -}}
524 | fail:
525 | {{if $.Rule.ErrorName -}}
526 | failure.Kids = nil
527 | failure.Want = {{quote $.Rule.ErrorName.String}}
528 | {{end -}}
529 | parser.fail[key] = failure
530 | return -1, failure
531 | {{end -}}
532 | }
533 | `
534 |
535 | var ruleAction = `
536 | {{$pre := $.Config.Prefix -}}
537 | {{- $id := $.Rule.Name.Ident -}}
538 | {{- $type := $.Rule.Expr.Type -}}
539 | func {{$pre}}{{$id}}Action(parser *{{$pre}}Parser, start int) (int, *{{$type}}) {
540 | {{- template "stringLabels" $}}
541 | {{if $.Rule.Labels -}}
542 | {{range $l := $.Rule.Labels -}}
543 | var label{{$l.N}} {{$l.Type}}
544 | {{end}}
545 | {{- end -}}
546 | dp := parser.deltaPos[start][{{$pre}}{{$id}}]
547 | if dp < 0 {
548 | return -1, nil
549 | }
550 | key := {{$pre}}key{start: start, rule: {{$pre}}{{$id}}}
551 | n := parser.act[key]
552 | if n != nil {
553 | n := n.({{$type}})
554 | return start + int(dp - 1), &n
555 | }
556 | var node {{$type}}
557 | pos := start
558 | {{gen (makeActionState $.Rule) $.Rule.Expr "node" "fail" -}}
559 |
560 | parser.act[key] = node
561 | return pos, &node
562 | {{if $.Rule.Expr.CanFail -}}
563 | fail:
564 | return -1, nil
565 | {{end -}}
566 | }
567 | `
568 |
569 | var choiceTemplate = `// {{$.Expr.String}}
570 | {
571 | {{- $ok := id "ok" -}}
572 | {{- $nkids := id "nkids" -}}
573 | {{- $node0 := id "node" -}}
574 | {{- $pos0 := id "pos" -}}
575 | {{$pos0}} := pos
576 | {{if $.NodePass -}}
577 | {{$nkids}} := len(node.Kids)
578 | {{else if (and $.Node $.ActionPass) -}}
579 | var {{$node0}} {{$.Expr.Type}}
580 | {{end -}}
581 | {{- range $i, $subExpr := $.Expr.Exprs -}}
582 | {{- $fail := id "fail" -}}
583 | {{gen $ $subExpr $.Node $fail -}}
584 |
585 | {{if $subExpr.CanFail -}}
586 | goto {{$ok}}
587 | {{$fail}}:
588 | {{if $.NodePass -}}
589 | node.Kids = node.Kids[:{{$nkids}}]
590 | {{else if (and $.Node $.ActionPass) -}}
591 | {{$.Node}} = {{$node0}}
592 | {{end -}}
593 | pos = {{$pos0}}
594 | {{if last $i $.Expr.Exprs -}}
595 | goto {{$.Fail}}
596 | {{end -}}
597 | {{end -}}
598 | {{end -}}
599 | {{$ok}}:
600 | }
601 | `
602 |
603 | var actionTemplate = `// action
604 | {{if $.ActionPass -}}
605 | {
606 | {{$start := id "start" -}}
607 | {{$start}} := pos
608 | {{gen $ $.Expr.Expr "" $.Fail -}}
609 | {{/* TODO: don't put the func in the scope of the rule. */ -}}
610 | {{if $.Node}}{{$.Node}} = {{end}} func(
611 | start, end int,
612 | {{- if $.Expr.Labels -}}
613 | {{range $lexpr := $.Expr.Labels -}}
614 | {{$lexpr.Label}} {{$lexpr.Type}},
615 | {{- end -}}
616 | {{- end -}})
617 | {{- $.Expr.Type}} { {{$.Expr.Code}} }(
618 | {{$start}}, pos,
619 | {{- if $.Expr.Labels -}}
620 | {{range $lexpr := $.Expr.Labels -}}
621 | label{{$lexpr.N}},
622 | {{- end -}}
623 | {{- end -}}
624 | )
625 | }
626 | {{else -}}
627 | {{gen $ $.Expr.Expr "" $.Fail -}}
628 | {{end -}}
629 | `
630 |
631 | var sequenceTemplate = `// {{$.Expr.String}}
632 | {{$node := id "node" -}}
633 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}}
634 | {
635 | var {{$node}} string
636 | {{else if (and $.ActionPass $.Node) -}}
637 | {{$.Node}} = make({{$.Expr.Type}}, {{len $.Expr.Exprs}})
638 | {{end -}}
639 |
640 | {{range $i, $subExpr := $.Expr.Exprs -}}
641 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}}
642 | {{gen $ $subExpr $node $.Fail -}}
643 | {{$.Node}}, {{$node}} = {{$.Node}}+{{$node}}, ""
644 | {{else if (and $.ActionPass $.Node) -}}
645 | {{gen $ $subExpr (printf "%s[%d]" $.Node $i) $.Fail -}}
646 | {{else -}}
647 | {{gen $ $subExpr "" $.Fail -}}
648 | {{end -}}
649 | {{end -}}
650 |
651 | {{if (and $.ActionPass $.Node (eq $.Expr.Type "string")) -}}
652 | }
653 | {{end -}}
654 | `
655 |
656 | var labelExprTemplate = `// {{$.Expr.String}}
657 | {{$name := $.Expr.Label.String -}}
658 | {{- $pos0 := id "pos" -}}
659 | {{- $subExpr := $.Expr.Expr -}}
660 | {
661 | {{$pos0}} := pos
662 | {{if $.ActionPass -}}
663 | {{gen $ $subExpr (printf "label%d" $.Expr.N) $.Fail -}}
664 | {{if $.Node -}}
665 | {{$.Node}} = label{{$.Expr.N}}
666 | {{end -}}
667 | {{else -}}
668 | {{gen $ $subExpr "" $.Fail -}}
669 | {{end -}}
670 | labels[{{$.Expr.N}}] = parser.text[{{$pos0}}:pos]
671 | }
672 | `
673 |
674 | var predExprTemplate = `// {{$.Expr.String}}
675 | {
676 | {{- $pre := $.Config.Prefix -}}
677 | {{- $ok := id "ok" -}}
678 | {{- $subExpr := $.Expr.Expr -}}
679 | {{- $pos0 := id "pos" -}}
680 | {{- $nkids := id "nkids" -}}
681 | {{- $perr0 := id "perr" -}}
682 | {{$pos0}} := pos
683 | {{if $.AcceptsPass -}}
684 | {{$perr0}} := perr
685 | {{else if $.NodePass -}}
686 | {{$nkids}} := len(node.Kids)
687 | {{else if $.FailPass -}}
688 | {{$nkids}} := len(failure.Kids)
689 | {{end -}}
690 |
691 | {{- if $.Expr.Neg -}}
692 | {{gen $ $subExpr "" $ok -}}
693 | pos = {{$pos0}}
694 | {{if $.NodePass -}}
695 | node.Kids = node.Kids[:{{$nkids}}]
696 | {{else if $.AcceptsPass -}}
697 | perr = {{$pre}}max({{$perr0}}, pos)
698 | {{else if $.FailPass -}}
699 | failure.Kids = failure.Kids[:{{$nkids}}]
700 | if pos >= errPos {
701 | failure.Kids = append(failure.Kids, &peg.Fail{
702 | Pos: int(pos),
703 | Want: {{quote $.Expr.String}},
704 | })
705 | }
706 | {{end -}}
707 | goto {{$.Fail}}
708 | {{else -}}
709 | {{- $fail := id "fail" -}}
710 | {{gen $ $subExpr "" $fail -}}
711 | goto {{$ok}}
712 | {{$fail}}:
713 | pos = {{$pos0}}
714 | {{if $.AcceptsPass -}}
715 | perr = {{$pre}}max({{$perr0}}, pos)
716 | {{else if $.FailPass -}}
717 | failure.Kids = failure.Kids[:{{$nkids}}]
718 | if pos >= errPos {
719 | failure.Kids = append(failure.Kids, &peg.Fail{
720 | Pos: int(pos),
721 | Want: {{quote $.Expr.String}},
722 | })
723 | }
724 | {{end -}}
725 | goto {{$.Fail}}
726 | {{end -}}
727 |
728 | {{$ok}}:
729 | pos = {{$pos0}}
730 | {{if $.AcceptsPass -}}
731 | perr = {{$perr0}}
732 | {{else if $.NodePass -}}
733 | node.Kids = node.Kids[:{{$nkids}}]
734 | {{else if $.FailPass -}}
735 | failure.Kids = failure.Kids[:{{$nkids}}]
736 | {{else if (and $.ActionPass $.Node) -}}
737 | {{$.Node}} = ""
738 | {{end -}}
739 | }
740 | `
741 |
742 | var repExprTemplate = `// {{$.Expr.String}}
743 | {{$nkids := id "nkids" -}}
744 | {{$pos0 := id "pos" -}}
745 | {{$node := id "node" -}}
746 | {{- $fail := id "fail" -}}
747 | {{- $subExpr := $.Expr.Expr -}}
748 | {{if eq $.Expr.Op '+' -}}
749 | {{if (and $.ActionPass $.Node) -}}
750 | {
751 | var {{$node}} {{$subExpr.Type}}
752 | {{gen $ $subExpr $node $.Fail -}}
753 | {{if (eq $.Expr.Type "string") -}}
754 | {{$.Node}} += {{$node}}
755 | {{else -}}
756 | {{$.Node}} = append({{$.Node}}, {{$node}})
757 | {{end -}}
758 | }
759 | {{else -}}
760 | {{gen $ $subExpr "" $.Fail -}}
761 | {{end -}}
762 | {{end -}}
763 | for {
764 | {{if $.NodePass -}}
765 | {{$nkids}} := len(node.Kids)
766 | {{end -}}
767 | {{$pos0}} := pos
768 | {{if (and $.ActionPass $.Node) -}}
769 | var {{$node}} {{$subExpr.Type}}
770 | {{gen $ $subExpr $node $fail -}}
771 | {{if (eq $.Expr.Type "string") -}}
772 | {{$.Node}} += {{$node}}
773 | {{else -}}
774 | {{$.Node}} = append({{$.Node}}, {{$node}})
775 | {{end -}}
776 | {{else -}}
777 | {{gen $ $subExpr "" $fail -}}
778 | {{end -}}
779 | continue
780 | {{$fail}}:
781 | {{if $.NodePass -}}
782 | node.Kids = node.Kids[:{{$nkids}}]
783 | {{end -}}
784 | pos = {{$pos0}}
785 | break
786 | }
787 | `
788 |
789 | var optExprTemplate = `// {{$.Expr.String}}
790 | {{$nkids := id "nkids" -}}
791 | {{$pos0 := id "pos" -}}
792 | {{- $fail := id "fail" -}}
793 | {{- $subExpr := $.Expr.Expr -}}
794 | {{- if $subExpr.CanFail -}}
795 | {
796 | {{if $.NodePass -}}
797 | {{$nkids}} := len(node.Kids)
798 | {{end -}}
799 | {{$pos0}} := pos
800 | {{if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}}
801 | {{gen $ $subExpr $.Node $fail -}}
802 | {{else if (and $.ActionPass $.Node) -}}
803 | {{$.Node}} = new({{$subExpr.Type}})
804 | {{gen $ $subExpr (printf "*%s" $.Node) $fail -}}
805 | {{else -}}
806 | {{gen $ $subExpr "" $fail -}}
807 | {{end -}}
808 | {{- $ok := id "ok" -}}
809 | goto {{$ok}}
810 | {{$fail}}:
811 | {{if $.NodePass -}}
812 | node.Kids = node.Kids[:{{$nkids}}]
813 | {{else if (and $.ActionPass $.Node (eq $subExpr.Type "string")) -}}
814 | {{$.Node}} = ""
815 | {{else if (and $.ActionPass $.Node) -}}
816 | {{$.Node}} = nil
817 | {{end -}}
818 | pos = {{$pos0}}
819 | {{$ok}}:
820 | }
821 | {{else -}}
822 | {{- /* TODO: disallow this case in check */ -}}
823 | {{gen $ $subExpr $fail -}}
824 | {{- end -}}
825 | `
826 |
827 | var subExprTemplate = `// {{$.Expr.String}}
828 | {{if $.NodePass -}}
829 | {
830 | {{- $pre := $.Config.Prefix -}}
831 | {{$nkids := id "nkids" -}}
832 | {{$nkids}} := len(node.Kids)
833 | {{$pos0 := id "pos0" -}}
834 | {{$pos0}} := pos
835 | {{gen $ $.Expr.Expr $.Node $.Fail -}}
836 | sub := {{$pre}}sub(parser, {{$pos0}}, pos, node.Kids[{{$nkids}}:])
837 | node.Kids = append(node.Kids[:{{$nkids}}], sub)
838 | }
839 | {{else -}}
840 | {{gen $ $.Expr.Expr $.Node $.Fail -}}
841 | {{end -}}
842 | `
843 |
844 | // TODO: instead, create a function for each predicate
845 | // with params that are the parser followed by
846 | // a string for each defined label.
847 | // Predicate code shouldn't have access to the label.Kids,
848 | // because it's undefined for the Accepts and Fail pass.
849 | // NOTE: kids are OK for actions,
850 | // because actions are only to be called by the Node pass
851 | // on a successful parse.
852 | var predCodeTemplate = `// pred code
853 | if ok := func(
854 | {{- if $.Expr.Labels -}}
855 | {{range $lexpr := $.Expr.Labels -}}
856 | {{$lexpr.Label}} string,
857 | {{- end -}}
858 | {{- end -}}) bool { return {{$.Expr.Code}} }(
859 | {{- if $.Expr.Labels -}}
860 | {{range $lexpr := $.Expr.Labels -}}
861 | labels[{{$lexpr.N}}],
862 | {{- end -}}
863 | {{- end -}}
864 | ); {{if not $.Expr.Neg}}!{{end}}ok {
865 | {{if $.AcceptsPass -}}
866 | {{- $pre := $.Config.Prefix -}}
867 | perr = {{$pre}}max(perr, pos)
868 | {{else if $.FailPass -}}
869 | if pos >= errPos {
870 | failure.Kids = append(failure.Kids, &peg.Fail{
871 | Pos: int(pos),
872 | Want:
873 | {{- if $.Expr.Neg}}"!{"{{else}}"&{"{{end}}+
874 | {{- quote $.Expr.Code.String}}+"}",
875 | })
876 | }
877 | {{end -}}
878 | goto {{$.Fail}}
879 | }
880 | {{if (and $.ActionPass $.Node) -}}
881 | {{$.Node}} = ""
882 | {{end -}}
883 | `
884 |
885 | var identTemplate = `// {{$.Expr.String}}
886 | {{$pre := $.Config.Prefix -}}
887 | {{- $name := $.Expr.Name.Ident -}}
888 | {{if $.AcceptsPass -}}
889 | if !{{$pre}}accept(parser, {{$pre}}{{$name}}Accepts, &pos, &perr) {
890 | goto {{$.Fail}}
891 | }
892 | {{else if $.NodePass -}}
893 | if !{{$pre}}node(parser, {{$pre}}{{$name}}Node, node, &pos) {
894 | goto {{$.Fail}}
895 | }
896 | {{else if $.FailPass -}}
897 | if !{{$pre}}fail(parser, {{$pre}}{{$name}}Fail, errPos, failure, &pos) {
898 | goto {{$.Fail}}
899 | }
900 | {{else if $.ActionPass -}}
901 | if p, n := {{$pre}}{{$name}}Action(parser, pos); n == nil {
902 | goto {{$.Fail}}
903 | } else {
904 | {{if (and $.ActionPass $.Node) -}}
905 | {{$.Node}} = *n
906 | {{end -}}
907 | pos = p
908 | }
909 | {{end -}}
910 | `
911 |
912 | var literalTemplate = `// {{$.Expr.String}}
913 | {{$want := quote $.Expr.Text.String -}}
914 | {{- $n := len $.Expr.Text.String -}}
915 | if len(parser.text[pos:]) < {{$n}} || parser.text[pos:pos+{{$n}}] != {{$want}} {
916 | {{if $.AcceptsPass -}}
917 | {{- $pre := $.Config.Prefix -}}
918 | perr = {{$pre}}max(perr, pos)
919 | {{else if $.FailPass -}}
920 | if pos >= errPos {
921 | failure.Kids = append(failure.Kids, &peg.Fail{
922 | Pos: int(pos),
923 | Want: {{quote $.Expr.String}},
924 | })
925 | }
926 | {{end -}}
927 | goto {{$.Fail}}
928 | }
929 | {{$pre := $.Config.Prefix -}}
930 | {{if $.NodePass -}}
931 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + {{$n}}))
932 | {{else if (and $.ActionPass $.Node) -}}
933 | {{$.Node}} = parser.text[pos:pos+{{$n}}]
934 | {{end -}}
935 | {{if eq $n 1 -}}
936 | pos++
937 | {{- else -}}
938 | pos += {{$n}}
939 | {{- end}}
940 | `
941 |
942 | var anyTemplate = `// {{$.Expr.String}}
943 | {{$pre := $.Config.Prefix -}}
944 | {{- /* \uFFFD is utf8.RuneError */ -}}
945 | if r, w := {{$pre}}next(parser, pos); w == 0 || r == '\uFFFD' {
946 | {{if $.AcceptsPass -}}
947 | {{- $pre := $.Config.Prefix -}}
948 | perr = {{$pre}}max(perr, pos)
949 | {{else if $.FailPass -}}
950 | if pos >= errPos {
951 | failure.Kids = append(failure.Kids, &peg.Fail{
952 | Pos: int(pos),
953 | Want: ".",
954 | })
955 | }
956 | {{end -}}
957 | goto {{$.Fail}}
958 | } else {
959 | {{$pre := $.Config.Prefix -}}
960 | {{if $.NodePass -}}
961 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w))
962 | {{else if (and $.ActionPass $.Node) -}}
963 | {{$.Node}} = parser.text[pos:pos+w]
964 | {{end -}}
965 | pos += w
966 | }
967 | `
968 |
969 | // charClassCondition emits the if-condition for a character class,
970 | // assuming that r and w are the rune and its width respectively.
971 | var charClassCondition = `
972 | {{- /* \uFFFD is utf8.RuneError */ -}}
973 | {{- if $.Expr.Neg -}}w == 0 || r == '\uFFFD' ||{{end}}
974 | {{- range $i, $span := $.Expr.Spans -}}
975 | {{- $first := index $span 0 -}}
976 | {{- $second := index $span 1 -}}
977 | {{- if $.Expr.Neg -}}
978 | {{- if gt $i 0 -}} || {{- end -}}
979 | {{- if eq $first $second -}}
980 | r == {{quoteRune $first}}
981 | {{- else -}}
982 | (r >= {{quoteRune $first}} && r <= {{quoteRune $second}})
983 | {{- end -}}
984 | {{- else -}}
985 | {{- if gt $i 0}} && {{end -}}
986 | {{- if eq $first $second -}}
987 | r != {{quoteRune $first}}
988 | {{- else -}}
989 | (r < {{quoteRune $first}} || r > {{quoteRune $second}})
990 | {{- end -}}
991 | {{- end -}}
992 | {{- end -}}
993 | `
994 |
995 | var charClassTemplate = `// {{$.Expr.String}}
996 | {{$pre := $.Config.Prefix -}}
997 | if r, w := {{$pre}}next(parser, pos);
998 | {{template "charClassCondition" $}} {
999 | {{if $.AcceptsPass -}}
1000 | {{- $pre := $.Config.Prefix -}}
1001 | perr = {{$pre}}max(perr, pos)
1002 | {{else if $.FailPass -}}
1003 | if pos >= errPos {
1004 | failure.Kids = append(failure.Kids, &peg.Fail{
1005 | Pos: int(pos),
1006 | Want: {{quote $.Expr.String}},
1007 | })
1008 | }
1009 | {{end -}}
1010 | goto {{$.Fail}}
1011 | } else {
1012 | {{$pre := $.Config.Prefix -}}
1013 | {{if $.NodePass -}}
1014 | {{$pre := $.Config.Prefix -}}
1015 | node.Kids = append(node.Kids, {{$pre}}leaf(parser, pos, pos + w))
1016 | {{else if (and $.ActionPass $.Node) -}}
1017 | {{$.Node}} = parser.text[pos:pos+w]
1018 | {{end -}}
1019 | pos += w
1020 | }
1021 | `
1022 |
--------------------------------------------------------------------------------
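
A hypothetical driver for Generate, assuming the package's Parse (grammar.go) and a Check step from check.go that populates gr.CheckedRules; the Check signature below is an assumption for illustration, and main.go is the authoritative driver:

    func generateParser(in io.RuneScanner, srcName string, out io.Writer) error {
        gr, err := Parse(in, srcName)
        if err != nil {
            return err
        }
        if err := Check(gr); err != nil { // assumed signature; see check.go
            return err
        }
        // Emit the parser with the default "_" prefix for generated identifiers.
        return Config{Prefix: "_"}.Generate(out, srcName, gr)
    }
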
/go.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "go/ast"
11 | "go/parser"
12 | "go/printer"
13 | "go/scanner"
14 | "go/token"
15 | "strings"
16 | )
17 |
18 | // ParseGoFile parses a Go source file, returning any syntax errors.
19 | // The errors contain location information starting from the given Loc.
20 | func ParseGoFile(loc Loc, code string) error {
21 | _, err := parser.ParseFile(token.NewFileSet(), loc.File, code, 0)
22 | if err == nil {
23 | return nil
24 | }
25 |
26 | el, ok := err.(scanner.ErrorList)
27 | if !ok {
28 | return err
29 | }
30 | p := el[0].Pos
31 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based.
32 | if p.Line > 1 {
33 | loc.Col = 1
34 | }
35 | loc.Col += p.Column - 1
36 | return Err(loc, el[0].Msg)
37 | }
38 |
39 | // ParseGoBody parses go function body statements, returning any syntax errors.
40 | // The errors contain location information starting from the given Loc.
41 | func ParseGoBody(loc Loc, code string) (string, error) {
42 | code = "package main; func p() interface{} {\n" + code + "}"
43 | fset := token.NewFileSet()
44 | file, err := parser.ParseFile(fset, loc.File, code, 0)
45 | if err == nil {
46 | return inferType(loc, fset, file)
47 | }
48 |
49 | el, ok := err.(scanner.ErrorList)
50 | if !ok {
51 | return "", err
52 | }
53 | p := el[0].Pos
54 | loc.Line += p.Line - 2 // -2: p.Line is 1-based, and the wrapper adds a func line.
55 | if p.Line > 2 {
56 | loc.Col = 1
57 | }
58 | loc.Col += p.Column - 1
59 | return "", Err(loc, el[0].Msg)
60 | }
61 |
62 | // inferType infers the type of a function by considering its first return statement.
63 | // If the returned expression is:
64 | // * a type conversion, the type is returned.
65 | // * a type assertion, the type is returned.
66 | // * a function literal, the type is returned.
67 | // * a composite literal, the type is returned.
68 | // * an &-composite literal, the type is returned.
69 | // * an int literal, int is returned.
70 | // * a float literal, float64 is returned.
71 | // * a character literal, rune is returned.
72 | // * a string literal, string is returned.
73 | //
74 | // If the file does not have exactly one top-level function, inferType panics.
75 | // If the function has no return statement, an error is returned.
76 | // If the return statement does not have exactly one returned value, an error is returned.
77 | // If the returned value is not an expression in the list above, an error is returned.
78 | func inferType(loc Loc, fset *token.FileSet, file *ast.File) (string, error) {
79 | var funcDecl *ast.FuncDecl
80 | for _, decl := range file.Decls {
81 | if d, ok := decl.(*ast.FuncDecl); ok {
82 | if funcDecl != nil {
83 | panic("multiple function declarations")
84 | }
85 | funcDecl = d
86 | }
87 | }
88 | if funcDecl == nil {
89 | panic("no function declarations")
90 | }
91 |
92 | var v findReturnVisitor
93 | ast.Walk(&v, funcDecl)
94 | if v.retStmt == nil {
95 | return "", Err(loc, "no return statement")
96 | }
97 | if len(v.retStmt.Results) != 1 {
98 | return "", Err(loc, "must return exactly one value")
99 | }
100 |
101 | var typ interface{}
102 | switch e := v.retStmt.Results[0].(type) {
103 | case *ast.CallExpr:
104 | if len(e.Args) != 1 {
105 | var s strings.Builder
106 | printer.Fprint(&s, fset, e)
107 | return "", Err(loc, "cannot infer type from a function call: "+s.String())
108 | }
109 | typ = e.Fun
110 | case *ast.TypeAssertExpr:
111 | typ = e.Type
112 | case *ast.FuncLit:
113 | typ = e.Type
114 | case *ast.CompositeLit:
115 | typ = e.Type
116 | case *ast.BasicLit:
117 | switch e.Kind {
118 | case token.INT:
119 | return "int", nil
120 | case token.FLOAT:
121 | return "float64", nil
122 | case token.CHAR:
123 | return "rune", nil
124 | case token.STRING:
125 | return "string", nil
126 | }
127 | case *ast.UnaryExpr:
128 | lit, ok := e.X.(*ast.CompositeLit)
129 | if !ok || e.Op != token.AND {
130 | return "", Err(loc, "cannot infer type")
131 | }
132 | var s strings.Builder
133 | printer.Fprint(&s, fset, lit.Type)
134 | return "*" + s.String(), nil
135 | default:
136 | return "", Err(loc, "cannot infer type")
137 | }
138 | var s strings.Builder
139 | printer.Fprint(&s, fset, typ)
140 | return s.String(), nil
141 | }
142 |
143 | type findReturnVisitor struct {
144 | retStmt *ast.ReturnStmt
145 | }
146 |
147 | func (v *findReturnVisitor) Visit(n ast.Node) ast.Visitor {
148 | if r, ok := n.(*ast.ReturnStmt); ok {
149 | v.retStmt = r
150 | return nil
151 | }
152 | return v
153 | }
154 |
155 | // ParseGoExpr parses a go expression, returning any syntax errors.
156 | // The errors contain location information starting from the given Loc.
157 | func ParseGoExpr(loc Loc, code string) error {
158 | _, err := parser.ParseExprFrom(token.NewFileSet(), loc.File, code, 0)
159 | if err == nil {
160 | return nil
161 | }
162 |
163 | el, ok := err.(scanner.ErrorList)
164 | if !ok {
165 | return err
166 | }
167 | p := el[0].Pos
168 | loc.Line += p.Line - 1 // -1 because p.Line is 1-based.
169 | if p.Line > 1 {
170 | loc.Col = 1
171 | }
172 | loc.Col += p.Column - 1
173 | return Err(loc, el[0].Msg)
174 | }
175 |
--------------------------------------------------------------------------------
/go.mod:
--------------------------------------------------------------------------------
1 | module github.com/eaburns/peggy
2 |
3 | go 1.13
4 |
5 | require github.com/eaburns/pretty v1.0.0
6 |
--------------------------------------------------------------------------------
/go.sum:
--------------------------------------------------------------------------------
1 | github.com/eaburns/pretty v1.0.0 h1:00W1wrrtMXUSqLPN0txS8j7g9qFXy6nA5vZVqVQOo6w=
2 | github.com/eaburns/pretty v1.0.0/go.mod h1:retcK8A0KEgdmb0nuxhvyxixwCmEPO7SKlK0IJhjg8A=
3 |
--------------------------------------------------------------------------------
/gok.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | # Copyright 2017 The Peggy Authors
3 | #
4 | # Use of this source code is governed by a BSD-style
5 | # license that can be found in the LICENSE file or at
6 | # https://developers.google.com/open-source/licenses/bsd.
7 |
8 | #
9 | # Verifies that go code passes go fmt, go vet, golint, and go test.
10 | #
11 |
12 | o=$(mktemp tmp.XXXXXXXXXX)
13 |
14 | fail() {
15 | echo Failed
16 | cat $o
17 | rm $o
18 | exit 1
19 | }
20 |
21 | trap fail INT TERM
22 |
23 | #echo Generating
24 | #go generate . || fail
25 |
26 | echo Formatting
27 | gofmt -l $(find . -name '*.go') > $o 2>&1
28 | test $(wc -l $o | awk '{ print $1 }') = "0" || fail
29 |
30 | echo Vetting
31 | go vet ./... > $o 2>&1 || fail
32 |
33 | echo Testing
34 | go test -test.timeout=60s ./... > $o 2>&1 || fail
35 |
36 | echo Linting
37 | golint ./... \
38 | | grep -v 'receiver name peggyrcvr should be consistent'\
39 | | grep -v 'const peggyEofCode should be peggyEOFCode'\
40 | | egrep -v 'grammar.y.*ALL_CAPS'\
41 | | egrep -v '(Begin|End|FullParenString|Type|CanFail|Walk).*should have comment or be unexported'\
42 | | egrep -v 'GenAccept should have comment or'\
43 | | egrep -v 'calc.go.*use underscores'\
44 | | egrep -v 'calc.go.*const __ should be _'\
45 | > $o 2>&1
46 | # Silly: diff the grepped golint output with empty.
47 | # If it's non-empty, error, otherwise succeed.
48 | e=$(mktemp tmp.XXXXXXXXXX)
49 | touch $e
50 | diff $o $e > /dev/null || { rm $e; fail; }
51 |
52 | rm $o $e
53 |
--------------------------------------------------------------------------------
/grammar.go:
--------------------------------------------------------------------------------
1 | //line grammar.y:8
2 | package main
3 |
4 | import __yyfmt__ "fmt"
5 |
6 | //line grammar.y:8
7 | import "io"
8 |
9 | //line grammar.y:13
10 | type peggySymType struct {
11 | yys int
12 | text text
13 | cclass *CharClass
14 | loc Loc
15 | expr Expr
16 | action *Action
17 | rule Rule
18 | rules []Rule
19 | texts []Text
20 | name Name
21 | grammar Grammar
22 | }
23 |
24 | const _ERROR = 57346
25 | const _IDENT = 57347
26 | const _STRING = 57348
27 | const _CODE = 57349
28 | const _ARROW = 57350
29 | const _CHARCLASS = 57351
30 |
31 | var peggyToknames = [...]string{
32 | "$end",
33 | "error",
34 | "$unk",
35 | "_ERROR",
36 | "_IDENT",
37 | "_STRING",
38 | "_CODE",
39 | "_ARROW",
40 | "_CHARCLASS",
41 | "'.'",
42 | "'*'",
43 | "'+'",
44 | "'?'",
45 | "':'",
46 | "'/'",
47 | "'!'",
48 | "'&'",
49 | "'('",
50 | "')'",
51 | "'^'",
52 | "'<'",
53 | "'>'",
54 | "','",
55 | "'\\n'",
56 | }
57 | var peggyStatenames = [...]string{}
58 |
59 | const peggyEofCode = 1
60 | const peggyErrCode = 2
61 | const peggyInitialStackSize = 16
62 |
63 | //line grammar.y:174
64 |
65 | // Parse parses a Peggy input file, and returns the Grammar.
66 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) {
67 | x := &lexer{
68 | in: in,
69 | file: fileName,
70 | line: 1,
71 | }
72 | peggyParse(x)
73 | if x.err != nil {
74 | return nil, x.err
75 | }
76 | return &x.result, nil
77 | }
78 |
79 | //line yacctab:1
80 | var peggyExca = [...]int{
81 | -1, 1,
82 | 1, -1,
83 | -2, 0,
84 | -1, 64,
85 | 19, 42,
86 | -2, 0,
87 | }
88 |
89 | const peggyPrivate = 57344
90 |
91 | const peggyLast = 118
92 |
93 | var peggyAct = [...]int{
94 |
95 | 2, 31, 26, 27, 60, 68, 29, 4, 14, 42,
96 | 43, 18, 48, 69, 9, 44, 22, 21, 44, 18,
97 | 25, 3, 38, 41, 56, 10, 12, 4, 13, 15,
98 | 20, 24, 11, 49, 50, 46, 10, 54, 10, 7,
99 | 17, 15, 16, 1, 55, 57, 51, 52, 53, 58,
100 | 23, 59, 62, 19, 11, 63, 8, 64, 6, 45,
101 | 66, 65, 11, 39, 61, 67, 40, 37, 35, 34,
102 | 28, 5, 0, 33, 32, 36, 30, 39, 47, 0,
103 | 40, 37, 0, 0, 0, 0, 0, 33, 32, 36,
104 | 11, 39, 0, 0, 40, 37, 0, 0, 0, 0,
105 | 0, 33, 32, 36, 30, 39, 0, 0, 40, 37,
106 | 0, 0, 0, 0, 0, 33, 32, 36,
107 | }
108 | var peggyPact = [...]int{
109 |
110 | -17, -1000, 49, -1000, -17, -1000, -17, -17, -1000, -1000,
111 | 34, -10, -1000, 27, -1000, 27, -17, 8, 26, -17,
112 | -1000, 99, -17, -13, -1000, -1000, 0, -1000, 71, -1000,
113 | -2, -1000, -17, -17, 35, -1000, -17, -1000, -1000, -1000,
114 | -1000, 99, -1000, 19, -17, -1000, -1000, -1000, -17, 57,
115 | 57, -1000, -1000, -1000, 99, 0, -1000, 99, 85, -1000,
116 | -1000, -1000, -1000, -1000, 3, -1000, -1000, -6, -1000, -1000,
117 | }
118 | var peggyPgo = [...]int{
119 |
120 | 0, 71, 2, 3, 70, 6, 1, 69, 68, 59,
121 | 4, 58, 50, 14, 39, 22, 43, 0, 21,
122 | }
123 | var peggyR1 = [...]int{
124 |
125 | 0, 16, 1, 1, 11, 14, 14, 14, 13, 13,
126 | 15, 15, 12, 12, 2, 2, 3, 3, 4, 4,
127 | 5, 5, 6, 6, 6, 7, 7, 7, 7, 8,
128 | 8, 8, 8, 8, 8, 8, 8, 10, 9, 18,
129 | 18, 17, 17,
130 | }
131 | var peggyR2 = [...]int{
132 |
133 | 0, 2, 4, 2, 1, 3, 1, 0, 4, 5,
134 | 4, 1, 1, 3, 4, 1, 2, 1, 2, 1,
135 | 4, 1, 3, 3, 1, 2, 2, 2, 1, 5,
136 | 3, 3, 1, 1, 1, 1, 4, 1, 1, 2,
137 | 1, 1, 0,
138 | }
139 | var peggyChk = [...]int{
140 |
141 | -1000, -16, -17, -18, 24, -1, -11, -14, 7, -13,
142 | -15, 5, -18, -18, -17, -18, 8, 6, 21, -14,
143 | -13, -17, 8, -12, 5, -17, -2, -3, -4, -5,
144 | 5, -6, 17, 16, -7, -8, 18, 10, -15, 6,
145 | 9, -17, 22, 23, 15, -9, -5, 7, 14, -17,
146 | -17, 11, 12, 13, -17, -2, 5, -17, -17, -6,
147 | -10, 7, -6, -10, -2, -3, -6, -17, 2, 19,
148 | }
149 | var peggyDef = [...]int{
150 |
151 | 42, -2, 7, 41, 40, 1, 0, 42, 4, 6,
152 | 0, 11, 39, 7, 3, 41, 42, 0, 0, 42,
153 | 5, 0, 42, 0, 12, 2, 8, 15, 17, 19,
154 | 11, 21, 42, 42, 24, 28, 42, 32, 33, 34,
155 | 35, 0, 10, 0, 42, 16, 18, 38, 42, 0,
156 | 0, 25, 26, 27, 0, 9, 13, 0, 0, 22,
157 | 30, 37, 23, 31, -2, 14, 20, 0, 36, 29,
158 | }
159 | var peggyTok1 = [...]int{
160 |
161 | 1, 3, 3, 3, 3, 3, 3, 3, 3, 3,
162 | 24, 3, 3, 3, 3, 3, 3, 3, 3, 3,
163 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
164 | 3, 3, 3, 16, 3, 3, 3, 3, 17, 3,
165 | 18, 19, 11, 12, 23, 3, 10, 15, 3, 3,
166 | 3, 3, 3, 3, 3, 3, 3, 3, 14, 3,
167 | 21, 3, 22, 13, 3, 3, 3, 3, 3, 3,
168 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
169 | 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
170 | 3, 3, 3, 3, 20,
171 | }
172 | var peggyTok2 = [...]int{
173 |
174 | 2, 3, 4, 5, 6, 7, 8, 9,
175 | }
176 | var peggyTok3 = [...]int{
177 | 0,
178 | }
179 |
180 | var peggyErrorMessages = [...]struct {
181 | state int
182 | token int
183 | msg string
184 | }{}
185 |
186 | //line yaccpar:1
187 |
188 | /* parser for yacc output */
189 |
190 | var (
191 | peggyDebug = 0
192 | peggyErrorVerbose = false
193 | )
194 |
195 | type peggyLexer interface {
196 | Lex(lval *peggySymType) int
197 | Error(s string)
198 | }
199 |
200 | type peggyParser interface {
201 | Parse(peggyLexer) int
202 | Lookahead() int
203 | }
204 |
205 | type peggyParserImpl struct {
206 | lval peggySymType
207 | stack [peggyInitialStackSize]peggySymType
208 | char int
209 | }
210 |
211 | func (p *peggyParserImpl) Lookahead() int {
212 | return p.char
213 | }
214 |
215 | func peggyNewParser() peggyParser {
216 | return &peggyParserImpl{}
217 | }
218 |
219 | const peggyFlag = -1000
220 |
221 | func peggyTokname(c int) string {
222 | if c >= 1 && c-1 < len(peggyToknames) {
223 | if peggyToknames[c-1] != "" {
224 | return peggyToknames[c-1]
225 | }
226 | }
227 | return __yyfmt__.Sprintf("tok-%v", c)
228 | }
229 |
230 | func peggyStatname(s int) string {
231 | if s >= 0 && s < len(peggyStatenames) {
232 | if peggyStatenames[s] != "" {
233 | return peggyStatenames[s]
234 | }
235 | }
236 | return __yyfmt__.Sprintf("state-%v", s)
237 | }
238 |
239 | func peggyErrorMessage(state, lookAhead int) string {
240 | const TOKSTART = 4
241 |
242 | if !peggyErrorVerbose {
243 | return "syntax error"
244 | }
245 |
246 | for _, e := range peggyErrorMessages {
247 | if e.state == state && e.token == lookAhead {
248 | return "syntax error: " + e.msg
249 | }
250 | }
251 |
252 | res := "syntax error: unexpected " + peggyTokname(lookAhead)
253 |
254 | // To match Bison, suggest at most four expected tokens.
255 | expected := make([]int, 0, 4)
256 |
257 | // Look for shiftable tokens.
258 | base := peggyPact[state]
259 | for tok := TOKSTART; tok-1 < len(peggyToknames); tok++ {
260 | if n := base + tok; n >= 0 && n < peggyLast && peggyChk[peggyAct[n]] == tok {
261 | if len(expected) == cap(expected) {
262 | return res
263 | }
264 | expected = append(expected, tok)
265 | }
266 | }
267 |
268 | if peggyDef[state] == -2 {
269 | i := 0
270 | for peggyExca[i] != -1 || peggyExca[i+1] != state {
271 | i += 2
272 | }
273 |
274 | // Look for tokens that we accept or reduce.
275 | for i += 2; peggyExca[i] >= 0; i += 2 {
276 | tok := peggyExca[i]
277 | if tok < TOKSTART || peggyExca[i+1] == 0 {
278 | continue
279 | }
280 | if len(expected) == cap(expected) {
281 | return res
282 | }
283 | expected = append(expected, tok)
284 | }
285 |
286 | // If the default action is to accept or reduce, give up.
287 | if peggyExca[i+1] != 0 {
288 | return res
289 | }
290 | }
291 |
292 | for i, tok := range expected {
293 | if i == 0 {
294 | res += ", expecting "
295 | } else {
296 | res += " or "
297 | }
298 | res += peggyTokname(tok)
299 | }
300 | return res
301 | }
302 |
303 | func peggylex1(lex peggyLexer, lval *peggySymType) (char, token int) {
304 | token = 0
305 | char = lex.Lex(lval)
306 | if char <= 0 {
307 | token = peggyTok1[0]
308 | goto out
309 | }
310 | if char < len(peggyTok1) {
311 | token = peggyTok1[char]
312 | goto out
313 | }
314 | if char >= peggyPrivate {
315 | if char < peggyPrivate+len(peggyTok2) {
316 | token = peggyTok2[char-peggyPrivate]
317 | goto out
318 | }
319 | }
320 | for i := 0; i < len(peggyTok3); i += 2 {
321 | token = peggyTok3[i+0]
322 | if token == char {
323 | token = peggyTok3[i+1]
324 | goto out
325 | }
326 | }
327 |
328 | out:
329 | if token == 0 {
330 | token = peggyTok2[1] /* unknown char */
331 | }
332 | if peggyDebug >= 3 {
333 | __yyfmt__.Printf("lex %s(%d)\n", peggyTokname(token), uint(char))
334 | }
335 | return char, token
336 | }
337 |
338 | func peggyParse(peggylex peggyLexer) int {
339 | return peggyNewParser().Parse(peggylex)
340 | }
341 |
342 | func (peggyrcvr *peggyParserImpl) Parse(peggylex peggyLexer) int {
343 | var peggyn int
344 | var peggyVAL peggySymType
345 | var peggyDollar []peggySymType
346 | _ = peggyDollar // silence set and not used
347 | peggyS := peggyrcvr.stack[:]
348 |
349 | Nerrs := 0 /* number of errors */
350 | Errflag := 0 /* error recovery flag */
351 | peggystate := 0
352 | peggyrcvr.char = -1
353 | peggytoken := -1 // peggyrcvr.char translated into internal numbering
354 | defer func() {
355 | // Make sure we report no lookahead when not parsing.
356 | peggystate = -1
357 | peggyrcvr.char = -1
358 | peggytoken = -1
359 | }()
360 | peggyp := -1
361 | goto peggystack
362 |
363 | ret0:
364 | return 0
365 |
366 | ret1:
367 | return 1
368 |
369 | peggystack:
370 | /* put a state and value onto the stack */
371 | if peggyDebug >= 4 {
372 | __yyfmt__.Printf("char %v in %v\n", peggyTokname(peggytoken), peggyStatname(peggystate))
373 | }
374 |
375 | peggyp++
376 | if peggyp >= len(peggyS) {
377 | nyys := make([]peggySymType, len(peggyS)*2)
378 | copy(nyys, peggyS)
379 | peggyS = nyys
380 | }
381 | peggyS[peggyp] = peggyVAL
382 | peggyS[peggyp].yys = peggystate
383 |
384 | peggynewstate:
385 | peggyn = peggyPact[peggystate]
386 | if peggyn <= peggyFlag {
387 | goto peggydefault /* simple state */
388 | }
389 | if peggyrcvr.char < 0 {
390 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval)
391 | }
392 | peggyn += peggytoken
393 | if peggyn < 0 || peggyn >= peggyLast {
394 | goto peggydefault
395 | }
396 | peggyn = peggyAct[peggyn]
397 | if peggyChk[peggyn] == peggytoken { /* valid shift */
398 | peggyrcvr.char = -1
399 | peggytoken = -1
400 | peggyVAL = peggyrcvr.lval
401 | peggystate = peggyn
402 | if Errflag > 0 {
403 | Errflag--
404 | }
405 | goto peggystack
406 | }
407 |
408 | peggydefault:
409 | /* default state action */
410 | peggyn = peggyDef[peggystate]
411 | if peggyn == -2 {
412 | if peggyrcvr.char < 0 {
413 | peggyrcvr.char, peggytoken = peggylex1(peggylex, &peggyrcvr.lval)
414 | }
415 |
416 | /* look through exception table */
417 | xi := 0
418 | for {
419 | if peggyExca[xi+0] == -1 && peggyExca[xi+1] == peggystate {
420 | break
421 | }
422 | xi += 2
423 | }
424 | for xi += 2; ; xi += 2 {
425 | peggyn = peggyExca[xi+0]
426 | if peggyn < 0 || peggyn == peggytoken {
427 | break
428 | }
429 | }
430 | peggyn = peggyExca[xi+1]
431 | if peggyn < 0 {
432 | goto ret0
433 | }
434 | }
435 | if peggyn == 0 {
436 | /* error ... attempt to resume parsing */
437 | switch Errflag {
438 | case 0: /* brand new error */
439 | peggylex.Error(peggyErrorMessage(peggystate, peggytoken))
440 | Nerrs++
441 | if peggyDebug >= 1 {
442 | __yyfmt__.Printf("%s", peggyStatname(peggystate))
443 | __yyfmt__.Printf(" saw %s\n", peggyTokname(peggytoken))
444 | }
445 | fallthrough
446 |
447 | case 1, 2: /* incompletely recovered error ... try again */
448 | Errflag = 3
449 |
450 | /* find a state where "error" is a legal shift action */
451 | for peggyp >= 0 {
452 | peggyn = peggyPact[peggyS[peggyp].yys] + peggyErrCode
453 | if peggyn >= 0 && peggyn < peggyLast {
454 | peggystate = peggyAct[peggyn] /* simulate a shift of "error" */
455 | if peggyChk[peggystate] == peggyErrCode {
456 | goto peggystack
457 | }
458 | }
459 |
460 | /* the current p has no shift on "error", pop stack */
461 | if peggyDebug >= 2 {
462 | __yyfmt__.Printf("error recovery pops state %d\n", peggyS[peggyp].yys)
463 | }
464 | peggyp--
465 | }
466 | /* there is no state on the stack with an error shift ... abort */
467 | goto ret1
468 |
469 | case 3: /* no shift yet; clobber input char */
470 | if peggyDebug >= 2 {
471 | __yyfmt__.Printf("error recovery discards %s\n", peggyTokname(peggytoken))
472 | }
473 | if peggytoken == peggyEofCode {
474 | goto ret1
475 | }
476 | peggyrcvr.char = -1
477 | peggytoken = -1
478 | goto peggynewstate /* try again in the same state */
479 | }
480 | }
481 |
482 | /* reduction by production peggyn */
483 | if peggyDebug >= 2 {
484 | __yyfmt__.Printf("reduce %v in:\n\t%v\n", peggyn, peggyStatname(peggystate))
485 | }
486 |
487 | peggynt := peggyn
488 | peggypt := peggyp
489 | _ = peggypt // guard against "declared and not used"
490 |
491 | peggyp -= peggyR2[peggyn]
492 | // peggyp is now the index of $0. Perform the default action. Iff the
493 | // reduced production is ε, $1 is possibly out of range.
494 | if peggyp+1 >= len(peggyS) {
495 | nyys := make([]peggySymType, len(peggyS)*2)
496 | copy(nyys, peggyS)
497 | peggyS = nyys
498 | }
499 | peggyVAL = peggyS[peggyp+1]
500 |
501 | /* consult goto table to find next state */
502 | peggyn = peggyR1[peggyn]
503 | peggyg := peggyPgo[peggyn]
504 | peggyj := peggyg + peggyS[peggyp].yys + 1
505 |
506 | if peggyj >= peggyLast {
507 | peggystate = peggyAct[peggyg]
508 | } else {
509 | peggystate = peggyAct[peggyj]
510 | if peggyChk[peggystate] != -peggyn {
511 | peggystate = peggyAct[peggyg]
512 | }
513 | }
514 | // dummy call; replaced with literal code
515 | switch peggynt {
516 |
517 | case 1:
518 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
519 | //line grammar.y:43
520 | {
521 | peggylex.(*lexer).result = peggyDollar[2].grammar
522 | }
523 | case 2:
524 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
525 | //line grammar.y:46
526 | {
527 | peggyVAL.grammar = Grammar{Prelude: peggyDollar[1].text, Rules: peggyDollar[3].rules}
528 | }
529 | case 3:
530 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
531 | //line grammar.y:47
532 | {
533 | peggyVAL.grammar = Grammar{Rules: peggyDollar[1].rules}
534 | }
535 | case 4:
536 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
537 | //line grammar.y:51
538 | {
539 | loc := peggyDollar[1].text.Begin()
540 | loc.Col++ // skip the open {.
541 | err := ParseGoFile(loc, peggyDollar[1].text.String())
542 | if err != nil {
543 | peggylex.(*lexer).err = err
544 | }
545 | peggyVAL.text = peggyDollar[1].text
546 | }
547 | case 5:
548 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
549 | //line grammar.y:62
550 | {
551 | peggyVAL.rules = append(peggyDollar[1].rules, peggyDollar[3].rule)
552 | }
553 | case 6:
554 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
555 | //line grammar.y:63
556 | {
557 | peggyVAL.rules = []Rule{peggyDollar[1].rule}
558 | }
559 | case 7:
560 | peggyDollar = peggyS[peggypt-0 : peggypt+1]
561 | //line grammar.y:67
562 | {
563 | peggyVAL.rules = nil
564 | }
565 | case 8:
566 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
567 | //line grammar.y:70
568 | {
569 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, Expr: peggyDollar[4].expr}
570 | }
571 | case 9:
572 | peggyDollar = peggyS[peggypt-5 : peggypt+1]
573 | //line grammar.y:73
574 | {
575 | peggyVAL.rule = Rule{Name: peggyDollar[1].name, ErrorName: peggyDollar[2].text, Expr: peggyDollar[5].expr}
576 | }
577 | case 10:
578 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
579 | //line grammar.y:78
580 | {
581 | peggyVAL.name = Name{Name: peggyDollar[1].text, Args: peggyDollar[3].texts}
582 | }
583 | case 11:
584 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
585 | //line grammar.y:79
586 | {
587 | peggyVAL.name = Name{Name: peggyDollar[1].text}
588 | }
589 | case 12:
590 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
591 | //line grammar.y:82
592 | {
593 | peggyVAL.texts = []Text{peggyDollar[1].text}
594 | }
595 | case 13:
596 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
597 | //line grammar.y:83
598 | {
599 | peggyVAL.texts = append(peggyDollar[1].texts, peggyDollar[3].text)
600 | }
601 | case 14:
602 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
603 | //line grammar.y:87
604 | {
605 | e, ok := peggyDollar[1].expr.(*Choice)
606 | if !ok {
607 | e = &Choice{Exprs: []Expr{peggyDollar[1].expr}}
608 | }
609 | e.Exprs = append(e.Exprs, peggyDollar[4].expr)
610 | peggyVAL.expr = e
611 | }
612 | case 15:
613 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
614 | //line grammar.y:95
615 | {
616 | peggyVAL.expr = peggyDollar[1].expr
617 | }
618 | case 16:
619 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
620 | //line grammar.y:99
621 | {
622 | peggyDollar[2].action.Expr = peggyDollar[1].expr
623 | peggyVAL.expr = peggyDollar[2].action
624 | }
625 | case 17:
626 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
627 | //line grammar.y:103
628 | {
629 | peggyVAL.expr = peggyDollar[1].expr
630 | }
631 | case 18:
632 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
633 | //line grammar.y:107
634 | {
635 | e, ok := peggyDollar[1].expr.(*Sequence)
636 | if !ok {
637 | e = &Sequence{Exprs: []Expr{peggyDollar[1].expr}}
638 | }
639 | e.Exprs = append(e.Exprs, peggyDollar[2].expr)
640 | peggyVAL.expr = e
641 | }
642 | case 19:
643 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
644 | //line grammar.y:115
645 | {
646 | peggyVAL.expr = peggyDollar[1].expr
647 | }
648 | case 20:
649 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
650 | //line grammar.y:118
651 | {
652 | peggyVAL.expr = &LabelExpr{Label: peggyDollar[1].text, Expr: peggyDollar[4].expr}
653 | }
654 | case 21:
655 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
656 | //line grammar.y:119
657 | {
658 | peggyVAL.expr = peggyDollar[1].expr
659 | }
660 | case 22:
661 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
662 | //line grammar.y:122
663 | {
664 | peggyVAL.expr = &PredExpr{Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc}
665 | }
666 | case 23:
667 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
668 | //line grammar.y:123
669 | {
670 | peggyVAL.expr = &PredExpr{Neg: true, Expr: peggyDollar[3].expr, Loc: peggyDollar[1].loc}
671 | }
672 | case 24:
673 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
674 | //line grammar.y:124
675 | {
676 | peggyVAL.expr = peggyDollar[1].expr
677 | }
678 | case 25:
679 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
680 | //line grammar.y:127
681 | {
682 | peggyVAL.expr = &RepExpr{Op: '*', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc}
683 | }
684 | case 26:
685 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
686 | //line grammar.y:128
687 | {
688 | peggyVAL.expr = &RepExpr{Op: '+', Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc}
689 | }
690 | case 27:
691 | peggyDollar = peggyS[peggypt-2 : peggypt+1]
692 | //line grammar.y:129
693 | {
694 | peggyVAL.expr = &OptExpr{Expr: peggyDollar[1].expr, Loc: peggyDollar[2].loc}
695 | }
696 | case 28:
697 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
698 | //line grammar.y:130
699 | {
700 | peggyVAL.expr = peggyDollar[1].expr
701 | }
702 | case 29:
703 | peggyDollar = peggyS[peggypt-5 : peggypt+1]
704 | //line grammar.y:133
705 | {
706 | peggyVAL.expr = &SubExpr{Expr: peggyDollar[3].expr, Open: peggyDollar[1].loc, Close: peggyDollar[5].loc}
707 | }
708 | case 30:
709 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
710 | //line grammar.y:134
711 | {
712 | peggyVAL.expr = &PredCode{Code: peggyDollar[3].text, Loc: peggyDollar[1].loc}
713 | }
714 | case 31:
715 | peggyDollar = peggyS[peggypt-3 : peggypt+1]
716 | //line grammar.y:135
717 | {
718 | peggyVAL.expr = &PredCode{Neg: true, Code: peggyDollar[3].text, Loc: peggyDollar[1].loc}
719 | }
720 | case 32:
721 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
722 | //line grammar.y:136
723 | {
724 | peggyVAL.expr = &Any{Loc: peggyDollar[1].loc}
725 | }
726 | case 33:
727 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
728 | //line grammar.y:137
729 | {
730 | peggyVAL.expr = &Ident{Name: peggyDollar[1].name}
731 | }
732 | case 34:
733 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
734 | //line grammar.y:138
735 | {
736 | peggyVAL.expr = &Literal{Text: peggyDollar[1].text}
737 | }
738 | case 35:
739 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
740 | //line grammar.y:139
741 | {
742 | peggyVAL.expr = peggyDollar[1].cclass
743 | }
744 | case 36:
745 | peggyDollar = peggyS[peggypt-4 : peggypt+1]
746 | //line grammar.y:140
747 | {
748 | peggylex.Error("unexpected end of file")
749 | }
750 | case 37:
751 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
752 | //line grammar.y:144
753 | {
754 | loc := peggyDollar[1].text.Begin()
755 | loc.Col++ // skip the open {.
756 | err := ParseGoExpr(loc, peggyDollar[1].text.String())
757 | if err != nil {
758 | peggylex.(*lexer).err = err
759 | }
760 | peggyVAL.text = peggyDollar[1].text
761 | }
762 | case 38:
763 | peggyDollar = peggyS[peggypt-1 : peggypt+1]
764 | //line grammar.y:156
765 | {
766 | loc := peggyDollar[1].text.Begin()
767 | loc.Col++ // skip the open {.
768 | typ, err := ParseGoBody(loc, peggyDollar[1].text.String())
769 | if err != nil {
770 | peggylex.(*lexer).err = err
771 | }
772 | peggyVAL.action = &Action{Code: peggyDollar[1].text, ReturnType: typ}
773 | }
774 | }
775 | goto peggystack /* stack new state and value */
776 | }
777 |
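The exported Parse function earlier in this file (just after the parser tables) is the entry point the rest of the package uses; everything below it is the goyacc-generated LALR driver. A minimal usage sketch, assuming it sits in this same package so it can call Parse and FullString directly (exampleParse, the grammar text, and the file name are invented for illustration; it needs "fmt" and "strings" imported):

func exampleParse() {
	// strings.Reader implements io.RuneScanner, which Parse requires.
	g, err := Parse(strings.NewReader("A <- B/C/D"), "example.peggy")
	if err != nil {
		fmt.Println(err) // locations in the error are prefixed with "example.peggy"
		return
	}
	// FullString fully parenthesizes the rules; for this input it prints
	// "A <- (((B)/(C))/(D))", matching the choice-associativity case in parse_test.go.
	fmt.Println(FullString(g.Rules))
}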
--------------------------------------------------------------------------------
/grammar.y:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | %{
8 | package main
9 |
10 | import "io"
11 | %}
12 |
13 | %union{
14 | text text
15 | cclass *CharClass
16 | loc Loc
17 | expr Expr
18 | action *Action
19 | rule Rule
20 | rules []Rule
21 | texts []Text
22 | name Name
23 | grammar Grammar
24 | }
25 |
26 | %type <grammar> Grammar
27 | %type <expr> Expr, ActExpr, SeqExpr, LabelExpr, PredExpr, RepExpr, Operand
28 | %type <action> GoAction
29 | %type <text> GoPred Prelude
30 | %type <texts> Args
31 | %type <rule> Rule
32 | %type <rules> Rules
33 | %type <name> Name
34 |
35 | %token _ERROR
36 | %token <text> _IDENT _STRING _CODE _ARROW
37 | %token <cclass> _CHARCLASS
38 | %token <loc> '.', '*', '+', '?', ':', '/', '!', '&', '(', ')', '^', '<', '>', ','
39 |
40 | %%
41 |
42 | Top:
43 | Nl Grammar { peggylex.(*lexer).result = $2 }
44 |
45 | Grammar:
46 | Prelude NewLine Rules Nl { $$ = Grammar{ Prelude: $1, Rules: $3 } }
47 | | Rules Nl { $$ = Grammar{ Rules: $1 } }
48 |
49 | Prelude:
50 | _CODE
51 | {
52 | loc := $1.Begin()
53 | loc.Col++ // skip the open {.
54 | err := ParseGoFile(loc, $1.String())
55 | if err != nil {
56 | peggylex.(*lexer).err = err
57 | }
58 | $$ = $1
59 | }
60 |
61 | Rules:
62 | Rules NewLine Rule { $$ = append($1, $3) }
63 | | Rule { $$ = []Rule{ $1 } }
64 | // The following production adds a shift/reduce conflict:
65 | // reduce the empty string or shift into a Rule?
66 | // Yacc always prefers shift when both are possible, which is the desired behavior.
67 | | { $$ = nil }
68 |
69 | Rule:
70 | Name _ARROW Nl Expr {
71 | $$ = Rule{ Name: $1, Expr: $4 }
72 | }
73 | | Name _STRING _ARROW Nl Expr {
74 | $$ = Rule{ Name: $1, ErrorName: $2, Expr: $5 }
75 | }
76 |
77 | Name:
78 | _IDENT '<' Args '>' { $$ = Name{ Name: $1, Args: $3 } }
79 | | _IDENT { $$ = Name{ Name: $1 } }
80 |
81 | Args:
82 | _IDENT { $$ = []Text{$1} }
83 | | Args ',' _IDENT { $$ = append($1, $3) }
84 |
85 | Expr:
86 | Expr '/' Nl ActExpr
87 | {
88 | e, ok := $1.(*Choice)
89 | if !ok {
90 | e = &Choice{ Exprs: []Expr{$1} }
91 | }
92 | e.Exprs = append(e.Exprs, $4)
93 | $$ = e
94 | }
95 | | ActExpr { $$ = $1 }
96 |
97 | ActExpr:
98 | SeqExpr GoAction
99 | {
100 | $2.Expr = $1
101 | $$ = $2
102 | }
103 | | SeqExpr { $$ = $1 }
104 |
105 | SeqExpr:
106 | SeqExpr LabelExpr
107 | {
108 | e, ok := $1.(*Sequence)
109 | if !ok {
110 | e = &Sequence{ Exprs: []Expr{$1} }
111 | }
112 | e.Exprs = append(e.Exprs, $2)
113 | $$ = e
114 | }
115 | | LabelExpr { $$ = $1 }
116 |
117 | LabelExpr:
118 | _IDENT ':' Nl PredExpr { $$ = &LabelExpr{ Label: $1, Expr: $4 } }
119 | | PredExpr { $$ = $1 }
120 |
121 | PredExpr:
122 | '&' Nl PredExpr { $$ = &PredExpr{ Expr: $3, Loc: $1 } }
123 | | '!' Nl PredExpr { $$ = &PredExpr{ Neg: true, Expr: $3, Loc: $1 } }
124 | | RepExpr { $$ = $1 }
125 |
126 | RepExpr:
127 | RepExpr '*' { $$ = &RepExpr{ Op: '*', Expr: $1, Loc: $2 } }
128 | | RepExpr '+' { $$ = &RepExpr{ Op: '+', Expr: $1, Loc: $2 } }
129 | | RepExpr '?' { $$ = &OptExpr{ Expr: $1, Loc: $2 } }
130 | | Operand { $$ = $1 }
131 |
132 | Operand:
133 | '(' Nl Expr Nl ')' { $$ = &SubExpr{ Expr: $3, Open: $1, Close: $5 } }
134 | | '&' Nl GoPred { $$ = &PredCode{ Code: $3, Loc: $1 } }
135 | | '!' Nl GoPred { $$ = &PredCode{ Neg: true, Code: $3, Loc: $1 } }
136 | | '.' { $$ = &Any{ Loc: $1 } }
137 | | Name { $$ = &Ident{ Name: $1 } }
138 | | _STRING { $$ = &Literal{ Text: $1 } }
139 | | _CHARCLASS { $$ = $1 }
140 | | '(' Nl Expr error { peggylex.Error("unexpected end of file") }
141 |
142 | GoPred:
143 | _CODE
144 | {
145 | loc := $1.Begin()
146 | loc.Col++ // skip the open {.
147 | err := ParseGoExpr(loc, $1.String())
148 | if err != nil {
149 | peggylex.(*lexer).err = err
150 | }
151 | $$ = $1
152 | }
153 |
154 | GoAction:
155 | _CODE
156 | {
157 | loc := $1.Begin()
158 | loc.Col++ // skip the open {.
159 | typ, err := ParseGoBody(loc, $1.String())
160 | if err != nil {
161 | peggylex.(*lexer).err = err
162 | }
163 | $$ = &Action{ Code: $1, ReturnType: typ }
164 | }
165 |
166 | NewLine:
167 | '\n' NewLine
168 | | '\n'
169 |
170 | Nl:
171 | NewLine
172 | |
173 |
174 | %%
175 |
176 | // Parse parses a Peggy input file, and returns the Grammar.
177 | func Parse(in io.RuneScanner, fileName string) (*Grammar, error) {
178 | x := &lexer{
179 | in: in,
180 | file: fileName,
181 | line: 1,
182 | }
183 | peggyParse(x)
184 | if x.err != nil {
185 | return nil, x.err
186 | }
187 | return &x.result, nil
188 | }
189 |
--------------------------------------------------------------------------------
/lex.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "errors"
11 | "fmt"
12 | "io"
13 | "unicode"
14 | )
15 |
16 | const eof = -1
17 |
18 | type text struct {
19 | str string
20 | begin, end Loc
21 | }
22 |
23 | func (t text) PrettyPrint() string {
24 | return fmt.Sprintf(`Text{%d:%d-%d:%d: "%s"}`,
25 | t.begin.Line, t.begin.Col,
26 | t.end.Line, t.end.Col,
27 | t.str)
28 | }
29 |
30 | func (t text) String() string { return t.str }
31 | func (t text) Begin() Loc { return t.begin }
32 | func (t text) End() Loc { return t.end }
33 |
34 | type lexer struct {
35 | in io.RuneScanner
36 | file string
37 | n, line, lineStart, prevLineStart int
38 | eof bool
39 |
40 | // prevBegin is the beginning of the most-recently scanned token.
41 | // prevEnd is the end of the most-recently scanned token.
42 | // These are used for error reporting.
43 | prevBegin, prevEnd Loc
44 |
45 | // err is non-nil if there was an error during parsing.
46 | err error
47 | // result contains the Grammar resulting from a successful parse.
48 | result Grammar
49 | }
50 |
51 | // Begin returns the begin location of the last returned token.
52 | func (x *lexer) Begin() Loc { return x.prevBegin }
53 |
54 | // End returns the end location of the last returned token.
55 | func (x *lexer) End() Loc { return x.prevEnd }
56 |
57 | func (x *lexer) loc() Loc {
58 | return Loc{
59 | File: x.file,
60 | Line: x.line,
61 | Col: x.n - x.lineStart + 1,
62 | }
63 | }
64 |
65 | func (x *lexer) next() (rune, error) {
66 | if x.eof {
67 | return eof, nil
68 | }
69 | r, _, err := x.in.ReadRune()
70 | if err == io.EOF {
71 | x.eof = true
72 | return eof, nil
73 | }
74 | x.n++
75 | if r == '\n' {
76 | x.prevLineStart = x.lineStart
77 | x.lineStart = x.n
78 | x.line++
79 | }
80 | return r, err
81 | }
82 |
83 | func (x *lexer) back() error {
84 | if x.eof {
85 | return nil
86 | }
87 | if x.lineStart == x.n {
88 | x.lineStart = x.prevLineStart
89 | x.line--
90 | }
91 | x.n--
92 | return x.in.UnreadRune()
93 | }
94 |
95 | func (x *lexer) Error(s string) {
96 | if x.err != nil {
97 | return
98 | }
99 | x.err = Err(x, s)
100 | }
101 |
102 | func (x *lexer) Lex(lval *peggySymType) (v int) {
103 | defer func() { x.prevEnd = x.loc() }()
104 | for {
105 | x.prevBegin = x.loc()
106 | lval.text.begin = x.loc()
107 | lval.loc = x.loc()
108 | r, err := x.next()
109 |
110 | switch {
111 | case err != nil:
112 | break
113 |
114 | case r == '#':
115 | if err = comment(x); err != nil {
116 | break
117 | }
118 | return '\n'
119 |
120 | case unicode.IsLetter(r) || r == '_':
121 | if lval.text.str, err = ident(x); err != nil {
122 | break
123 | }
124 | lval.text.str = string([]rune{r}) + lval.text.str
125 | lval.text.end = x.loc()
126 | return _IDENT
127 |
128 | case r == '<':
129 | b := x.loc()
130 | if r, err = x.next(); err != nil {
131 | break
132 | }
133 | lval.text.str = string([]rune{'<', r})
134 | lval.text.end = x.loc()
135 | if r != '-' {
136 | x.back()
137 | x.prevBegin = b
138 | return int('<')
139 | }
140 | return _ARROW
141 |
142 | case r == '{':
143 | if lval.text.str, err = code(x); err != nil {
144 | break
145 | }
146 | lval.text.end = x.loc()
147 | return _CODE
148 |
149 | case r == '[':
150 | if err = x.back(); err != nil {
151 | break
152 | }
153 | if lval.cclass, err = charClass(x); err != nil {
154 | x.err = err
155 | return _ERROR
156 | }
157 | return _CHARCLASS
158 |
159 | case r == '\'' || r == '"':
160 | if lval.text.str, err = delimited(x, r); err != nil {
161 | break
162 | }
163 | lval.text.end = x.loc()
164 | return _STRING
165 |
166 | case unicode.IsSpace(r) && r != '\n':
167 | continue
168 |
169 | default:
170 | return int(r)
171 | }
172 | x.prevEnd = x.loc()
173 | x.Error(err.Error())
174 | return _ERROR
175 | }
176 | }
177 |
178 | func delimited(x *lexer, d rune) (string, error) {
179 | var rs []rune
180 | for {
181 | r, esc, err := x.nextUnesc(d)
182 | switch {
183 | case err != nil:
184 | return "", err
185 | case r == eof:
186 | return "", errors.New("unclosed " + string([]rune{d}))
187 | case r == d && !esc:
188 | return string(rs), nil
189 | }
190 | rs = append(rs, r)
191 | }
192 | }
193 |
194 | func ident(x *lexer) (string, error) {
195 | var rs []rune
196 | for {
197 | r, err := x.next()
198 | if err != nil {
199 | return "", err
200 | }
201 | if !isIdentRune(r) {
202 | return string(rs), x.back()
203 | }
204 | rs = append(rs, r)
205 | }
206 | }
207 |
208 | func isIdentRune(r rune) bool {
209 | return unicode.IsLetter(r) || unicode.IsNumber(r) || r == '_'
210 | }
211 |
212 | func code(x *lexer) (string, error) {
213 | var rs []rune
214 | var n int
215 | for {
216 | r, err := x.next()
217 | if err != nil {
218 | return "", err
219 | }
220 | if r == eof {
221 | return "", errors.New("unclosed {")
222 | }
223 | if r == '{' {
224 | n++
225 | }
226 | if r == '}' {
227 | if n == 0 {
228 | break
229 | }
230 | n--
231 | }
232 | rs = append(rs, r)
233 | }
234 | return string(rs), nil
235 | }
236 |
237 | func comment(x *lexer) error {
238 | for {
239 | r, err := x.next()
240 | if err != nil {
241 | return err
242 | }
243 | if r == '\n' || r == eof {
244 | return nil
245 | }
246 | }
247 | }
248 |
249 | func charClass(x *lexer) (*CharClass, error) {
250 | c := &CharClass{Open: x.loc()}
251 | if r, err := x.next(); err != nil {
252 | return nil, Err(c.Open, err.Error())
253 | } else if r != '[' {
254 | panic("impossible, no [")
255 | }
256 |
257 | var prev rune
258 | var hasPrev, span bool
259 |
260 | // last is the Loc just before the last read rune.
261 | var last Loc
262 |
263 | // spanLoc is the location of the current span.
264 | // (We use type text because it implements Located;
265 | // however, we ignore the str field.)
266 | var spanLoc text
267 | loop:
268 | for {
269 | last = x.loc()
270 | if !span && !hasPrev {
271 | spanLoc.begin = x.loc()
272 | }
273 | r, esc, err := x.nextUnesc(']')
274 | switch {
275 | case err != nil:
276 | return nil, err
277 |
278 | case r == eof:
279 | c.Close = x.loc()
280 | return nil, Err(c, "unclosed [")
281 |
282 | case r == ']' && !esc:
283 | c.Close = x.loc()
284 | break loop
285 |
286 | case span:
287 | spanLoc.end = x.loc()
288 | if !hasPrev {
289 | return nil, Err(spanLoc, "bad span")
290 | }
291 | if prev >= r {
292 | return nil, Err(spanLoc, "bad span")
293 | }
294 | c.Spans = append(c.Spans, [2]rune{prev, r})
295 | hasPrev, span = false, false
296 | spanLoc.begin = spanLoc.end
297 |
298 | case r == '-' && !esc:
299 | span = true
300 |
301 | default:
302 | if r == '^' && !esc && !c.Neg && len(c.Spans) == 0 && !hasPrev {
303 | c.Neg = true
304 | continue
305 | }
306 | if hasPrev {
307 | c.Spans = append(c.Spans, [2]rune{prev, prev})
308 | spanLoc.begin = last // in case current rune starts a span.
309 | }
310 | prev, hasPrev = r, true
311 | }
312 | }
313 | if span {
314 | spanLoc.end = last // just before closing ]
315 | return nil, Err(spanLoc, "bad span")
316 | }
317 | if hasPrev {
318 | c.Spans = append(c.Spans, [2]rune{prev, prev})
319 | }
320 | if len(c.Spans) == 0 {
321 | return nil, Err(c, "bad char class: empty")
322 | }
323 | return c, nil
324 | }
325 |
326 | var errUnknownEsc = errors.New("unknown escape sequence")
327 |
328 | // Like next, but unescapes an escaped rune according to Go's unescaping rules.
329 | // The second return value is whether the rune was escaped.
330 | func (x *lexer) nextUnesc(delim rune) (rune, bool, error) {
331 | switch r, err := x.next(); {
332 | case err != nil:
333 | return 0, false, err
334 | case r == delim:
335 | return r, false, nil
336 | case r == '\\':
337 | r, err = x.next()
338 | if err != nil {
339 | return 0, true, err
340 | }
341 | switch r {
342 | case eof:
343 | return eof, true, nil
344 | case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\':
345 | switch r {
346 | case 'a':
347 | r = '\a'
348 | case 'b':
349 | r = '\b'
350 | case 'f':
351 | r = '\f'
352 | case 'n':
353 | r = '\n'
354 | case 'r':
355 | r = '\r'
356 | case 't':
357 | r = '\t'
358 | case 'v':
359 | r = '\v'
360 | case '\\':
361 | r = '\\'
362 | }
363 | return r, true, nil
364 | case '0', '1', '2', '3', '4', '5', '6', '7':
365 | v, _ := oct(r)
366 | for i := 1; i < 3; i++ {
367 | r, err := x.next()
368 | if err != nil {
369 | return 0, false, err
370 | }
371 | d, ok := oct(r)
372 | if !ok {
373 | return 0, false, errUnknownEsc
374 | }
375 | v = (v << 3) | d
376 | }
377 | if v > 255 {
378 | return 0, false, errors.New("octal escape >255")
379 | }
380 | return v, true, nil
381 | case 'x', 'u', 'U':
382 | var n int
383 | switch r {
384 | case 'x':
385 | n = 2
386 | case 'u':
387 | n = 4
388 | case 'U':
389 | n = 8
390 | }
391 | var v int32
392 | for i := 0; i < n; i++ {
393 | r, err := x.next()
394 | if err != nil {
395 | return 0, false, err
396 | }
397 | d, ok := hex(r)
398 | if !ok {
399 | return 0, false, errUnknownEsc
400 | }
401 | v = (v << 4) | d
402 | }
403 | // TODO: surrogate halves (U+D800 through U+DFFF) are also illegal and should be rejected here.
404 | if v > 0x10FFFF {
405 | return 0, false, errors.New("hex escape >0x10FFFF")
406 | }
407 | return v, true, nil
408 | default:
409 | if r == delim {
410 | return r, true, nil
411 | }
412 | // For character classes, allow \- as - and \^ as ^.
413 | if delim == ']' && (r == '-' || r == '^') {
414 | return r, true, nil
415 | }
416 | return 0, false, errUnknownEsc
417 | }
418 | default:
419 | return r, false, nil
420 | }
421 | }
422 |
423 | func oct(r rune) (int32, bool) {
424 | if '0' <= r && r <= '7' {
425 | return int32(r) - '0', true
426 | }
427 | return 0, false
428 | }
429 |
430 | func hex(r rune) (int32, bool) {
431 | if '0' <= r && r <= '9' {
432 | return int32(r) - '0', true
433 | }
434 | if 'a' <= r && r <= 'f' {
435 | return int32(r) - 'a' + 10, true
436 | }
437 | if 'A' <= r && r <= 'F' {
438 | return int32(r) - 'A' + 10, true
439 | }
440 | return 0, false
441 | }
442 |
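nextUnesc accumulates numeric escapes digit by digit: an octal escape shifts in 3 bits per digit over exactly three digits and must fit in 255, while \x, \u, and \U shift in 4 bits per digit over 2, 4, or 8 hex digits and must not exceed 0x10FFFF. A small sketch of the \uXXXX arm, assuming it sits in this package so it can reuse hex and errUnknownEsc (decodeU4 itself is invented for illustration, not a function in this file):

func decodeU4(digits string) (rune, error) {
	// Exactly four hex digits, 4 bits each, most significant first.
	if len(digits) != 4 {
		return 0, errUnknownEsc
	}
	var v int32
	for _, r := range digits {
		d, ok := hex(r)
		if !ok {
			return 0, errUnknownEsc
		}
		v = (v << 4) | d
	}
	return v, nil // decodeU4("263A") == '☺', matching the \u263A test cases
}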
--------------------------------------------------------------------------------
/main.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "bufio"
11 | "flag"
12 | "fmt"
13 | "io"
14 | "os"
15 | )
16 |
17 | //go:generate goyacc -o grammar.go -p "peggy" grammar.y
18 |
19 | var (
20 | out = flag.String("o", "", "output file path")
21 | prefix = flag.String("p", "_", "identifier prefix")
22 | genActions = flag.Bool("a", true, "generate action parsing")
23 | genParseTree = flag.Bool("t", true, "generate parse tree parsing")
24 | prettyPrint = flag.Bool("pretty", false, "don't check or generate, write the grammar without labels or actions")
25 | )
26 |
27 | func main() {
28 | flag.Parse()
29 | args := flag.Args()
30 |
31 | in := bufio.NewReader(os.Stdin)
32 | file := ""
33 | if len(args) > 0 {
34 | f, err := os.Open(args[0])
35 | if err != nil {
36 | fmt.Println(err)
37 | os.Exit(1)
38 | }
39 | in = bufio.NewReader(f)
40 | file = args[0]
41 | }
42 |
43 | g, err := Parse(in, file)
44 | if err != nil {
45 | fmt.Println(err)
46 | os.Exit(1)
47 | }
48 |
49 | var w io.Writer = os.Stdout
50 | if *out != "" {
51 | f, err := os.Create(*out)
52 | if err != nil {
53 | fmt.Println(err)
54 | os.Exit(1)
55 | }
56 | defer func() {
57 | if err := f.Close(); err != nil {
58 | fmt.Println(err)
59 | }
60 | }()
61 | w = f
62 | }
63 | if *prettyPrint {
64 | for i := range g.Rules {
65 | r := &g.Rules[i]
66 | if _, err := io.WriteString(w, r.String()+"\n"); err != nil {
67 | fmt.Println(err)
68 | os.Exit(1)
69 | }
70 | }
71 | os.Exit(0)
72 | }
73 | if err := Check(g); err != nil {
74 | fmt.Println(err)
75 | os.Exit(1)
76 | }
77 |
78 | cfg := Config{Prefix: *prefix}
79 | if err := cfg.Generate(w, file, g); err != nil {
80 | fmt.Println(err)
81 | os.Exit(1)
82 | }
83 | }
84 |
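main wires the stages together: read stdin or the named file, Parse it, and either pretty-print the rules or Check the grammar and hand it to Config.Generate (grammar.go itself is regenerated from grammar.y by the go:generate goyacc directive above). The same pipeline, sketched as a hypothetical helper rather than anything defined in this package, using only the Parse, Check, and Config.Generate calls that main already makes:

func generateInto(w io.Writer, path string, src io.RuneScanner) error {
	g, err := Parse(src, path) // path names the input in error locations
	if err != nil {
		return err
	}
	if err := Check(g); err != nil { // reject invalid grammars before generating
		return err
	}
	cfg := Config{Prefix: "_"} // "_" mirrors the default of the -p flag
	return cfg.Generate(w, path, g)
}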
--------------------------------------------------------------------------------
/parse_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "errors"
11 | "io"
12 | "regexp"
13 | "strings"
14 | "testing"
15 |
16 | "github.com/eaburns/pretty"
17 | )
18 |
19 | // A ParserTest is a Peggy input-file parser test
20 | // with a given input and expected string formats.
21 | type ParserTest struct {
22 | Name string
23 | Input string
24 | // FullString is the expected fully parenthesized string.
25 | FullString string
26 | // String is the expected regular String string.
27 | // This is the same as Input, but without
28 | // comments and unnecessary whitespace,
29 | // except for a single space, " ",
30 | // separating sub-expressions of a sequence,
31 | // and on either side of <-.
32 | String string
33 | // Prelude is the expected file prelude text.
34 | Prelude string
35 | // Error is a regexp string that matches an expected parse error.
36 | Error string
37 | }
38 |
39 | // ParseTests is a set of tests matching
40 | // FullString and String outputs with expected outputs for successful parses,
41 | // and expected parse errors for failed parses.
42 | // If Input contains a ☹ rune, the io.RuneScanner returns an error on that rune.
43 | var ParseTests = []ParserTest{
44 | {
45 | Name: "empty",
46 | Input: "",
47 | FullString: "",
48 | String: "",
49 | },
50 | {
51 | Name: "only whitespace",
52 | Input: " \n\n\t ",
53 | FullString: "",
54 | String: "",
55 | },
56 | {
57 | Name: "simple rule",
58 | Input: "A <- B",
59 | FullString: "A <- (B)",
60 | String: "A <- B",
61 | },
62 | {
63 | Name: "named rule",
64 | Input: `A "name" <- B`,
65 | FullString: `A "name" <- (B)`,
66 | String: `A "name" <- B`,
67 | },
68 | {
69 | Name: "named rule, single quotes",
70 | Input: `A 'name' <- B`,
71 | FullString: `A "name" <- (B)`,
72 | String: `A "name" <- B`,
73 | },
74 | {
75 | Name: "named rule, empty name",
76 | Input: `A "" <- B`,
77 | FullString: `A "" <- (B)`,
78 | String: `A "" <- B`,
79 | },
80 | {
81 | Name: "named rule, escapes",
82 | Input: `A "\t\nabc" <- B`,
83 | FullString: `A "\t\nabc" <- (B)`,
84 | String: `A "\t\nabc" <- B`,
85 | },
86 | {
87 | Name: "prelude and simple rule",
88 | Input: `{
89 | package main
90 |
91 | import "fmt"
92 |
93 | func main() { fmt.Println("Hello, World") }
94 | }
95 | A <- B`,
96 | FullString: "A <- (B)",
97 | String: "A <- B",
98 | Prelude: `
99 | package main
100 |
101 | import "fmt"
102 |
103 | func main() { fmt.Println("Hello, World") }
104 | `,
105 | },
106 | {
107 | Name: "multiple simple rules",
108 | Input: "A <- B\nC <- D",
109 | FullString: "A <- (B)\nC <- (D)",
110 | String: "A <- B\nC <- D",
111 | },
112 | {
113 | Name: "multiple simple rules",
114 | Input: "A <- B\nC <- D",
115 | FullString: "A <- (B)\nC <- (D)",
116 | String: "A <- B\nC <- D",
117 | },
118 | {
119 | Name: "whitespace",
120 | Input: "\tA <- B\n \n\n C <- D\t ",
121 | FullString: "A <- (B)\nC <- (D)",
122 | String: "A <- B\nC <- D",
123 | },
124 | {
125 | Name: "comments",
126 | Input: "# comment\nA <- B # comment\n# comment",
127 | FullString: "A <- (B)",
128 | String: "A <- B",
129 | },
130 |
131 | // Operands.
132 | {
133 | Name: "& pred code",
134 | Input: "A <- &{pred}",
135 | FullString: "A <- (&{pred})",
136 | String: "A <- &{…}",
137 | },
138 | {
139 | Name: "! pred code",
140 | Input: "A <- !{pred}",
141 | FullString: "A <- (!{pred})",
142 | String: "A <- !{…}",
143 | },
144 | {
145 | Name: "any",
146 | Input: "A <- .",
147 | FullString: "A <- (.)",
148 | String: "A <- .",
149 | },
150 | {
151 | Name: "identifier",
152 | Input: "A <- BCD",
153 | FullString: "A <- (BCD)",
154 | String: "A <- BCD",
155 | },
156 | {
157 | Name: "non-ASCII identifier",
158 | Input: "Â <- _αβξ",
159 | FullString: "Â <- (_αβξ)",
160 | String: "Â <- _αβξ",
161 | },
162 | {
163 | Name: "double-quote string",
164 | Input: `A <- "BCD☺"`,
165 | FullString: `A <- ("BCD☺")`,
166 | String: `A <- "BCD☺"`,
167 | },
168 | {
169 | Name: "single-quote string",
170 | Input: `A <- 'BCD☺'`,
171 | FullString: `A <- ("BCD☺")`,
172 | String: `A <- "BCD☺"`,
173 | },
174 | {
175 | Name: "character class",
176 | Input: `A <- [abc\nxyzαβξ1-9A-Z\-]`,
177 | FullString: `A <- ([abc\nxyzαβξ1-9A-Z\-])`,
178 | String: `A <- [abc\nxyzαβξ1-9A-Z\-]`,
179 | },
180 | {
181 | Name: "^ character class",
182 | Input: `A <- [^^abc\nxyzαβξ]`,
183 | FullString: `A <- ([^\^abc\nxyzαβξ])`,
184 | String: `A <- [^\^abc\nxyzαβξ]`,
185 | },
186 | {
187 | Name: "character class, delimiters",
188 | Input: `A <- [[\]]`,
189 | FullString: `A <- ([[\]])`,
190 | String: `A <- [[\]]`,
191 | },
192 | {
193 | // ^ should only negate the class if it's at the beginning
194 | Name: "character class, non-first^",
195 | Input: `A <- [abc^]`,
196 | FullString: `A <- ([abc\^])`,
197 | String: `A <- [abc\^]`,
198 | },
199 | {
200 | Name: "character class, escaping",
201 | Input: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [\101] [\x41] [\u0041] [\U00000041] [\aa\b] [a\ab] [\^]`,
202 | FullString: `A <- ((((((((((((((((([\a]) ([\b])) ([\f])) ([\n])) ([\r])) ([\t])) ([\v])) ([\\])) ([\-])) ([\]])) ([A])) ([A])) ([A])) ([A])) ([\aa\b])) ([a\ab])) ([\^]))`,
203 | String: `A <- [\a] [\b] [\f] [\n] [\r] [\t] [\v] [\\] [\-] [\]] [A] [A] [A] [A] [\aa\b] [a\ab] [\^]`,
204 | },
205 |
206 | // Associativity.
207 | {
208 | Name: "choice associativity",
209 | Input: "A <- B/C/D",
210 | FullString: "A <- (((B)/(C))/(D))",
211 | String: "A <- B/C/D",
212 | },
213 | {
214 | Name: "sequence associativity",
215 | Input: "A <- B C D",
216 | FullString: "A <- (((B) (C)) (D))",
217 | String: "A <- B C D",
218 | },
219 |
220 | // Precedence.
221 | {
222 | Name: "various precedences",
223 | Input: "A <- x:B*+ C?/(!D y:&E)* {return 0}/F !{p}",
224 | FullString: "A <- ((((x:(((B)*)+)) ((C)?))/((((!(D)) (y:(&(E))))*) {return 0}))/((F) (!{p})))",
225 | String: "A <- x:B*+ C?/(!D y:&E)* {…}/F !{…}",
226 | },
227 | {
228 | Name: "action < choice",
229 | Input: "A <- B { return 0 }/C { return 0 }",
230 | FullString: "A <- (((B) { return 0 })/((C) { return 0 }))",
231 | String: "A <- B {…}/C {…}",
232 | },
233 | {
234 | Name: "sequence < action",
235 | Input: "A <- B C { return 0 }",
236 | FullString: "A <- (((B) (C)) { return 0 })",
237 | String: "A <- B C {…}",
238 | },
239 | {
240 | Name: "label < sequence",
241 | Input: "A <- s:A t:B",
242 | FullString: "A <- ((s:(A)) (t:(B)))",
243 | String: "A <- s:A t:B",
244 | },
245 | {
246 | Name: "pred < label",
247 | Input: "A <- s:!A t:&B",
248 | FullString: "A <- ((s:(!(A))) (t:(&(B))))",
249 | String: "A <- s:!A t:&B",
250 | },
251 | {
252 | Name: "rep < pred",
253 | Input: "A <- !A* &B+ !C?",
254 | FullString: "A <- (((!((A)*)) (&((B)+))) (!((C)?)))",
255 | String: "A <- !A* &B+ !C?",
256 | },
257 | {
258 | Name: "operand < rep",
259 | Input: `A <- (a/b c)*
260 | B <- &{pred}*
261 | C <- !{pred}*
262 | D <- .*
263 | E <- Z*
264 | F <- "cde"*
265 | G <- [fgh]*`,
266 | FullString: `A <- (((a)/((b) (c)))*)
267 | B <- ((&{pred})*)
268 | C <- ((!{pred})*)
269 | D <- ((.)*)
270 | E <- ((Z)*)
271 | F <- (("cde")*)
272 | G <- (([fgh])*)`,
273 | String: `A <- (a/b c)*
274 | B <- &{…}*
275 | C <- !{…}*
276 | D <- .*
277 | E <- Z*
278 | F <- "cde"*
279 | G <- [fgh]*`,
280 | },
281 |
282 | // Templates
283 | {
284 | Name: "1-ary template rule",
285 | Input: `A <- x`,
286 | FullString: `A <- (x)`,
287 | String: `A <- x`,
288 | },
289 | {
290 | Name: "3-ary template rule",
291 | Input: `A <- x y z`,
292 | FullString: `A <- (((x) (y)) (z))`,
293 | String: `A <- x y z`,
294 | },
295 | {
296 | Name: "1-ary template invocation",
297 | Input: `A <- B C`,
298 | FullString: `A <- ((B) (C))`,
299 | String: `A <- B C`,
300 | },
301 | {
302 | Name: "3-ary template invocation",
303 | Input: `A <- B C`,
304 | FullString: `A <- ((B) (C))`,
305 | String: `A <- B C`,
306 | },
307 |
308 | // Rune escaping
309 | {
310 | Name: `escape \a`,
311 | Input: `A <- "\a"`,
312 | FullString: `A <- ("\a")`,
313 | String: `A <- "\a"`,
314 | },
315 | {
316 | Name: `escape \b`,
317 | Input: `A <- "\b"`,
318 | FullString: `A <- ("\b")`,
319 | String: `A <- "\b"`,
320 | },
321 | {
322 | Name: `escape \f`,
323 | Input: `A <- "\f"`,
324 | FullString: `A <- ("\f")`,
325 | String: `A <- "\f"`,
326 | },
327 | {
328 | Name: `escape \n`,
329 | Input: `A <- "\n"`,
330 | FullString: `A <- ("\n")`,
331 | String: `A <- "\n"`,
332 | },
333 | {
334 | Name: `escape \r`,
335 | Input: `A <- "\r"`,
336 | FullString: `A <- ("\r")`,
337 | String: `A <- "\r"`,
338 | },
339 | {
340 | Name: `escape \t`,
341 | Input: `A <- "\t"`,
342 | FullString: `A <- ("\t")`,
343 | String: `A <- "\t"`,
344 | },
345 | {
346 | Name: `escape \v`,
347 | Input: `A <- "\v"`,
348 | FullString: `A <- ("\v")`,
349 | String: `A <- "\v"`,
350 | },
351 | {
352 | Name: `escape \\`,
353 | Input: `A <- "\\"`,
354 | FullString: `A <- ("\\")`,
355 | String: `A <- "\\"`,
356 | },
357 | {
358 | Name: `escape \"`,
359 | Input: `A <- "\""`,
360 | FullString: `A <- ("\"")`,
361 | String: `A <- "\""`,
362 | },
363 | {
364 | Name: `escape \'`,
365 | Input: `A <- '\''`,
366 | FullString: `A <- ("'")`,
367 | String: `A <- "'"`,
368 | },
369 | {
370 | Name: `escape \000`,
371 | Input: `A <- "\000"`,
372 | FullString: `A <- ("\x00")`,
373 | String: `A <- "\x00"`,
374 | },
375 | {
376 | Name: `escape \101 (A)`,
377 | Input: `A <- "\101"`,
378 | FullString: `A <- ("A")`,
379 | String: `A <- "A"`,
380 | },
381 | {
382 | Name: `escape \101BCD`,
383 | Input: `A <- "\101BCD"`,
384 | FullString: `A <- ("ABCD")`,
385 | String: `A <- "ABCD"`,
386 | },
387 | {
388 | Name: `escape \377 (255)`,
389 | Input: `A <- "\377"`,
390 | FullString: `A <- ("ÿ")`, // \xFF
391 | String: `A <- "ÿ"`,
392 | },
393 | {
394 | Name: `escape \400 (256)`,
395 | Input: `A <- "\400"`,
396 | Error: "^test.file:1.6,1.11:.*>255",
397 | },
398 | {
399 | Name: `escape \400 (256)`,
400 | Input: `A <- "xyz\400"`,
401 | // TODO: report the correct error location.
402 | Error: "^test.file:1.6,1.14:.*>255",
403 | },
404 | {
405 | Name: `escape \4`,
406 | Input: `A <- "\4"`,
407 | Error: "^test.file:1.6,1.10: unknown escape sequence",
408 | },
409 | {
410 | Name: `escape \40`,
411 | Input: `A <- "\40"`,
412 | Error: "^test.file:1.6,1.11: unknown escape sequence",
413 | },
414 | {
415 | Name: `escape \x00`,
416 | Input: `A <- "\x00"`,
417 | FullString: `A <- ("\x00")`,
418 | String: `A <- "\x00"`,
419 | },
420 | {
421 | Name: `escape \x41 (A)`,
422 | Input: `A <- "\x41"`,
423 | FullString: `A <- ("A")`,
424 | String: `A <- "A"`,
425 | },
426 | {
427 | Name: `escape \x41BCD`,
428 | Input: `A <- "\x41BCD"`,
429 | FullString: `A <- ("ABCD")`,
430 | String: `A <- "ABCD"`,
431 | },
432 | {
433 | Name: `escape \xFF`,
434 | Input: `A <- "\xFF"`,
435 | FullString: `A <- ("ÿ")`, // \xFF
436 | String: `A <- "ÿ"`,
437 | },
438 | {
439 | Name: `escape \xF`,
440 | Input: `A <- "\xF"`,
441 | Error: "^test.file:1.6,1.11: unknown escape sequence",
442 | },
443 | {
444 | Name: `escape \u0000`,
445 | Input: `A <- "\u0000"`,
446 | FullString: `A <- ("\x00")`,
447 | String: `A <- "\x00"`,
448 | },
449 | {
450 | Name: `escape \u0041 (A)`,
451 | Input: `A <- "\u0041"`,
452 | FullString: `A <- ("A")`,
453 | String: `A <- "A"`,
454 | },
455 | {
456 | Name: `escape \u0041BCD`,
457 | Input: `A <- "\u0041BCD"`,
458 | FullString: `A <- ("ABCD")`,
459 | String: `A <- "ABCD"`,
460 | },
461 | {
462 | Name: `escape \u263A (☺)`,
463 | Input: `A <- "\u263A"`,
464 | FullString: `A <- ("☺")`,
465 | String: `A <- "☺"`,
466 | },
467 | {
468 | Name: `escape \u263a (☺)`,
469 | Input: `A <- "\u263a"`,
470 | FullString: `A <- ("☺")`,
471 | String: `A <- "☺"`,
472 | },
473 | {
474 | Name: `escape \uF`,
475 | Input: `A <- "\xF"`,
476 | Error: "^test.file:1.6,1.11: unknown escape sequence",
477 | },
478 | {
479 | Name: `escape \uFF`,
480 | Input: `A <- "\uFF"`,
481 | Error: "^test.file:1.6,1.12: unknown escape sequence",
482 | },
483 | {
484 | Name: `escape \uFFF`,
485 | Input: `A <- "\uFFF"`,
486 | Error: "^test.file:1.6,1.13: unknown escape sequence",
487 | },
488 | {
489 | Name: `escape \U00000000`,
490 | Input: `A <- "\U00000000"`,
491 | FullString: `A <- ("\x00")`,
492 | String: `A <- "\x00"`,
493 | },
494 | {
495 | Name: `escape \U00000041 (A)`,
496 | Input: `A <- "\U00000041"`,
497 | FullString: `A <- ("A")`,
498 | String: `A <- "A"`,
499 | },
500 | {
501 | Name: `escape \U00000041BCD`,
502 | Input: `A <- "\U00000041BCD"`,
503 | FullString: `A <- ("ABCD")`,
504 | String: `A <- "ABCD"`,
505 | },
506 | {
507 | Name: `escape \U0000263A (☺)`,
508 | Input: `A <- "\U0000263A"`,
509 | FullString: `A <- ("☺")`,
510 | String: `A <- "☺"`,
511 | },
512 | {
513 | Name: `escape \U0000263a (☺)`,
514 | Input: `A <- "\U0000263a"`,
515 | FullString: `A <- ("☺")`,
516 | String: `A <- "☺"`,
517 | },
518 | {
519 | Name: `escape \U0010FFFF`,
520 | Input: `A <- "\U0010FFFF"`,
521 | FullString: `A <- ("\U0010ffff")`,
522 | String: `A <- "\U0010ffff"`,
523 | },
524 | {
525 | Name: `escape \U00110000`,
526 | Input: `A <- "\U00110000"`,
527 | Error: "^test.file:1.6,1.17:.*>0x10FFFF",
528 | },
529 | {
530 | Name: `escape \UF`,
531 | Input: `A <- "\UF"`,
532 | Error: "^test.file:1.6,1.11: unknown escape sequence",
533 | },
534 | {
535 | Name: `escape \UFF`,
536 | Input: `A <- "\UFF"`,
537 | Error: "^test.file:1.6,1.12: unknown escape sequence",
538 | },
539 | {
540 | Name: `escape \UFFF`,
541 | Input: `A <- "\UFFF"`,
542 | Error: "^test.file:1.6,1.13: unknown escape sequence",
543 | },
544 | {
545 | Name: `escape \UFFFF`,
546 | Input: `A <- "\UFFFF"`,
547 | Error: "^test.file:1.6,1.14: unknown escape sequence",
548 | },
549 | {
550 | Name: `escape \UFFFFF`,
551 | Input: `A <- "\UFFFFF"`,
552 | Error: "^test.file:1.6,1.15: unknown escape sequence",
553 | },
554 | {
555 | Name: `escape \UFFFFFF`,
556 | Input: `A <- "\UFFFFFF"`,
557 | Error: "^test.file:1.6,1.16: unknown escape sequence",
558 | },
559 | {
560 | Name: `escape \UFFFFFFF`,
561 | Input: `A <- "\UFFFFFFF"`,
562 | Error: "^test.file:1.6,1.17: unknown escape sequence",
563 | },
564 | {
565 | Name: `string with multiple escapes`,
566 | Input: `A <- "x\a\b\f\n\r\t\v\\\"\000\x00\u0000\U00000000☺"`,
567 | FullString: `A <- ("x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺")`,
568 | String: `A <- "x\a\b\f\n\r\t\v\\\"\x00\x00\x00\x00☺"`,
569 | },
570 | {
571 | Name: `unknown escape`,
572 | Input: `A <- "\z"`,
573 | Error: "^test.file:1.6,1.9: unknown escape sequence",
574 | },
575 | {
576 | Name: `escape eof`,
577 | Input: `A <- "\`,
578 | Error: `^test.file:1.6,1.8: unclosed "`,
579 | },
580 |
581 | // Whitespace.
582 | // BUG: The current YACC grammar
583 | // doesn't allow whitespace between all tokens,
584 | // but only after particular tokens.
585 | // Specifically whitespace can only appear after
586 | // delimiters after which a new rule cannot begin.
587 | // This is because, in order to remain LALR(1),
588 | // a newline terminates a sequence expression,
589 | // denoting that the next identifier is a rule name.
590 | {
591 | Name: `after <-`,
592 | Input: `A <-
593 | "a"
594 |
595 | B <- #comment
596 | "b"
597 |
598 | C "c" <-
599 | "c"
600 |
601 | D "d" <- #comment
602 | "d"`,
603 | FullString: `A <- ("a")
604 | B <- ("b")
605 | C "c" <- ("c")
606 | D "d" <- ("d")`,
607 | String: `A <- "a"
608 | B <- "b"
609 | C "c" <- "c"
610 | D "d" <- "d"`,
611 | },
612 | {
613 | Name: `after /`,
614 | Input: `A <- B /
615 | C / # comment
616 | D`,
617 | FullString: `A <- (((B)/(C))/(D))`,
618 | String: `A <- B/C/D`,
619 | },
620 | {
621 | Name: `after : label`,
622 | Input: `A <- l:
623 | B m: #comment
624 | C`,
625 | FullString: `A <- ((l:(B)) (m:(C)))`,
626 | String: `A <- l:B m:C`,
627 | },
628 | {
629 | Name: `after & predicate`,
630 | Input: `A <- &
631 | B & #comment
632 | C`,
633 | FullString: `A <- ((&(B)) (&(C)))`,
634 | String: `A <- &B &C`,
635 | },
636 | {
637 | Name: `after ! predicate`,
638 | Input: `A <- !
639 | B ! #comment
640 | C`,
641 | FullString: `A <- ((!(B)) (!(C)))`,
642 | String: `A <- !B !C`,
643 | },
644 | {
645 | Name: `after (`,
646 | Input: `A <- (
647 | B ( #comment
648 | C))`,
649 | FullString: `A <- ((B) (C))`,
650 | String: `A <- (B (C))`,
651 | },
652 | {
653 | Name: `before )`,
654 | Input: `A <- (B (C
655 | ) #comment
656 | )`,
657 | FullString: `A <- ((B) (C))`,
658 | String: `A <- (B (C))`,
659 | },
660 | {
661 | Name: `after & code`,
662 | Input: `A <- &
663 | {code} & #comment
664 | {CODE}`,
665 | FullString: `A <- ((&{code}) (&{CODE}))`,
666 | String: `A <- &{…} &{…}`,
667 | },
668 | {
669 | Name: `after ! code`,
670 | Input: `A <- !
671 | {code} ! #comment
672 | {CODE}`,
673 | FullString: `A <- ((!{code}) (!{CODE}))`,
674 | String: `A <- !{…} !{…}`,
675 | },
676 |
677 | // Syntax errors.
678 | {
679 | Name: "bad rule name",
680 | Input: "\n\t\t&",
681 | Error: "^test.file:2.3,2.4:",
682 | },
683 | {
684 | Name: "missing <-",
685 | Input: "\nA B",
686 | Error: "^test.file:2.3,2.4:",
687 | },
688 | {
689 | Name: "bad <-",
690 | Input: "\nA <~ C",
691 | Error: "^test.file:2.4,2.5:",
692 | },
693 | {
694 | Name: "missing expr",
695 | Input: "\nA <-",
696 | Error: "^test.file:2.5:",
697 | },
698 | {
699 | Name: "unexpected rune",
700 | Input: "\nA <- C ☺",
701 | Error: "^test.file:2.8,2.9:",
702 | },
703 | {
704 | Name: "unclosed (",
705 | Input: "\nA <- (B",
706 | Error: "^test.file:2.8:",
707 | },
708 | {
709 | Name: "unclosed '",
710 | Input: "\nA <- 'B",
711 | Error: "^test.file:2.6,2.8: unclosed '",
712 | },
713 | {
714 | Name: `unclosed "`,
715 | Input: "\nA <- \"B",
716 | Error: "^test.file:2.6,2.8: unclosed \"",
717 | },
718 | {
719 | Name: `unclosed {`,
720 | Input: "\nA <- B { code",
721 | Error: "^test.file:2.8,2.14: unclosed {",
722 | },
723 | {
724 | Name: `unclosed spans lines`,
725 | Input: "\nA <- \"B\n\nC",
726 | Error: "^test.file:2.6,4.2: unclosed \"",
727 | },
728 | {
729 | Name: "unclosed [",
730 | Input: "\nA <- [B",
731 | Error: "^test.file:2.6,2.8: unclosed [[]",
732 | },
733 | {
734 | Name: "character class empty",
735 | Input: "\nA <- []",
736 | Error: "^test.file:2.6,2.8: bad char class: empty",
737 | },
738 | {
739 | Name: "character class starts with span",
740 | Input: "\nA <- [-9]",
741 | Error: "^test.file:2.7,2.9: bad span",
742 | },
743 | {
744 | Name: "character class no span start",
745 | Input: "\nA <- [1-3-9]",
746 | Error: "^test.file:2.10,2.12: bad span",
747 | },
748 | {
749 | Name: "character class ends with span",
750 | Input: "\nA <- [0-]",
751 | Error: "^test.file:2.7,2.9: bad span",
752 | },
753 | {
754 | Name: "character class inverted span",
755 | Input: "\nA <- [9-0]",
756 | Error: "^test.file:2.7,2.10: bad span",
757 | },
758 | {
759 | Name: "character class span after span",
760 | Input: "\nA <- [^0-9abcA-Zz-a]",
761 | Error: "^test.file:2.17,2.20: bad span",
762 | },
763 | {
764 | Name: "character class bad span after rune",
765 | Input: "\nA <- [^0-9abcZ-A]",
766 | Error: "^test.file:2.14,2.17: bad span",
767 | },
768 |
769 | // Go syntax errors.
770 | {
771 | Name: `bad prelude`,
772 | Input: "{ not package line }\nA <- B",
773 | Error: "^test.file:1.3",
774 | },
775 | {
776 | Name: `bad multi-line prelude`,
777 | Input: `{
778 | package main
779 |
780 | import "fmt"
781 |
782 | // Missing open paren.
783 | func main() { fmt.Println"Hello, World") }
784 | }
785 | A <- B`,
786 | Error: "^test.file:7.26",
787 | },
788 | {
789 | Name: `bad bool expression`,
790 | // = instead of ==.
791 | Input: "\nA <- &{ x = z}",
792 | Error: "^test.file:2.11",
793 | },
794 | {
795 | Name: `bad multi-line bool expression`,
796 | // Missing the closing paren on p(.
797 | Input: "\nA <- &{ x == \n p(y, z, h}",
798 | Error: "^test.file:3.11",
799 | },
800 | {
801 | Name: `bad action`,
802 | Input: "A <- B { if ( }",
803 | Error: "^test.file:1.15",
804 | },
805 | {
806 | Name: `bad multi-line action`,
807 | Input: "\nA <- B {\n if ( }",
808 | Error: "^test.file:3.7",
809 | },
810 | {
811 | Name: `bad action: invalid nested func def`,
812 | Input: "\nA <- B { func f() int { return 1 } }",
813 | Error: "^test.file:2.15",
814 | },
815 | {
816 | Name: `action with nested return`,
817 | Input: "A <- B { if true { return 0 } else { return 1 } }",
818 | FullString: "A <- ((B) { if true { return 0 } else { return 1 } })",
819 | String: "A <- B {…}",
820 | },
821 | {
822 | Name: `missing return`,
823 | Input: "A <- B { }",
824 | Error: "^test.file:1.9: no return statement",
825 | },
826 | {
827 | Name: `multi-value return`,
828 | Input: "A <- B { return 1, 2, 3 }",
829 | Error: "^test.file:1.9: must return exactly one value",
830 | },
831 | {
832 | Name: `non-conversion multi-ary function return`,
833 | Input: "A <- B { return f(a, b, c) }",
834 | Error: "^test.file:1.9: cannot infer type",
835 | },
836 | {
837 | Name: `non-conversion nil-ary function return`,
838 | Input: "A <- B { return f() }",
839 | Error: "^test.file:1.9: cannot infer type",
840 | },
841 | {
842 | Name: `non-conversion function return`,
843 | Input: "A <- B { return f(a, b, c) }",
844 | Error: "^test.file:1.9: cannot infer type",
845 | },
846 |
847 | // I/O errors.
848 | {
849 | Name: "only I/O error",
850 | Input: "☹",
851 | Error: testIOError,
852 | },
853 | {
854 | Name: "comment I/O error",
855 | Input: "#☹",
856 | Error: testIOError,
857 | },
858 | {
859 | Name: "ident I/O error",
860 | Input: "A☹",
861 | Error: testIOError,
862 | },
863 | {
864 | Name: "arrow I/O error",
865 | Input: "A <☹",
866 | Error: testIOError,
867 | },
868 | {
869 | Name: "code I/O error",
870 | Input: "A <- B { ☹",
871 | Error: testIOError,
872 | },
873 | {
874 | Name: "char class I/O error",
875 | Input: "A <- [☹",
876 | Error: testIOError,
877 | },
878 | {
879 | Name: "double-quoted string I/O error",
880 | Input: "A <- \"☹",
881 | Error: testIOError,
882 | },
883 | {
884 | Name: "single-quoted string I/O error",
885 | Input: "A <- '☹",
886 | Error: testIOError,
887 | },
888 | }
889 |
890 | func TestParse(t *testing.T) {
891 | for _, test := range ParseTests {
892 | test := test
893 | t.Run(test.Name, func(t *testing.T) {
894 | t.Parallel()
895 | in := testRuneScanner{strings.NewReader(test.Input)}
896 | g, err := Parse(in, "test.file")
897 |
898 | if test.Error != "" {
899 | if err == nil {
900 | t.Log(pretty.String(g.Rules))
901 | t.Errorf("Parse(%q) ok, but expected error matching %q",
902 | test.Input, test.Error)
903 | return
904 | }
905 | re := regexp.MustCompile(test.Error)
906 | if !re.MatchString(err.Error()) {
907 | t.Errorf("Parse(%q) err=%q, but expected to match %q",
908 | test.Input, err.Error(), test.Error)
909 | return
910 | }
911 | return
912 | }
913 |
914 | if err != nil {
915 | t.Errorf("Parse(%q) failed: %s", test.Input, err)
916 | return
917 | }
918 | var pre string
919 | if g.Prelude != nil {
920 | pre = g.Prelude.String()
921 | }
922 | if pre != test.Prelude {
923 | t.Errorf("Parse(%q).Prelude=\n%s\nwant:\n%s",
924 | test.Input, pre, test.Prelude)
925 | return
926 | }
927 | if s := FullString(g.Rules); s != test.FullString {
928 | t.Errorf("Parse(%q)\nfull string:\n%q\nwant:\n%q",
929 | test.Input, s, test.FullString)
930 | return
931 | }
932 | if s := String(g.Rules); s != test.String {
933 | t.Errorf("Parse(%q)\nstring:\n%q\nwant:\n%q",
934 | test.Input, s, test.String)
935 | return
936 | }
937 | })
938 | }
939 | }
940 |
941 | // testRuneScanner implements io.RuneScanner by wrapping another RuneScanner;
942 | // whenever the wrapped scanner would have returned a ☹ rune,
943 | // testRuneScanner instead returns an error.
944 | type testRuneScanner struct {
945 | io.RuneScanner
946 | }
947 |
948 | const testIOError = "test I/O error"
949 |
950 | func (rs testRuneScanner) ReadRune() (rune, int, error) {
951 | r, n, err := rs.RuneScanner.ReadRune()
952 | if r == '☹' {
953 | return 0, 0, errors.New(testIOError)
954 | }
955 | return r, n, err
956 | }
957 |
--------------------------------------------------------------------------------
/peg/fail.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import "fmt"
10 |
11 | // SimpleError returns an error with a basic error message
12 | // that describes what was expected at all of the leaf fails
13 | // with the greatest position in the tree.
14 | //
15 | // The FilePath field of the returned Error is the empty string.
16 | // The caller can set this field to prefix the location
17 | // with the path to an input file.
18 | func SimpleError(text string, node *Fail) Error {
19 | leaves := LeafFails(node)
20 |
21 | var want string
22 | for i, l := range leaves {
23 | switch {
24 | case i == len(leaves)-1 && i == 1:
25 | want += " or "
26 | case i == len(leaves)-1 && len(want) > 1:
27 | want += ", or "
28 | case i > 0:
29 | want += ", "
30 | }
31 | want += l.Want
32 | }
33 |
34 | got := "EOF"
35 | pos := leaves[0].Pos
36 | if pos < len(text) {
37 | end := pos + 10
38 | if end > len(text) {
39 | end = len(text)
40 | }
41 | got = "'" + text[pos:end] + "'"
42 | }
43 |
44 | return Error{
45 | Loc: Location(text, pos),
46 | Message: fmt.Sprintf("want %s; got %s", want, got),
47 | }
48 | }
49 |
50 | // Error implements error, prefixing an error message
51 | // with location information for the error.
52 | type Error struct {
53 | // FilePath is the path of the input file containing the error.
54 | FilePath string
55 | // Loc is the location of the error.
56 | Loc Loc
57 | // Message is the error message.
58 | Message string
59 | }
60 |
61 | func (err Error) Error() string {
62 | return fmt.Sprintf("%s:%d.%d: %s",
63 | err.FilePath, err.Loc.Line, err.Loc.Column, err.Message)
64 | }
65 |
66 | // LeafFails returns all fails in the tree with the greatest Pos.
67 | func LeafFails(node *Fail) []*Fail {
68 | pos := -1
69 | var fails []*Fail
70 | seen := make(map[*Fail]bool)
71 | var walk func(*Fail)
72 | walk = func(n *Fail) {
73 | if seen[n] {
74 | return
75 | }
76 | seen[n] = true
77 | if len(n.Kids) == 0 {
78 | switch {
79 | case n.Pos > pos:
80 | pos = n.Pos
81 | fails = append(fails[:0], n)
82 | case n.Pos == pos:
83 | fails = append(fails, n)
84 | }
85 | return
86 | }
87 | for _, k := range n.Kids {
88 | walk(k)
89 | }
90 | }
91 | walk(node)
92 | return fails
93 | }
94 |
95 | // DedupFails removes duplicate fail branches from the tree,
96 | // keeping only the first occurrence of each.
97 | // This is useful for example before printing the Fail tree,
98 | // because the non-deduped Fail tree can be exponential
99 | // in the input size.
100 | func DedupFails(node *Fail) {
101 | seen := make(map[*Fail]bool)
102 | var walk func(*Fail) bool
103 | walk = func(n *Fail) bool {
104 | if seen[n] {
105 | return false
106 | }
107 | seen[n] = true
108 | var kids []*Fail
109 | for _, k := range n.Kids {
110 | if walk(k) {
111 | kids = append(kids, k)
112 | }
113 | }
114 | n.Kids = kids
115 | return true
116 | }
117 | walk(node)
118 | }
119 |
--------------------------------------------------------------------------------
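
A minimal usage sketch (not part of the repository) for the error-reporting helpers above: it hand-builds a small Fail tree, dedups it, and formats a SimpleError. The import path is an assumption; adjust it to the module path declared in go.mod.

package main

import (
	"fmt"

	"github.com/eaburns/peggy/peg" // assumed module path
)

func main() {
	text := "123456789\nabcdefg"
	// Two leaf failures at byte offset 10 (line 2, column 1).
	root := &peg.Fail{
		Name: "Expr",
		Kids: []*peg.Fail{
			{Pos: 10, Want: "A"},
			{Pos: 10, Want: "B"},
		},
	}
	peg.DedupFails(root) // harmless here; useful on large trees with shared branches
	err := peg.SimpleError(text, root)
	err.FilePath = "input.txt"
	fmt.Println(err) // input.txt:2.1: want A or B; got 'abcdefg'
}
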
/peg/fail_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import (
10 | "reflect"
11 | "testing"
12 |
13 | "github.com/eaburns/pretty"
14 | )
15 |
16 | func TestDedupFails(t *testing.T) {
17 | x := &Fail{Name: "x"}
18 | z := &Fail{Name: "z"}
19 | y := &Fail{Name: "y", Kids: []*Fail{z, z}}
20 | root := &Fail{
21 | Kids: []*Fail{
22 | x,
23 | &Fail{
24 | Kids: []*Fail{
25 | y,
26 | y,
27 | },
28 | },
29 | x,
30 | },
31 | }
32 | DedupFails(root)
33 | want := &Fail{
34 | Kids: []*Fail{
35 | &Fail{Name: "x"},
36 | &Fail{
37 | Kids: []*Fail{
38 | &Fail{
39 | Name: "y",
40 | Kids: []*Fail{
41 | &Fail{Name: "z"},
42 | },
43 | },
44 | },
45 | },
46 | },
47 | }
48 | if !reflect.DeepEqual(root, want) {
49 | t.Errorf("DedupFails()=%v, want %v",
50 | pretty.String(root), pretty.String(want))
51 | }
52 | }
53 |
54 | func TestLeafFails(t *testing.T) {
55 | x0 := &Fail{Name: "x0", Pos: 10}
56 | x1 := &Fail{Name: "x1", Pos: 10}
57 | y0 := &Fail{Name: "y0", Pos: 15}
58 | y1 := &Fail{Name: "y1", Pos: 15}
59 | z0 := &Fail{Name: "z0", Pos: 20}
60 | z1 := &Fail{Name: "z1", Pos: 20}
61 |
62 | root := &Fail{
63 | Kids: []*Fail{
64 | x0,
65 | y0,
66 | z0,
67 | &Fail{
68 | Kids: []*Fail{
69 | x1,
70 | y1,
71 | z1,
72 | z0,
73 | },
74 | },
75 | z1,
76 | x0,
77 | y1,
78 | },
79 | }
80 |
81 | got := LeafFails(root)
82 | want := []*Fail{z0, z1}
83 | if !reflect.DeepEqual(got, want) {
84 | t.Errorf("LeafFails()=%s, want %s",
85 | pretty.String(got), pretty.String(want))
86 | }
87 | }
88 |
89 | func TestSimpleError_1(t *testing.T) {
90 | text := "123456789\nabcdefg"
91 | root := &Fail{
92 | Kids: []*Fail{
93 | &Fail{Pos: 10, Want: "A"},
94 | },
95 | }
96 | err := SimpleError(text, root)
97 | want := ":2.1: want A; got 'abcdefg'"
98 | if err.Error() != want {
99 | t.Errorf("err.Error()=%q, want %q", err.Error(), want)
100 | }
101 | }
102 |
103 | func TestSimpleError_2(t *testing.T) {
104 | text := "123456789\nabcdefg"
105 | root := &Fail{
106 | Kids: []*Fail{
107 | &Fail{Pos: 10, Want: "A"},
108 | &Fail{Pos: 10, Want: "B"},
109 | },
110 | }
111 | err := SimpleError(text, root)
112 | want := ":2.1: want A or B; got 'abcdefg'"
113 | if err.Error() != want {
114 | t.Errorf("err.Error()=%q, want %q", err.Error(), want)
115 | }
116 | }
117 |
118 | func TestSimpleError_3(t *testing.T) {
119 | text := "123456789\nabcdefg"
120 | root := &Fail{
121 | Kids: []*Fail{
122 | &Fail{Pos: 10, Want: "A"},
123 | &Fail{Pos: 10, Want: "B"},
124 | &Fail{Pos: 10, Want: "C"},
125 | },
126 | }
127 | err := SimpleError(text, root)
128 | want := ":2.1: want A, B, or C; got 'abcdefg'"
129 | if err.Error() != want {
130 | t.Errorf("err.Error()=%q, want %q", err.Error(), want)
131 | }
132 | }
133 |
--------------------------------------------------------------------------------
/peg/loc.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import "unicode/utf8"
10 |
11 | // A Loc is a location in the input text.
12 | type Loc struct {
13 | Byte int
14 | Rune int
15 | Line int
16 | Column int
17 | }
18 |
19 | // Location returns the Loc corresponding to the given byte offset in the text.
20 | func Location(text string, byte int) Loc {
21 | var loc Loc
22 | loc.Line = 1
23 | loc.Column = 1
24 | for byte > loc.Byte {
25 | r, w := utf8.DecodeRuneInString(text[loc.Byte:])
26 | loc.Byte += w
27 | loc.Rune++
28 | loc.Column++
29 | if r == '\n' {
30 | loc.Line++
31 | loc.Column = 1
32 | }
33 | }
34 | return loc
35 | }
36 |
--------------------------------------------------------------------------------
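
A small sketch (not in the repository) of calling Location from outside the package; the import path is assumed as above. Column and Rune count runes while Byte counts bytes, so they diverge on multi-byte input:

package main

import (
	"fmt"

	"github.com/eaburns/peggy/peg" // assumed module path
)

func main() {
	loc := peg.Location("☺☺\n☺x", len("☺☺\n☺"))
	// The offset lands on x: byte 10, rune 4, line 2, column 2.
	fmt.Println(loc.Byte, loc.Rune, loc.Line, loc.Column)
}
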
/peg/loc_test.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import (
10 | "strings"
11 | "testing"
12 | )
13 |
14 | func TestLocation(t *testing.T) {
15 | tests := []struct {
16 | in string
17 | want Loc
18 | }{
19 | {
20 | in: "*",
21 | want: Loc{Byte: 0, Rune: 0, Line: 1, Column: 1},
22 | },
23 | {
24 | in: "abc*",
25 | want: Loc{Byte: 3, Rune: 3, Line: 1, Column: 4},
26 | },
27 | {
28 | in: "ab\n*",
29 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1},
30 | },
31 | {
32 | in: "ab\n*",
33 | want: Loc{Byte: 3, Rune: 3, Line: 2, Column: 1},
34 | },
35 | {
36 | in: "ab\nabc\nxyz*",
37 | want: Loc{Byte: 10, Rune: 10, Line: 3, Column: 4},
38 | },
39 | {
40 | in: "☺*",
41 | want: Loc{Byte: len("☺"), Rune: 1, Line: 1, Column: 2},
42 | },
43 | {
44 | in: "☺☺☺*",
45 | want: Loc{Byte: 3 * len("☺"), Rune: 3, Line: 1, Column: 4},
46 | },
47 | {
48 | in: "☺☺\n☺*",
49 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2},
50 | },
51 | {
52 | in: "☺☺\n☺*☹☹☹",
53 | want: Loc{Byte: 3*len("☺") + 1, Rune: 4, Line: 2, Column: 2},
54 | },
55 | }
56 | for _, test := range tests {
57 | b := strings.Index(test.in, "*")
58 | if b < 0 {
59 | panic("no *")
60 | }
61 | got := Location(test.in, b)
62 | if got != test.want {
63 | t.Errorf("Location(%q, %d)=%v, want %v", test.in, b, got, test.want)
64 | }
65 | }
66 | }
67 |
--------------------------------------------------------------------------------
/peg/peg.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import "unicode/utf8"
10 |
11 | // A Node is a node in a Peggy parse tree.
12 | type Node struct {
13 | // Name is the name of the Rule associated with the node,
14 | // or the empty string for anonymous Nodes
15 | // that are not associated with any Rule.
16 | Name string
17 |
18 | // Text is the input text of the Node's subtree.
19 | Text string
20 |
21 | // Kids are the immediate successors of this node.
22 | Kids []*Node
23 | }
24 |
25 | // A Fail is a node in a failed-parse tree.
26 | // A failed-parse tree contains all paths in a failed parse
27 | // that lead to the furthest error location in the input text.
28 | // There are two types of nodes: named and unnamed.
29 | // Named nodes represent grammar rules that failed to parse.
30 | // Unnamed nodes represent terminal expressions that failed to parse.
31 | type Fail struct {
32 | // Name is the name of the Rule associated with the node,
33 | // or the empty string if the Fail is a terminal expression failure.
34 | Name string
35 |
36 | // Pos is the byte offset into the input of the Fail.
37 | Pos int
38 |
39 | // Kids are the immediate successors of this Fail.
40 | // Kids is only non-nil for named Fail nodes.
41 | Kids []*Fail
42 |
43 | // Want is a string describing what was expected at the error position.
44 | // It is only non-empty for unnamed Fail nodes.
45 | //
46 | // It can be one of the following forms:
47 | // "…" indicating a failed literal match, where the text between the quotes is the expected literal using Go escaping.
48 | // . indicating a failed . match.
49 | // […] indicating a failed character class match, where the text between the [ and ] is the character class.
50 | // !… where the text after ! is the string representation of a failed predicate subexpression.
51 | // &… where the text after & is the string representation of a failed predicate subexpression.
52 | // … the error-name of a rule.
53 | // For example, "int" in rule: Integer "int" <- [0-9].
54 | Want string
55 | }
56 |
57 | // DecodeRuneInString is utf8.DecodeRuneInString.
58 | // It's here so parsers can just include peg, and not also need unicode/utf8.
59 | func DecodeRuneInString(s string) (rune, int) {
60 | return utf8.DecodeRuneInString(s)
61 | }
62 |
--------------------------------------------------------------------------------
/peg/pretty.go:
--------------------------------------------------------------------------------
1 | // Copyright 2018 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package peg
8 |
9 | import (
10 | "bytes"
11 | "io"
12 | "strconv"
13 | )
14 |
15 | type nodeOrFail interface {
16 | name() string
17 | numKids() int
18 | kid(int) nodeOrFail
19 | text() string
20 | }
21 |
22 | func (f *Node) name() string { return f.Name }
23 | func (f *Node) numKids() int { return len(f.Kids) }
24 | func (f *Node) kid(i int) nodeOrFail { return f.Kids[i] }
25 | func (f *Node) text() string { return f.Text }
26 | func (f *Fail) name() string { return f.Name }
27 | func (f *Fail) numKids() int { return len(f.Kids) }
28 | func (f *Fail) kid(i int) nodeOrFail { return f.Kids[i] }
29 | func (f *Fail) text() string { return f.Want }
30 |
31 | // Pretty returns a human-readable string of a Node or Fail
32 | // and the subtree beneath it.
33 | // The output looks like:
34 | // 	<name>{
35 | // 		<kid 0>,
36 | // 		<kid 1>,
37 | // 		…
38 | // 		<kid n>,
39 | // 	}
40 | func Pretty(n nodeOrFail) string {
41 | b := bytes.NewBuffer(nil)
42 | PrettyWrite(b, n)
43 | return b.String()
44 | }
45 |
46 | // PrettyWrite is like Pretty but outputs to an io.Writer.
47 | func PrettyWrite(w io.Writer, n nodeOrFail) error {
48 | return prettyWrite(w, "", n)
49 | }
50 |
51 | func prettyWrite(w io.Writer, tab string, n nodeOrFail) error {
52 | if _, err := io.WriteString(w, tab); err != nil {
53 | return err
54 | }
55 | if n.numKids() == 0 {
56 | if n.name() != "" {
57 | if _, err := io.WriteString(w, n.name()+"("); err != nil {
58 | return err
59 | }
60 | }
61 | if _, err := io.WriteString(w, `"`+n.text()+`"`); err != nil {
62 | return err
63 | }
64 | if n.name() != "" {
65 | if _, err := io.WriteString(w, ")"); err != nil {
66 | return err
67 | }
68 | }
69 | return nil
70 | }
71 | if _, err := io.WriteString(w, n.name()); err != nil {
72 | return err
73 | }
74 | if f, ok := n.(*Fail); ok {
75 | pos := "[" + strconv.Itoa(f.Pos) + "]"
76 | if _, err := io.WriteString(w, pos); err != nil {
77 | return err
78 | }
79 | }
80 | if n.numKids() == 0 {
81 | if n.name() == "" {
82 | if _, err := io.WriteString(w, "{}"); err != nil {
83 | return err
84 | }
85 | }
86 | return nil
87 | }
88 | if _, err := io.WriteString(w, "{"); err != nil {
89 | return err
90 | }
91 | if n.numKids() == 1 && n.kid(0).numKids() == 0 {
92 | if err := prettyWrite(w, "", n.kid(0)); err != nil {
93 | return err
94 | }
95 | if _, err := io.WriteString(w, "}"); err != nil {
96 | return err
97 | }
98 | return nil
99 | }
100 | for i := 0; i < n.numKids(); i++ {
101 | if _, err := io.WriteString(w, "\n"); err != nil {
102 | return err
103 | }
104 | if err := prettyWrite(w, tab+"\t", n.kid(i)); err != nil {
105 | return err
106 | }
107 | if _, err := io.WriteString(w, ","); err != nil {
108 | return err
109 | }
110 | }
111 | if _, err := io.WriteString(w, "\n"+tab+"}"); err != nil {
112 | return err
113 | }
114 | return nil
115 | }
116 |
--------------------------------------------------------------------------------
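
The Pretty helpers above accept both Node and Fail trees. A minimal sketch (not in the repository), again assuming the module path, that prints a hand-built parse tree:

package main

import (
	"fmt"

	"github.com/eaburns/peggy/peg" // assumed module path
)

func main() {
	// A hand-built parse tree for the input "1+2".
	tree := &peg.Node{
		Name: "Sum",
		Text: "1+2",
		Kids: []*peg.Node{
			{Name: "Num", Text: "1"},
			{Text: "+"},
			{Name: "Num", Text: "2"},
		},
	}
	fmt.Println(peg.Pretty(tree))
	// Output (tab-indented):
	// Sum{
	//	Num("1"),
	//	"+",
	//	Num("2"),
	// }
}
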
/rule.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import "fmt"
10 |
11 | // Grammar is a PEG grammar.
12 | type Grammar struct {
13 | // Prelude is custom code added to the beginning of the generated output.
14 | Prelude Text
15 |
16 | // Rules are the rules of the grammar.
17 | Rules []Rule
18 |
19 | // CheckedRules are the rules successfully checked by the Check pass.
20 | // It contains all non-template rules and all expanded templates.
21 | CheckedRules []*Rule
22 | }
23 |
24 | // A Rule defines a production in a PEG grammar.
25 | type Rule struct {
26 | Name
27 |
28 | // ErrorName, if non-nil, indicates that this is a named rule.
29 | // Errors beneath a named rule are collapsed,
30 | // reporting the error position as the start of the rule's parse
31 | // with the "want" message set to ErrorName.
32 | //
33 | // If nil, the rule is unnamed and does not collapse errors.
34 | ErrorName Text
35 |
36 | // Expr is the PEG expression matched by the rule.
37 | Expr Expr
38 |
39 | // N is the rule's unique integer within its containing Grammar.
40 | // It is a small integer that may be used as an array index.
41 | N int
42 |
43 | // typ is the type of the rule in the action pass.
44 | // typ is nil before the checkLeft pass and non-nil after.
45 | typ *string
46 |
47 | // epsilon indicates whether the rule can match the empty string.
48 | epsilon bool
49 |
50 | // Labels is the set of all label names in the rule's expression.
51 | Labels []*LabelExpr
52 | }
53 |
54 | func (r *Rule) Begin() Loc { return r.Name.Begin() }
55 | func (r *Rule) End() Loc { return r.Expr.End() }
56 | func (r Rule) Type() string { return *r.typ }
57 |
58 | // A Name is the name of a rule template.
59 | type Name struct {
60 | // Name is the name of the template.
61 | Name Text
62 |
63 | // Args are the arguments or parameters of the template.
64 | Args []Text
65 | }
66 |
67 | func (n Name) Begin() Loc { return n.Name.Begin() }
68 | func (n Name) End() Loc {
69 | if len(n.Args) == 0 {
70 | return n.Name.End()
71 | }
72 | return n.Args[len(n.Args)-1].End()
73 | }
74 |
75 | // Text is a string of text together with its location in the input.
76 | type Text interface {
77 | Located
78 | // String is the text string.
79 | String() string
80 | }
81 |
82 | // Loc identifies a location in a file by its line and column numbers.
83 | type Loc struct {
84 | // File is the name of the input file.
85 | File string
86 | // Line is the line number of the location.
87 | // The first line of input is line number 1.
88 | Line int
89 | // Col is the Loc's rune offset into the line.
90 | // Col 0 is before the first rune on the line.
91 | Col int
92 | }
93 |
94 | // Less returns whether the receiver is earlier in the input than the argument.
95 | func (l Loc) Less(j Loc) bool {
96 | if l.Line == j.Line {
97 | return l.Col < j.Col
98 | }
99 | return l.Line < j.Line
100 | }
101 |
102 | // PrettyPrint implements the pretty.PrettyPrinter interface,
103 | // returning a simpler, one-line string form of the Loc.
104 | func (l Loc) PrettyPrint() string { return fmt.Sprintf("Loc{%d, %d}", l.Line, l.Col) }
105 |
106 | // Begin returns the Loc.
107 | func (l Loc) Begin() Loc { return l }
108 |
109 | // End returns the Loc.
110 | func (l Loc) End() Loc { return l }
111 |
112 | // Expr is a PEG expression that matches a sequence of input runes.
113 | type Expr interface {
114 | Located
115 | String() string
116 |
117 | // fullString returns the fully parenthesized string representation.
118 | fullString() string
119 |
120 | // Walk calls a function for each expression in the tree.
121 | // Walk stops early if the function returns false.
122 | Walk(func(Expr) bool) bool
123 |
124 | // substitute returns a clone of the expression
125 | // with all occurrences of identifiers that are keys of sub
126 | // substituted with the corresponding value.
127 | // substitute must not be called after Check,
128 | // because it does not update bookkeeping fields
129 | // that are set by the Check pass.
130 | substitute(sub map[string]string) Expr
131 |
132 | // Type returns the type of the expression in the Action Tree.
133 | // This is the Go type associated with the expression.
134 | Type() string
135 |
136 | // epsilon returns whether the expression can match the empty string.
137 | epsilon() bool
138 |
139 | // CanFail returns whether the node can ever fail to parse.
140 | // Nodes like * or ?, for example, can never fail.
141 | // Parents of never-fail nodes needn't emit a failure branch,
142 | // as it will never be called.
143 | CanFail() bool
144 |
145 | // checkLeft checks for left-recursion and sets rule types.
146 | checkLeft(rules map[string]*Rule, p path, errs *Errors)
147 |
148 | // check checks for undefined identifiers,
149 | // linking defined identifiers to rules;
150 | // and checks for type mismatches.
151 | check(ctx ctx, valueUsed bool, errs *Errors)
152 | }
153 |
154 | // A Choice is an ordered choice between expressions.
155 | type Choice struct{ Exprs []Expr }
156 |
157 | func (e *Choice) Begin() Loc { return e.Exprs[0].Begin() }
158 | func (e *Choice) End() Loc { return e.Exprs[len(e.Exprs)-1].End() }
159 |
160 | func (e *Choice) Walk(f func(Expr) bool) bool {
161 | if !f(e) {
162 | return false
163 | }
164 | for _, kid := range e.Exprs {
165 | if !kid.Walk(f) {
166 | return false
167 | }
168 | }
169 | return true
170 | }
171 |
172 | func (e *Choice) substitute(sub map[string]string) Expr {
173 | substitute := *e
174 | substitute.Exprs = make([]Expr, len(e.Exprs))
175 | for i, kid := range e.Exprs {
176 | substitute.Exprs[i] = kid.substitute(sub)
177 | }
178 | return &substitute
179 | }
180 |
181 | // Type returns the type of a choice expression,
182 | // which is the type of its first branch.
183 | // All other branches must have the same type;
184 | // this is verified during the Check pass.
185 | func (e *Choice) Type() string { return e.Exprs[0].Type() }
186 |
187 | func (e *Choice) epsilon() bool {
188 | for _, e := range e.Exprs {
189 | if e.epsilon() {
190 | return true
191 | }
192 | }
193 | return false
194 | }
195 |
196 | func (e *Choice) CanFail() bool {
197 | // A choice node can only fail if all of its branches can fail.
198 | // If any branch cannot fail, the choice will always accept.
199 | for _, s := range e.Exprs {
200 | if !s.CanFail() {
201 | return false
202 | }
203 | }
204 | return true
205 | }
206 |
207 | // An Action is an action expression:
208 | // a subexpression and code to run if matched.
209 | type Action struct {
210 | Expr Expr
211 | // Code is the Go code to execute if the subexpression is matched.
212 | // The Begin and End locations of Code include the { } delimiters,
213 | // but the string does not.
214 | //
215 | // TODO: specify the environment under which the code is run.
216 | Code Text
217 |
218 | // ReturnType is the Go type of the value returned by the action.
219 | ReturnType string
220 |
221 | // Labels are the labels that are in scope of this action.
222 | Labels []*LabelExpr
223 | }
224 |
225 | func (e *Action) Begin() Loc { return e.Expr.Begin() }
226 | func (e *Action) End() Loc { return e.Code.End() }
227 | func (e *Action) Type() string { return e.ReturnType }
228 | func (e *Action) epsilon() bool { return e.Expr.epsilon() }
229 | func (e *Action) CanFail() bool { return e.Expr.CanFail() }
230 |
231 | func (e *Action) Walk(f func(Expr) bool) bool {
232 | return f(e) && e.Expr.Walk(f)
233 | }
234 |
235 | func (e *Action) substitute(sub map[string]string) Expr {
236 | substitute := *e
237 | substitute.Expr = e.Expr.substitute(sub)
238 | substitute.Labels = nil
239 | return &substitute
240 | }
241 |
242 | // A Sequence is a sequence of expressions.
243 | type Sequence struct{ Exprs []Expr }
244 |
245 | func (e *Sequence) Begin() Loc { return e.Exprs[0].Begin() }
246 | func (e *Sequence) End() Loc { return e.Exprs[len(e.Exprs)-1].End() }
247 |
248 | func (e *Sequence) Walk(f func(Expr) bool) bool {
249 | if !f(e) {
250 | return false
251 | }
252 | for _, kid := range e.Exprs {
253 | if !kid.Walk(f) {
254 | return false
255 | }
256 | }
257 | return true
258 | }
259 |
260 | func (e *Sequence) substitute(sub map[string]string) Expr {
261 | substitute := *e
262 | substitute.Exprs = make([]Expr, len(e.Exprs))
263 | for i, kid := range e.Exprs {
264 | substitute.Exprs[i] = kid.substitute(sub)
265 | }
266 | return &substitute
267 | }
268 |
269 | // Type returns the type of a sequence expression,
270 | // which is based on the type of its first sub-expression.
271 | // All other sub-expressions must have the same type;
272 | // this is verified during the Check pass.
273 | //
274 | // If the first sub-expression is a string,
275 | // the type of the entire sequence is a string.
276 | // The value is the concatenation of all sub-expressions.
277 | //
278 | // Otherwise, the type is a slice of the first sub-expression type.
279 | // The value is the slice of all sub-expression values.
280 | func (e *Sequence) Type() string {
281 | t := e.Exprs[0].Type()
282 | switch t {
283 | case "":
284 | return ""
285 | case "string":
286 | return "string"
287 | default:
288 | return "[]" + t
289 | }
290 | }
291 |
292 | func (e *Sequence) epsilon() bool {
293 | for _, e := range e.Exprs {
294 | if !e.epsilon() {
295 | return false
296 | }
297 | }
298 | return true
299 | }
300 |
301 | func (e *Sequence) CanFail() bool {
302 | for _, s := range e.Exprs {
303 | if s.CanFail() {
304 | return true
305 | }
306 | }
307 | return false
308 | }
309 |
310 | // A LabelExpr is a labeled subexpression.
311 | // The label can be used in actions to refer to the result of the subexpression.
312 | type LabelExpr struct {
313 | // Label is the text of the label, not including the :.
314 | Label Text
315 | Expr Expr
316 | // N is a small integer assigned to this label
317 | // that is unique within the containing Rule.
318 | // It is a small integer that may be used as an array index.
319 | N int
320 | }
321 |
322 | func (e *LabelExpr) Begin() Loc { return e.Label.Begin() }
323 | func (e *LabelExpr) End() Loc { return e.Expr.End() }
324 | func (e *LabelExpr) Type() string { return e.Expr.Type() }
325 | func (e *LabelExpr) epsilon() bool { return e.Expr.epsilon() }
326 | func (e *LabelExpr) CanFail() bool { return e.Expr.CanFail() }
327 |
328 | func (e *LabelExpr) Walk(f func(Expr) bool) bool {
329 | return f(e) && e.Expr.Walk(f)
330 | }
331 |
332 | func (e *LabelExpr) substitute(sub map[string]string) Expr {
333 | substitute := *e
334 | substitute.Expr = e.Expr.substitute(sub)
335 | return &substitute
336 | }
337 |
338 | // A PredExpr is a non-consuming predicate expression:
339 | // If it succeeds (or fails, in the case of Neg),
340 | // return success and consume no input.
341 | // If it fails (or succeeds, in the case of Neg),
342 | // return failure and consume no input.
343 | // Predicate expressions allow a powerful form of lookahead.
344 | type PredExpr struct {
345 | Expr Expr
346 | // Neg indicates that the result of the predicate is negated.
347 | Neg bool
348 | // Loc is the location of the operator, & or !.
349 | Loc Loc
350 | }
351 |
352 | func (e *PredExpr) Begin() Loc { return e.Loc }
353 | func (e *PredExpr) End() Loc { return e.Expr.End() }
354 |
355 | // Type returns the type of the predicate expression,
356 | // which is a string; the value is always the empty string.
357 | func (e *PredExpr) Type() string { return "string" }
358 |
359 | func (e *PredExpr) epsilon() bool { return true }
360 | func (e *PredExpr) CanFail() bool { return e.Expr.CanFail() }
361 |
362 | func (e *PredExpr) Walk(f func(Expr) bool) bool {
363 | return f(e) && e.Expr.Walk(f)
364 | }
365 |
366 | func (e *PredExpr) substitute(sub map[string]string) Expr {
367 | substitute := *e
368 | substitute.Expr = e.Expr.substitute(sub)
369 | return &substitute
370 | }
371 |
372 | // A RepExpr is a repetition expression, specifying whether the sub-expression
373 | // should be matched any number of times (*) or one or more times (+).
374 | type RepExpr struct {
375 | // Op is one of * or +.
376 | Op rune
377 | Expr Expr
378 | // Loc is the location of the operator, * or +.
379 | Loc Loc
380 | }
381 |
382 | func (e *RepExpr) Begin() Loc { return e.Expr.Begin() }
383 | func (e *RepExpr) End() Loc { return e.Loc }
384 |
385 | // Type returns the type of the repetition expression,
386 | // which is based on the type of its sub-expression.
387 | //
388 | // If the sub-expression type is string,
389 | // the repetition expression type is a string.
390 | // The value is the concatenation of all matches,
391 | // or the empty string if nothing matches.
392 | //
393 | // Otherwise, the type is a slice of the sub-expression type.
394 | // The value contains an element for each match
395 | // of the sub-expression.
396 | func (e *RepExpr) Type() string {
397 | switch t := e.Expr.Type(); t {
398 | case "":
399 | return ""
400 | case "string":
401 | return t
402 | default:
403 | return "[]" + t
404 | }
405 | }
406 |
407 | func (e *RepExpr) epsilon() bool { return e.Op == '*' }
408 | func (e *RepExpr) CanFail() bool { return e.Op == '+' && e.Expr.CanFail() }
409 |
410 | func (e *RepExpr) Walk(f func(Expr) bool) bool {
411 | return f(e) && e.Expr.Walk(f)
412 | }
413 |
414 | func (e *RepExpr) substitute(sub map[string]string) Expr {
415 | substitute := *e
416 | substitute.Expr = e.Expr.substitute(sub)
417 | return &substitute
418 | }
419 |
420 | // An OptExpr is an optional expression, which may or may not be matched.
421 | type OptExpr struct {
422 | Expr Expr
423 | // Loc is the location of the ?.
424 | Loc Loc
425 | }
426 |
427 | func (e *OptExpr) Begin() Loc { return e.Expr.Begin() }
428 | func (e *OptExpr) End() Loc { return e.Loc }
429 |
430 | // Type returns the type of the optional expression,
431 | // which is based on the type of its sub-expression.
432 | //
433 | // If the sub-expression type is string,
434 | // the optional expression type is a string.
435 | // The value is the value of the sub-expression if it matched,
436 | // or the empty string if it did not match.
437 | //
438 | // Otherwise, the type is a pointer to the type of the sub-expression.
439 | // The value is a pointer to the sub-expression's value if it matched,
440 | // or a nil pointer if it did not match.
441 | func (e *OptExpr) Type() string {
442 | switch t := e.Expr.Type(); {
443 | case t == "":
444 | return ""
445 | case t == "string":
446 | return t
447 | default:
448 | return "*" + e.Expr.Type()
449 | }
450 | }
451 |
452 | func (e *OptExpr) epsilon() bool { return true }
453 | func (e *OptExpr) CanFail() bool { return false }
454 |
455 | func (e *OptExpr) Walk(f func(Expr) bool) bool {
456 | return f(e) && e.Expr.Walk(f)
457 | }
458 |
459 | func (e *OptExpr) substitute(sub map[string]string) Expr {
460 | substitute := *e
461 | substitute.Expr = e.Expr.substitute(sub)
462 | return &substitute
463 | }
464 |
465 | // An Ident is an identifier referring to the name of another rule,
466 | // indicating to match that rule's expression.
467 | type Ident struct {
468 | Name
469 |
470 | // rule is the rule referred to by this identifier.
471 | // It is set during check.
472 | rule *Rule
473 | }
474 |
475 | func (e *Ident) Begin() Loc { return e.Name.Begin() }
476 | func (e *Ident) End() Loc { return e.Name.End() }
477 | func (e *Ident) CanFail() bool { return true }
478 | func (e *Ident) Walk(f func(Expr) bool) bool { return f(e) }
479 |
480 | // Type returns the type of the identifier expression,
481 | // which is the type of its corresponding rule.
482 | func (e *Ident) Type() string {
483 | if e.rule == nil {
484 | return ""
485 | }
486 | return e.rule.Type()
487 | }
488 |
489 | func (e *Ident) epsilon() bool {
490 | if e.rule == nil {
491 | return false
492 | }
493 | return e.rule.epsilon
494 | }
495 |
496 | func (e *Ident) substitute(sub map[string]string) Expr {
497 | substitute := *e
498 | if s, ok := sub[e.Name.String()]; ok {
499 | substitute.Name = Name{
500 | Name: text{
501 | str: s,
502 | begin: e.Name.Begin(),
503 | end: e.Name.End(),
504 | },
505 | }
506 | }
507 | substitute.Args = make([]Text, len(e.Args))
508 | for i, a := range e.Args {
509 | if s, ok := sub[a.String()]; !ok {
510 | substitute.Args[i] = e.Args[i]
511 | } else {
512 | substitute.Args[i] = text{
513 | str: s,
514 | begin: a.Begin(),
515 | end: a.End(),
516 | }
517 | }
518 | }
519 | return &substitute
520 | }
521 |
522 | // A SubExpr simply wraps an expression.
523 | // It holds no extra information beyond tracking parentheses.
524 | // Its purpose is to allow easily re-inserting the parentheses
525 | // when stringifying an expression, without the need
526 | // to compute precedence inversion for each subexpression.
527 | type SubExpr struct {
528 | Expr
529 | // Open is the location of the open parenthesis.
530 | // Close is the location of the close parenthesis.
531 | Open, Close Loc
532 | }
533 |
534 | func (e *SubExpr) Begin() Loc { return e.Open }
535 | func (e *SubExpr) End() Loc { return e.Close }
536 | func (e *SubExpr) Type() string { return e.Expr.Type() }
537 | func (e *SubExpr) epsilon() bool { return e.Expr.epsilon() }
538 | func (e *SubExpr) CanFail() bool { return e.Expr.CanFail() }
539 |
540 | func (e *SubExpr) Walk(f func(Expr) bool) bool {
541 | return f(e) && e.Expr.Walk(f)
542 | }
543 |
544 | func (e *SubExpr) substitute(sub map[string]string) Expr {
545 | substitute := *e
546 | substitute.Expr = e.Expr.substitute(sub)
547 | return &substitute
548 | }
549 |
550 | // A PredCode is a predicate code expression,
551 | // allowing predication using a Go boolean expression.
552 | //
553 | // TODO: Specify the conditions under which the expression is evaluated.
554 | type PredCode struct {
555 | // Code is a Go boolean expression.
556 | // The Begin and End locations of Code include the { } delimiters,
557 | // but the string does not.
558 | Code Text
559 | // Neg indicates that the result of the predicate is negated.
560 | Neg bool
561 | // Loc is the location of the operator, & or !.
562 | Loc Loc
563 |
564 | // Labels are the labels that are in scope of this action.
565 | Labels []*LabelExpr
566 | }
567 |
568 | func (e *PredCode) Begin() Loc { return e.Loc }
569 | func (e *PredCode) End() Loc { return e.Code.End() }
570 |
571 | // Type returns the type of the predicate code expression,
572 | // which is a string; the value is always the empty string.
573 | func (e *PredCode) Type() string { return "string" }
574 |
575 | func (e *PredCode) epsilon() bool { return true }
576 | func (e *PredCode) CanFail() bool { return true }
577 | func (e *PredCode) Walk(f func(Expr) bool) bool { return f(e) }
578 |
579 | func (e *PredCode) substitute(sub map[string]string) Expr {
580 | substitute := *e
581 | substitute.Labels = nil
582 | return &substitute
583 | }
584 |
585 | // A Literal matches a literal text string.
586 | type Literal struct {
587 | // Text is the text to match.
588 | // The Begin and End locations of Text include the ' or " delimiters,
589 | // but the string does not.
590 | Text Text
591 | }
592 |
593 | func (e *Literal) Begin() Loc { return e.Text.Begin() }
594 | func (e *Literal) End() Loc { return e.Text.End() }
595 | func (e *Literal) Type() string { return "string" }
596 | func (e *Literal) epsilon() bool { return false }
597 | func (e *Literal) CanFail() bool { return true }
598 | func (e *Literal) Walk(f func(Expr) bool) bool { return f(e) }
599 |
600 | func (e *Literal) substitute(sub map[string]string) Expr {
601 | substitute := *e
602 | return &substitute
603 | }
604 |
605 | // A CharClass matches a single rune from a set of acceptable
606 | // (or unacceptable if Neg) runes.
607 | type CharClass struct {
608 | // Spans are rune spans accepted (or rejected) by the character class.
609 | // The 0th rune is always ≤ the 1st.
610 | // A single-rune match is a span whose two runes are equal.
611 | Spans [][2]rune
612 |
613 | // Neg indicates that the input must not match any in the set.
614 | Neg bool
615 |
616 | // Open and Close are the Loc of [ and ] respectively.
617 | Open, Close Loc
618 | }
619 |
620 | func (e *CharClass) Begin() Loc { return e.Open }
621 | func (e *CharClass) End() Loc { return e.Close }
622 | func (e *CharClass) Type() string { return "string" }
623 | func (e *CharClass) epsilon() bool { return false }
624 | func (e *CharClass) CanFail() bool { return true }
625 | func (e *CharClass) Walk(f func(Expr) bool) bool { return f(e) }
626 |
627 | func (e *CharClass) substitute(sub map[string]string) Expr {
628 | substitute := *e
629 | return &substitute
630 | }
631 |
632 | // Any matches any rune.
633 | type Any struct {
634 | // Loc is the location of the . symbol.
635 | Loc Loc
636 | }
637 |
638 | func (e *Any) Begin() Loc { return e.Loc }
639 | func (e *Any) End() Loc { return Loc{Line: e.Loc.Line, Col: e.Loc.Col + 1} }
640 | func (e *Any) Type() string { return "string" }
641 | func (e *Any) epsilon() bool { return false }
642 | func (e *Any) CanFail() bool { return true }
643 | func (e *Any) Walk(f func(Expr) bool) bool { return f(e) }
644 |
645 | func (e *Any) substitute(sub map[string]string) Expr {
646 | substitute := *e
647 | return &substitute
648 | }
649 |
--------------------------------------------------------------------------------
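
The Expr interface above composes: String, Type, CanFail, and Walk all recurse through the expression tree. A minimal in-package sketch (not part of the repository; exampleExprTree is a hypothetical name) that hand-builds the expression (./[a-z])+ using only expression types that need no Text values, so it compiles if dropped into the same package:

package main

import "fmt"

// exampleExprTree builds (./[a-z])+ by hand and exercises a few Expr methods.
func exampleExprTree() {
	e := &RepExpr{
		Op: '+',
		Expr: &SubExpr{
			Expr: &Choice{Exprs: []Expr{
				&Any{},
				&CharClass{Spans: [][2]rune{{'a', 'z'}}},
			}},
		},
	}
	fmt.Println(e.String())  // (./[a-z])+
	fmt.Println(e.CanFail()) // true: + fails if the sub-expression never matches
	fmt.Println(e.Type())    // string

	// Walk visits every expression in the tree: +, (), /, ., and [a-z].
	n := 0
	e.Walk(func(Expr) bool { n++; return true })
	fmt.Println(n) // 5
}
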
/string.go:
--------------------------------------------------------------------------------
1 | // Copyright 2017 The Peggy Authors
2 | //
3 | // Use of this source code is governed by a BSD-style
4 | // license that can be found in the LICENSE file or at
5 | // https://developers.google.com/open-source/licenses/bsd.
6 |
7 | package main
8 |
9 | import (
10 | "fmt"
11 | "strconv"
12 | "strings"
13 | )
14 |
15 | // String returns the string representation of the rules.
16 | // The output contains no comments or whitespace,
17 | // except for a single space, " ",
18 | // separating sub-expressions of a sequence,
19 | // and on either side of <-.
20 | func String(rules []Rule) string {
21 | var s string
22 | for _, r := range rules {
23 | if s != "" {
24 | s += "\n"
25 | }
26 | s += r.String()
27 | }
28 | return s
29 | }
30 |
31 | // String returns the string representation of a rule.
32 | // The output contains no comments or whitespace,
33 | // except for a single space, " ",
34 | // separating sub-expressions of a sequence,
35 | // and on either side of <-.
36 | func (r *Rule) String() string {
37 | var name string
38 | if r.ErrorName != nil {
39 | name = " " + strconv.Quote(r.ErrorName.String())
40 | }
41 | return r.Name.String() + name + " <- " + r.Expr.String()
42 | }
43 |
44 | func (n Name) String() string {
45 | if len(n.Args) == 0 {
46 | return n.Name.String()
47 | }
48 | s := n.Name.String() + "<"
49 | for i, a := range n.Args {
50 | if i > 0 {
51 | s += ", "
52 | }
53 | s += a.String()
54 | }
55 | return s + ">"
56 | }
57 |
58 | // Ident returns a Go identifier for the name.
59 | func (n Name) Ident() string {
60 | if len(n.Args) == 0 {
61 | return n.Name.String()
62 | }
63 | s := n.Name.String() + "__"
64 | for i, a := range n.Args {
65 | if i > 0 {
66 | s += "__"
67 | }
68 | s += a.String()
69 | }
70 | return s
71 | }
72 |
73 | func (e *Choice) String() string {
74 | s := e.Exprs[0].String()
75 | for _, sub := range e.Exprs[1:] {
76 | s += "/" + sub.String()
77 | }
78 | return s
79 | }
80 |
81 | func (e *Action) String() string {
82 | if *prettyPrint {
83 | return e.Expr.String()
84 | }
85 | return e.Expr.String() + " {…}"
86 | }
87 |
88 | func (e *Sequence) String() string {
89 | s := e.Exprs[0].String()
90 | for _, sub := range e.Exprs[1:] {
91 | s += " " + sub.String()
92 | }
93 | return s
94 | }
95 |
96 | func (e *LabelExpr) String() string {
97 | if *prettyPrint {
98 | return e.Expr.String()
99 | }
100 | return e.Label.String() + ":" + e.Expr.String()
101 | }
102 |
103 | func (e *PredExpr) String() string {
104 | s := "&"
105 | if e.Neg {
106 | s = "!"
107 | }
108 | return s + e.Expr.String()
109 | }
110 |
111 | func (e *RepExpr) String() string {
112 | return e.Expr.String() + string([]rune{e.Op})
113 | }
114 |
115 | func (e *OptExpr) String() string {
116 | return e.Expr.String() + "?"
117 | }
118 |
119 | func (e *SubExpr) String() string {
120 | return "(" + e.Expr.String() + ")"
121 | }
122 |
123 | func (e *Ident) String() string {
124 | return e.Name.String()
125 | }
126 |
127 | func (e *PredCode) String() string {
128 | s := "&{"
129 | if e.Neg {
130 | s = "!{"
131 | }
132 | return s + "…}"
133 | }
134 |
135 | func (e *Literal) String() string {
136 | s := strconv.QuoteToGraphic(e.Text.String())
137 | // Replace some combining characters with their escaped version.
138 | for _, sub := range []string{
139 | "\u0301",
140 | "\u0304",
141 | "\u030C",
142 | "\u0306",
143 | "\u0309",
144 | "\u0302",
145 | "\u0300",
146 | "\u0303",
147 | } {
148 | q := strconv.QuoteToASCII(sub)
149 | s = strings.Replace(s, sub, q[1:len(q)-1], -1)
150 | }
151 | return s
152 | }
153 |
154 | func (e *CharClass) String() string {
155 | s := "["
156 | if e.Neg {
157 | s += "^"
158 | }
159 | for _, sp := range e.Spans {
160 | if sp[0] == sp[1] {
161 | s += charClassEsc(sp[0])
162 | } else {
163 | s += charClassEsc(sp[0]) + "-" + charClassEsc(sp[1])
164 | }
165 | }
166 | return s + "]"
167 | }
168 |
169 | func charClassEsc(r rune) string {
170 | switch r {
171 | case '^':
172 | return `\^`
173 | case '-':
174 | return `\-`
175 | case ']':
176 | return `\]`
177 | }
178 | s := strconv.QuoteRuneToGraphic(r)
179 | return strings.TrimPrefix(strings.TrimSuffix(s, "'"), "'")
180 | }
181 |
182 | func (e *Any) String() string { return "." }
183 |
184 | // FullString returns the fully parenthesized string representation of the rules.
185 | // The output contains no comments or whitespace,
186 | // except for a single space, " ",
187 | // separating sub-expressions of a sequence,
188 | // and on either side of <-.
189 | func FullString(rules []Rule) string {
190 | var s string
191 | for _, r := range rules {
192 | if s != "" {
193 | s += "\n"
194 | }
195 |
196 | var name string
197 | if r.ErrorName != nil {
198 | name = " " + strconv.Quote(r.ErrorName.String())
199 | }
200 | s += fmt.Sprintf("%s%s <- %s", r.Name, name, r.Expr.fullString())
201 | }
202 | return s
203 | }
204 |
205 | func (e *Choice) fullString() string {
206 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString()
207 | for _, sub := range e.Exprs[1:] {
208 | s += "/" + sub.fullString() + ")"
209 | }
210 | return s
211 | }
212 |
213 | func (e *Action) fullString() string {
214 | return "(" + e.Expr.fullString() + " {" + e.Code.String() + "})"
215 | }
216 |
217 | func (e *Sequence) fullString() string {
218 | s := strings.Repeat("(", len(e.Exprs)-1) + e.Exprs[0].fullString()
219 | for _, sub := range e.Exprs[1:] {
220 | s += " " + sub.fullString() + ")"
221 | }
222 | return s
223 | }
224 |
225 | func (e *LabelExpr) fullString() string {
226 | return fmt.Sprintf("(%s:%s)", e.Label.String(), e.Expr.fullString())
227 | }
228 |
229 | func (e *PredExpr) fullString() string {
230 | if e.Neg {
231 | return fmt.Sprintf("(!%s)", e.Expr.fullString())
232 | }
233 | return fmt.Sprintf("(&%s)", e.Expr.fullString())
234 | }
235 |
236 | func (e *RepExpr) fullString() string {
237 | return fmt.Sprintf("(%s%c)", e.Expr.fullString(), e.Op)
238 | }
239 |
240 | func (e *OptExpr) fullString() string {
241 | return "(" + e.Expr.fullString() + "?)"
242 | }
243 |
244 | func (e *Ident) fullString() string { return "(" + e.String() + ")" }
245 |
246 | func (e *PredCode) fullString() string {
247 | s := "(&{"
248 | if e.Neg {
249 | s = "(!{"
250 | }
251 | return s + e.Code.String() + "})"
252 | }
253 |
254 | func (e *Literal) fullString() string { return "(" + e.String() + ")" }
255 |
256 | func (e *CharClass) fullString() string { return "(" + e.String() + ")" }
257 |
258 | func (e *Any) fullString() string { return "(" + e.String() + ")" }
259 |
--------------------------------------------------------------------------------
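
The two string forms above differ only in parenthesization: String emits the compact form, while fullString fully parenthesizes every node. A short in-package sketch (not part of the repository; exampleStrings is a hypothetical name, and it must live in this package because fullString is unexported) contrasting the two on a three-way choice of '.':

package main

import "fmt"

// exampleStrings contrasts the compact and fully parenthesized forms.
func exampleStrings() {
	c := &Choice{Exprs: []Expr{&Any{}, &Any{}, &Any{}}}
	fmt.Println(c.String())     // ././.
	fmt.Println(c.fullString()) // (((.)/(.))/(.))
}
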