├── go.mod ├── testdata ├── ungrammar.ungrammar ├── exprlang.ungrammar └── rust.ungrammar ├── .github └── workflows │ └── go.yml ├── .gitignore ├── errorlist.go ├── example_test.go ├── LICENSE ├── README.md ├── cmd └── ungrammar2json │ └── ungrammar2json.go ├── lexer_test.go ├── ungrammar.go ├── lexer.go ├── parser.go └── parser_test.go /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/eliben/go-ungrammar 2 | 3 | go 1.22.2 4 | -------------------------------------------------------------------------------- /testdata/ungrammar.ungrammar: -------------------------------------------------------------------------------- 1 | /// ungrammar for ungrammar 2 | /// copied from https://github.com/rust-analyzer/ungrammar/ 3 | 4 | Grammar = 5 | Node * 6 | 7 | Node = 8 | name:'ident' '=' Rule 9 | 10 | Rule = 11 | 'ident' 12 | | 'token_ident' 13 | | Rule * 14 | | Rule ( '|' Rule) * 15 | | Rule '?' 16 | | Rule '*' 17 | | '(' Rule ')' 18 | | label:'ident' ':' Rule 19 | -------------------------------------------------------------------------------- /.github/workflows/go.yml: -------------------------------------------------------------------------------- 1 | name: Run Go tests 2 | 3 | on: 4 | push: 5 | branches: [ "main" ] 6 | pull_request: 7 | branches: [ "main" ] 8 | 9 | jobs: 10 | 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v3 15 | 16 | - name: Set up Go 17 | uses: actions/setup-go@v3 18 | with: 19 | go-version: "1.22.2" 20 | 21 | - name: Test 22 | run: go test -v ./... 
23 | -------------------------------------------------------------------------------- /testdata/exprlang.ungrammar: -------------------------------------------------------------------------------- 1 | // Ungrammar for a simple expression language 2 | 3 | Program = Stmt* 4 | 5 | Stmt = AssignStmt | Expr 6 | 7 | AssignStmt = 'set' 'ident' '=' Expr 8 | 9 | Expr = 10 | Literal 11 | | UnaryExpr 12 | | ParenExpr 13 | | BinExpr 14 | 15 | UnaryExpr = op:('+' | '-') Expr 16 | 17 | ParenExpr = '(' Expr ')' 18 | 19 | BinExpr = lhs:Expr op:('+' | '-' | '*' | '/' | '%') rhs:Expr 20 | 21 | Literal = 'int_literal' | 'ident' 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # If you prefer the allow list template instead of the deny list, see community template: 2 | # https://github.com/github/gitignore/blob/main/community/Golang/Go.AllowList.gitignore 3 | # 4 | # Binaries for programs and plugins 5 | *.exe 6 | *.exe~ 7 | *.dll 8 | *.so 9 | *.dylib 10 | 11 | # Test binary, built with `go test -c` 12 | *.test 13 | 14 | # Output of the go coverage tool, specifically when used with LiteIDE 15 | *.out 16 | 17 | # Dependency directories (remove the comment below to include it) 18 | # vendor/ 19 | 20 | # Go workspace file 21 | go.work 22 | -------------------------------------------------------------------------------- /errorlist.go: -------------------------------------------------------------------------------- 1 | // go-ungrammar: ErrorList type 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | 6 | package ungrammar 7 | 8 | import "fmt" 9 | 10 | // ErrorList represents multiple parse errors reported by the parser on a given 11 | // source. It's loosely modeled on scanner.ErrorList in the Go standard library. 12 | // ErrorList implements the error interface. 
13 | type ErrorList []error 14 | 15 | func (el *ErrorList) Add(err error) { 16 | *el = append(*el, err) 17 | } 18 | 19 | func (el ErrorList) Error() string { 20 | if len(el) == 0 { 21 | return "no errors" 22 | } else if len(el) == 1 { 23 | return el[0].Error() 24 | } else { 25 | return fmt.Sprintf("%s (and %d more errors)", el[0], len(el)-1) 26 | } 27 | } 28 | -------------------------------------------------------------------------------- /example_test.go: -------------------------------------------------------------------------------- 1 | // go-ungrammar: basic usage example. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | 6 | package ungrammar_test 7 | 8 | import ( 9 | "fmt" 10 | 11 | "github.com/eliben/go-ungrammar" 12 | ) 13 | 14 | func ExampleParser() { 15 | input := ` 16 | Foo = Bar Baz 17 | Baz = ( Kay Jay )* | 'id'` 18 | 19 | // Create an Ungrammar parser and parse input. 20 | p := ungrammar.NewParser(input) 21 | ungram, err := p.ParseGrammar() 22 | if err != nil { 23 | panic(err) 24 | } 25 | 26 | // Display the string representation of the parsed ungrammar. 27 | fmt.Println(ungram.Rules["Foo"].String()) 28 | fmt.Println(ungram.Rules["Baz"].String()) 29 | // Output: 30 | // Seq(Bar, Baz) 31 | // Alt(Rep(Seq(Kay, Jay)), 'id') 32 | } 33 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. 
We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to 25 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # go-ungrammar 2 | 3 | Ungrammar implementation and API in Go. [Blog post for background](https://eli.thegreenplace.net/2023/ungrammar-in-go-and-resilient-parsing/). 4 | 5 | Ungrammar is a DSL for 6 | [concrete syntax trees (CST)](https://en.wikipedia.org/wiki/Parse_tree). This implementation is based on the original 7 | [ungrammar crate](https://github.com/rust-analyzer/ungrammar/), also borrowing 8 | some test files from it. 9 | 10 | ## Ungrammar syntax 11 | 12 | The syntax of Ungrammar files is very simple: 13 | 14 | ``` 15 | // -- comment 16 | Name = -- non-terminal definition 17 | 'ident' -- token (terminal) 18 | A B -- sequence 19 | A | B -- alternation 20 | A* -- repetition (zero or more) 21 | A? -- optional (zero or one) 22 | (A B) -- grouping elements for precedence control 23 | label:A -- label hint for naming 24 | ``` 25 | 26 | For some concrete examples, look at files in the `testdata` directory. 
27 | 28 | ## Usage 29 | 30 | [![Go Reference](https://pkg.go.dev/badge/github.com/eliben/go-ungrammar.svg)](https://pkg.go.dev/github.com/eliben/go-ungrammar) 31 | 32 | Usage example: 33 | 34 | https://github.com/eliben/go-ungrammar/blob/229d0dd20660980d5069ed676c5c728a9fda5723/example_test.go#L13-L31 35 | 36 | For somewhat more sophisticated usage, see the `cmd/ungrammar2json` command. 37 | -------------------------------------------------------------------------------- /cmd/ungrammar2json/ungrammar2json.go: -------------------------------------------------------------------------------- 1 | // This program parses an ungrammar file and dumps the ungrammar into JSON 2 | // format that any tool/language can read. 3 | // 4 | // It reads stdin and writes to stdout. 5 | // 6 | // The emitted JSON is has minimal whitespace and is not formatted; pipe through 7 | // `jq .` for a pretty/formatted output. 8 | // 9 | // Eli Bendersky [https://eli.thegreenplace.net] 10 | // This code is in the public domain. 11 | 12 | package main 13 | 14 | import ( 15 | "encoding/json" 16 | "io" 17 | "log" 18 | "os" 19 | 20 | "github.com/eliben/go-ungrammar" 21 | ) 22 | 23 | func main() { 24 | if len(os.Args) != 1 { 25 | log.Fatal("Usage: ungrammar2json < input.ungram") 26 | } 27 | 28 | stdinBytes, err := io.ReadAll(os.Stdin) 29 | if err != nil { 30 | log.Fatal(err) 31 | } 32 | 33 | p := ungrammar.NewParser(string(stdinBytes)) 34 | grammar, err := p.ParseGrammar() 35 | if err != nil { 36 | log.Fatal("Error parsing ungrammar:", err) 37 | } 38 | 39 | grammarObj := make(object) 40 | for name, rule := range grammar.Rules { 41 | grammarObj[name] = ruleToObj(rule) 42 | } 43 | 44 | enc := json.NewEncoder(os.Stdout) 45 | if err := enc.Encode(grammarObj); err != nil { 46 | log.Fatal("Error encoding to JSON:", err) 47 | } 48 | } 49 | 50 | // object is a map with arbitrary values suitable for JSON encoding. 
51 | type object map[string]any 52 | 53 | func ruleToObj(r ungrammar.Rule) object { 54 | switch rr := r.(type) { 55 | case *ungrammar.Labeled: 56 | return object{"label": rr.Label, "rule": ruleToObj(rr.Rule)} 57 | case *ungrammar.Node: 58 | return object{"node": rr.Name} 59 | case *ungrammar.Token: 60 | return object{"token": rr.Value} 61 | case *ungrammar.Rep: 62 | return object{"rep": ruleToObj(rr.Rule)} 63 | case *ungrammar.Opt: 64 | return object{"opt": ruleToObj(rr.Rule)} 65 | case *ungrammar.Seq: 66 | var subRules []object 67 | for _, sr := range rr.Rules { 68 | subRules = append(subRules, ruleToObj(sr)) 69 | } 70 | return object{"seq": subRules} 71 | case *ungrammar.Alt: 72 | var subRules []object 73 | for _, sr := range rr.Rules { 74 | subRules = append(subRules, ruleToObj(sr)) 75 | } 76 | return object{"alt": subRules} 77 | default: 78 | return nil 79 | } 80 | } 81 | -------------------------------------------------------------------------------- /lexer_test.go: -------------------------------------------------------------------------------- 1 | // Eli Bendersky [https://eli.thegreenplace.net] 2 | // This code is in the public domain. 3 | 4 | package ungrammar 5 | 6 | import ( 7 | "testing" 8 | ) 9 | 10 | func TestLexer(t *testing.T) { 11 | const input = ` 12 | someid 13 | : ? 
anotherid 'sometok' 14 | // comment 15 | ( idmore 'tt tt' ) // doc 16 | 'tt\'q' 'tt\\s' 17 | | 18 | ` 19 | 20 | lex := newLexer(input) 21 | var toks []token 22 | 23 | for { 24 | t := lex.nextToken() 25 | toks = append(toks, t) 26 | if t.name == EOF { 27 | break 28 | } 29 | } 30 | 31 | wantToks := []token{ 32 | token{NODE, "someid", location{2, 1}}, 33 | token{COLON, ":", location{3, 1}}, 34 | token{QMARK, "?", location{3, 3}}, 35 | token{NODE, "anotherid", location{3, 5}}, 36 | token{TOKEN, "sometok", location{3, 15}}, 37 | token{LPAREN, "(", location{5, 26}}, 38 | token{NODE, "idmore", location{5, 28}}, 39 | token{TOKEN, "tt tt", location{5, 35}}, 40 | token{RPAREN, ")", location{5, 43}}, 41 | token{TOKEN, `tt'q`, location{6, 1}}, 42 | token{TOKEN, `tt\s`, location{6, 9}}, 43 | token{PIPE, "|", location{7, 1}}, 44 | token{EOF, "", location{8, 0}}, 45 | } 46 | 47 | if len(wantToks) != len(toks) { 48 | t.Fatalf("length mismatch wantToks=%v, toks=%v", len(wantToks), len(toks)) 49 | } 50 | for i := 0; i < len(wantToks); i++ { 51 | if wantToks[i] != toks[i] { 52 | t.Errorf("mismatch at index %2v: got %v, want %v", i, toks[i], wantToks[i]) 53 | } 54 | } 55 | } 56 | 57 | func TestLexerEOF(t *testing.T) { 58 | // Test that we get as many EOF tokens at the end of the input as we ask for. 
59 | const input = `: ` 60 | lex := newLexer(input) 61 | 62 | if tok := lex.nextToken(); tok.name != COLON { 63 | t.Errorf("got %v, want COLON", tok) 64 | } 65 | for i := 0; i < 10; i++ { 66 | if tok := lex.nextToken(); tok.name != EOF { 67 | t.Errorf("got %v, want EOF", tok) 68 | } 69 | } 70 | } 71 | 72 | func allTokens(lex *lexer) []token { 73 | var toks []token 74 | for { 75 | t := lex.nextToken() 76 | toks = append(toks, t) 77 | if t.name == EOF { 78 | break 79 | } 80 | } 81 | return toks 82 | } 83 | 84 | func TestLexerError(t *testing.T) { 85 | var tests = []struct { 86 | input string 87 | errorIndex int 88 | errorValue string 89 | errorLocation location 90 | }{ 91 | {`hello $ bye`, 1, `unknown token starting with '$'`, location{1, 7}}, 92 | {`hello | $no`, 2, `unknown token starting with '$'`, location{1, 9}}, 93 | {`hello | $no @`, 4, `unknown token starting with '@'`, location{1, 13}}, 94 | {`he '202020`, 1, `unterminated token literal`, location{1, 4}}, 95 | } 96 | 97 | for _, tt := range tests { 98 | t.Run(tt.input, func(t *testing.T) { 99 | lex := newLexer(tt.input) 100 | toks := allTokens(lex) 101 | gotTok := toks[tt.errorIndex] 102 | if gotTok.name != ERROR || gotTok.value != tt.errorValue || gotTok.loc != tt.errorLocation { 103 | t.Errorf("got token %s, want ERROR with value=%q loc=%v", gotTok, tt.errorValue, tt.errorLocation) 104 | } 105 | }) 106 | } 107 | } 108 | -------------------------------------------------------------------------------- /ungrammar.go: -------------------------------------------------------------------------------- 1 | // go-ungrammar: Ungrammar Concrete Syntax Tree (CST). 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | 6 | // package ungrammar provides a parser and representation for Ungrammar 7 | // concrete syntax trees. 8 | package ungrammar 9 | 10 | import ( 11 | "fmt" 12 | "strings" 13 | ) 14 | 15 | // Grammar represents a parsed Ungrammar file. 
The input is represented as 16 | // a mapping between strings (rule names on the left-hand-side of Ungrammar 17 | // rules) and rules (CST). 18 | // For example, if we have a rule like "Foo = Bar Baz", the Rules map will 19 | // contain a mapping between the string "Foo" and the CST 20 | // Seq(Node(Bar), Node(Baz)). 21 | type Grammar struct { 22 | // Rules maps ruleName --> Rule 23 | Rules map[string]Rule 24 | 25 | // NameLoc maps ruleName --> its location in the input, for accurate error 26 | // reporting. Rules carry their own locations, but since names are just 27 | // strings, locations are kept here. 28 | NameLoc map[string]location 29 | } 30 | 31 | // Rule is the interface defining an Ungrammar CST subtree. At runtime, a value 32 | // implemeting the Rule interface will have a concrete type which is one of the 33 | // exported types in this file. 34 | type Rule interface { 35 | Location() location 36 | String() string 37 | } 38 | 39 | type Labeled struct { 40 | Label string 41 | Rule Rule 42 | labelLoc location 43 | } 44 | 45 | type Node struct { 46 | Name string 47 | nameLoc location 48 | } 49 | 50 | type Token struct { 51 | Value string 52 | valueLoc location 53 | } 54 | 55 | type Seq struct { 56 | Rules []Rule 57 | } 58 | 59 | type Alt struct { 60 | Rules []Rule 61 | } 62 | 63 | type Opt struct { 64 | Rule Rule 65 | } 66 | 67 | type Rep struct { 68 | Rule Rule 69 | } 70 | 71 | // Location methods 72 | 73 | func (seq *Seq) Location() location { 74 | return seq.Rules[0].Location() 75 | } 76 | 77 | func (tok *Token) Location() location { 78 | return tok.valueLoc 79 | } 80 | 81 | func (node *Node) Location() location { 82 | return node.nameLoc 83 | } 84 | 85 | func (alt *Alt) Location() location { 86 | return alt.Rules[0].Location() 87 | } 88 | 89 | func (lbl *Labeled) Location() location { 90 | return lbl.labelLoc 91 | } 92 | 93 | func (opt *Opt) Location() location { 94 | return opt.Rule.Location() 95 | } 96 | 97 | func (rep *Rep) Location() location { 98 | 
return rep.Rule.Location() 99 | } 100 | 101 | // String methods 102 | 103 | func (g *Grammar) String() string { 104 | var sb strings.Builder 105 | for name, rule := range g.Rules { 106 | fmt.Fprintf(&sb, "%s: %s\n", name, ruleString(rule)) 107 | } 108 | return sb.String() 109 | } 110 | 111 | func (lbl *Labeled) String() string { 112 | return fmt.Sprintf("%s:%s", lbl.Label, ruleString(lbl.Rule)) 113 | } 114 | 115 | func (node *Node) String() string { 116 | return node.Name 117 | } 118 | 119 | func (tok *Token) String() string { 120 | return fmt.Sprintf("'%s'", tok.Value) 121 | } 122 | 123 | func (seq *Seq) String() string { 124 | var parts []string 125 | for _, r := range seq.Rules { 126 | parts = append(parts, ruleString(r)) 127 | } 128 | return fmt.Sprintf("Seq(%v)", strings.Join(parts, ", ")) 129 | } 130 | 131 | func (alt *Alt) String() string { 132 | var parts []string 133 | for _, r := range alt.Rules { 134 | parts = append(parts, ruleString(r)) 135 | } 136 | return fmt.Sprintf("Alt(%v)", strings.Join(parts, ", ")) 137 | } 138 | 139 | func (opt *Opt) String() string { 140 | return fmt.Sprintf("Opt(%s)", ruleString(opt.Rule)) 141 | } 142 | 143 | func (rep *Rep) String() string { 144 | return fmt.Sprintf("Rep(%s)", ruleString(rep.Rule)) 145 | } 146 | 147 | // ruleString returns a Rule's String() representation, or if r == nil. 148 | func ruleString(r Rule) string { 149 | if r == nil { 150 | return "" 151 | } else { 152 | return r.String() 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /lexer.go: -------------------------------------------------------------------------------- 1 | // go-ungrammar: lexical analyzer. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 
5 | 6 | package ungrammar 7 | 8 | import ( 9 | "fmt" 10 | "strings" 11 | "unicode/utf8" 12 | ) 13 | 14 | // token represents a Ungrammar language token - it has a name (one of the 15 | // constants declared below), string value and a location. 16 | // 17 | // The term "token" is slightly overloaded in this file; in Ungrammar, a quoted 18 | // string literal is also called a "Token" -- this is just one of the kinds of 19 | // tokens this lexer returns. 20 | type token struct { 21 | name tokenName 22 | value string 23 | loc location 24 | } 25 | 26 | type location struct { 27 | line int 28 | column int 29 | } 30 | 31 | func (loc location) String() string { 32 | return fmt.Sprintf("%v:%v", loc.line, loc.column) 33 | } 34 | 35 | type tokenName int 36 | 37 | const ( 38 | // Special tokens 39 | ERROR tokenName = iota 40 | EOF 41 | 42 | NODE 43 | TOKEN 44 | 45 | EQ 46 | STAR 47 | PIPE 48 | QMARK 49 | COLON 50 | LPAREN 51 | RPAREN 52 | ) 53 | 54 | var tokenNames = [...]string{ 55 | ERROR: "ERROR", 56 | EOF: "EOF", 57 | 58 | NODE: "NODE", 59 | TOKEN: "TOKEN", 60 | 61 | EQ: "EQ", 62 | STAR: "STAR", 63 | PIPE: "PIPE", 64 | QMARK: "QMARK", 65 | COLON: "COLON", 66 | LPAREN: "LPAREN", 67 | RPAREN: "RPAREN", 68 | } 69 | 70 | func (tok token) String() string { 71 | return fmt.Sprintf("token{%s, '%s', %s}", tokenNames[tok.name], tok.value, tok.loc) 72 | } 73 | 74 | // lexer provides lexical scanning of text into Ungrammar tokens. 75 | // 76 | // Create a new lexer with newLexer and then call nextToken repeatedly to get 77 | // tokens from the stream. The lexer will return an EOF token when done. 78 | type lexer struct { 79 | buf string 80 | 81 | // Current rune. 82 | r rune 83 | 84 | // Offset of the current rune in buf. 85 | rpos int 86 | 87 | // Offset of the next rune in buf. 88 | nextpos int 89 | 90 | // location of r 91 | loc location 92 | } 93 | 94 | // newLexer creates a new lexer for the given string. 
95 | func newLexer(buf string) *lexer { 96 | lex := lexer{ 97 | buf: buf, 98 | r: -1, 99 | rpos: 0, 100 | nextpos: 0, 101 | 102 | // column starts at 0 since advace() always increments it before we have 103 | // the first rune in r 104 | loc: location{1, 0}, 105 | } 106 | 107 | lex.advance() 108 | return &lex 109 | } 110 | 111 | // nextToken returns the next token in the input string. 112 | func (lex *lexer) nextToken() token { 113 | lex.skipNontokens() 114 | 115 | rloc := lex.loc 116 | if lex.r < 0 { 117 | return token{EOF, "", rloc} 118 | } else if isIdChar(lex.r) { 119 | return lex.scanNode() 120 | } 121 | 122 | switch lex.r { 123 | case '\'': 124 | return lex.scanQuoted() 125 | case '=': 126 | lex.advance() 127 | return token{EQ, "=", rloc} 128 | case '*': 129 | lex.advance() 130 | return token{STAR, "*", rloc} 131 | case '?': 132 | lex.advance() 133 | return token{QMARK, "?", rloc} 134 | case '(': 135 | lex.advance() 136 | return token{LPAREN, "(", rloc} 137 | case ')': 138 | lex.advance() 139 | return token{RPAREN, ")", rloc} 140 | case '|': 141 | lex.advance() 142 | return token{PIPE, "|", rloc} 143 | case ':': 144 | lex.advance() 145 | return token{COLON, ":", rloc} 146 | default: 147 | errtok := lex.emitError(fmt.Sprintf("unknown token starting with %q", lex.r), rloc) 148 | lex.advance() 149 | return errtok 150 | } 151 | } 152 | 153 | // advance the lexer's internal state to point to the next rune in the 154 | // input. advance is responsible for maintaining the main invariant of the 155 | // lexer: at any point after advance has been called at least once, lex.r 156 | // is the current token the lexer is looking at; lex.rpos is its offset 157 | // the string and lex.loc is its location. lex.nextpost is the offset of the 158 | // next token in the input. When the end of the input is reached, lex.r 159 | // becomes EOF. 
160 | func (lex *lexer) advance() { 161 | if lex.nextpos < len(lex.buf) { 162 | lex.rpos = lex.nextpos 163 | r, w := rune(lex.buf[lex.nextpos]), 1 164 | 165 | if r >= utf8.RuneSelf { 166 | r, w = utf8.DecodeRuneInString(lex.buf[lex.nextpos:]) 167 | } 168 | 169 | lex.nextpos += w 170 | lex.r = r 171 | lex.loc.column += 1 172 | } else { 173 | lex.rpos = len(lex.buf) 174 | lex.r = -1 // EOF 175 | } 176 | } 177 | 178 | // peekNext looks at the next rune in the input, after lex.r. It only works 179 | // correctly for rune values < 128. 180 | func (lex *lexer) peekNext() rune { 181 | if lex.nextpos < len(lex.buf) { 182 | return rune(lex.buf[lex.nextpos]) 183 | } else { 184 | return -1 185 | } 186 | } 187 | 188 | func (lex *lexer) emitError(msg string, loc location) token { 189 | return token{ 190 | name: ERROR, 191 | value: msg, 192 | loc: loc, 193 | } 194 | } 195 | 196 | func (lex *lexer) skipNontokens() { 197 | for { 198 | switch lex.r { 199 | case ' ', '\t', '\r': 200 | lex.advance() 201 | case '\n': 202 | lex.loc.line++ 203 | // Set column to 0 because advance() immediately increments it 204 | lex.loc.column = 0 205 | lex.advance() 206 | case '/': 207 | if lex.peekNext() == '/' { 208 | lex.skipLineComment() 209 | } else { /* A lone '/' is not a comment start; return so nextToken reports it as an unknown token instead of looping here forever (nothing advances past it). */ return } 210 | default: 211 | return 212 | } 213 | } 214 | } 215 | 216 | func (lex *lexer) skipLineComment() { 217 | for lex.r != '\n' && lex.r > 0 { 218 | lex.advance() 219 | } 220 | } 221 | 222 | func (lex *lexer) scanNode() token { 223 | startloc := lex.loc 224 | startpos := lex.rpos 225 | for isIdChar(lex.r) { 226 | lex.advance() 227 | } 228 | return token{NODE, lex.buf[startpos:lex.rpos], startloc} 229 | } 230 | 231 | func (lex *lexer) scanQuoted() token { 232 | startloc := lex.loc 233 | lex.advance() // skip leading quote 234 | var tokbuf strings.Builder 235 | for { 236 | if lex.r == '\'' { 237 | lex.advance() 238 | return token{TOKEN, tokbuf.String(), startloc} 239 | } else if lex.r == -1 { 240 | return lex.emitError("unterminated token literal", startloc)
241 | } else if lex.r == '\\' { 242 | // Skip the backslash and write the rune following it into the buffer. 243 | lex.advance() 244 | tokbuf.WriteRune(lex.r) 245 | } else { 246 | tokbuf.WriteRune(lex.r) 247 | } 248 | lex.advance() 249 | } 250 | } 251 | 252 | func isIdChar(r rune) bool { 253 | if r >= 256 { 254 | return false 255 | } 256 | 257 | const mask = 0 | 258 | (1<<26-1)<<'A' | 259 | (1<<26-1)<<'a' | 260 | 1<<'_' 261 | 262 | b := byte(r) 263 | return (uint64(1)<>64) != 0 264 | } 265 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | // go-ungrammar: parser. 2 | // 3 | // Eli Bendersky [https://eli.thegreenplace.net] 4 | // This code is in the public domain. 5 | 6 | package ungrammar 7 | 8 | import "fmt" 9 | 10 | // Parser parses ungrammar syntax into a Grammar. Create a new parser with 11 | // NewParser, and then call its ParseGrammar method. 12 | type Parser struct { 13 | lex *lexer 14 | 15 | tok token 16 | nextTok token 17 | 18 | errs ErrorList 19 | } 20 | 21 | // NewParser creates a new parser with the given string input. 22 | func NewParser(buf string) *Parser { 23 | p := &Parser{ 24 | lex: newLexer(buf), 25 | errs: nil, 26 | } 27 | 28 | p.tok = p.lex.nextToken() 29 | p.nextTok = p.lex.nextToken() 30 | return p 31 | } 32 | 33 | // ParseGrammar takes the input the Parser was initialized with and parses it 34 | // into a Grammar. It returns an ErrorList which collects all the errors 35 | // encountered during parsing, and in case of errors the returned Grammar may be 36 | // partial. 
37 | func (p *Parser) ParseGrammar() (*Grammar, error) { 38 | rules := make(map[string]Rule) 39 | locs := make(map[string]location) 40 | for !p.eof() { 41 | name, location, rule := p.parseNamedRule() 42 | if rule != nil { 43 | if _, found := rules[name]; found { 44 | p.emitError(location, fmt.Sprintf("duplicate rule name %v", name)) 45 | } 46 | rules[name] = rule 47 | locs[name] = location 48 | } 49 | } 50 | 51 | grammar := &Grammar{ 52 | Rules: rules, 53 | NameLoc: locs, 54 | } 55 | 56 | if len(p.errs) > 0 { 57 | return grammar, p.errs 58 | } else { 59 | return grammar, nil 60 | } 61 | } 62 | 63 | // advance returns the current token and consumes it (the next call to advance 64 | // will return the next token in the stream, etc.) 65 | func (p *Parser) advance() token { 66 | tok := p.tok 67 | if tok.name == EOF { 68 | return tok 69 | } 70 | 71 | // Shift the lookahead "buffer" 72 | p.tok = p.nextTok 73 | p.nextTok = p.lex.nextToken() 74 | return tok 75 | } 76 | 77 | func (p *Parser) eof() bool { 78 | return p.tok.name == EOF 79 | } 80 | 81 | // parseNamedRule parses a top-level named rule: Node '=' , and returns 82 | // its name, the location of the name and the rule itself. It returns an empty 83 | // name and rule if the parser doesn't currently point to a rule. 84 | func (p *Parser) parseNamedRule() (string, location, Rule) { 85 | tok := p.tok 86 | if tok.name == NODE { 87 | p.advance() 88 | if p.tok.name == EQ { 89 | p.advance() 90 | rule := p.parseAlt() 91 | return tok.value, tok.loc, rule 92 | } 93 | } 94 | 95 | // If we're here, a named rule was not found. 96 | p.emitError(tok.loc, fmt.Sprintf("expected named rule, got %v", tok.value)) 97 | p.synchronize() 98 | return "", location{}, nil 99 | } 100 | 101 | // parseAlt parses a top-level rule, the LHS of Node '=' . It's 102 | // potentially a '|'-seprated alternation of sequences. 
103 | func (p *Parser) parseAlt() Rule { 104 | alts := []Rule{p.parseSeq()} 105 | for p.tok.name == PIPE { 106 | p.advance() 107 | alts = append(alts, p.parseSeq()) 108 | } 109 | if len(alts) == 1 { 110 | return alts[0] 111 | } else { 112 | return &Alt{alts} 113 | } 114 | } 115 | 116 | // parseSeq parses a sequence of single rules. 117 | func (p *Parser) parseSeq() Rule { 118 | sr := p.parseSingleRule() 119 | if sr == nil { 120 | p.emitError(p.tok.loc, fmt.Sprintf("expected rule, got %v", p.tok.value)) 121 | p.synchronize() 122 | return nil 123 | } 124 | seq := []Rule{sr} 125 | 126 | for { 127 | sr = p.parseSingleRule() 128 | if sr == nil { 129 | break 130 | } 131 | seq = append(seq, sr) 132 | } 133 | if len(seq) == 1 { 134 | return seq[0] 135 | } else { 136 | return &Seq{seq} 137 | } 138 | } 139 | 140 | // parseSingleRule parses a single rule atom that's potentially followed by 141 | // a '?' or '*' quantifier. It can return nil if there are no more single 142 | // rules to parse. 143 | // 144 | // The Ungrammar grammar contains an ambiguity, since named rules are not 145 | // terminated explicitly, consider: 146 | // 147 | // Foo = Bar Baz 148 | // Bob = Rob 149 | // 150 | // After "Foo =" we parse a sequence of Bar, Baz, but then we see Bob, which 151 | // shouldn't be in the sequence, but rather start a new named rule. When we 152 | // parse a single rule, we look ahead for a '=' and bail if it's found, leaving 153 | // "Bob =" to a higher-level parser. In that case, nil is returned. 154 | func (p *Parser) parseSingleRule() Rule { 155 | atom := p.parseSingleRuleAtom() 156 | if atom == nil { 157 | return nil 158 | } 159 | if p.tok.name == QMARK { 160 | p.advance() 161 | return &Opt{atom} 162 | } else if p.tok.name == STAR { 163 | p.advance() 164 | return &Rep{atom} 165 | } 166 | return atom 167 | } 168 | 169 | // parseSingleRuleAtom parses a single rule atom - either a node, token, a 170 | // labeled rule, or a rule in parentheses. 
See the comment on parseSingleRule 171 | // for the grammar ambiguity this has to handle. 172 | func (p *Parser) parseSingleRuleAtom() Rule { 173 | switch p.tok.name { 174 | case NODE: 175 | // Lookahead to see if this is actually the beginning of the next top-level 176 | // rule definition, and bail if yes. 177 | if p.nextTok.name == EQ { 178 | return nil 179 | } else if p.nextTok.name == COLON { 180 | labelTok := p.advance() 181 | // This is a labeled rule and the label is now in labelTok. 182 | // Skip the colon. 183 | p.advance() 184 | r := p.parseSingleRule() 185 | if r == nil { 186 | p.emitError(p.tok.loc, fmt.Sprintf("expected rule after label, got %v", p.tok.value)) 187 | p.synchronize() 188 | } 189 | return &Labeled{ 190 | Label: labelTok.value, 191 | Rule: r, 192 | labelLoc: labelTok.loc, 193 | } 194 | } else { 195 | tok := p.tok 196 | p.advance() 197 | return &Node{ 198 | Name: tok.value, 199 | nameLoc: tok.loc, 200 | } 201 | } 202 | case TOKEN: 203 | tok := p.tok 204 | p.advance() 205 | return &Token{ 206 | Value: tok.value, 207 | valueLoc: tok.loc, 208 | } 209 | case LPAREN: 210 | // Consume '(' and parse the full rule 211 | p.advance() 212 | r := p.parseAlt() 213 | 214 | // Expect closing ')', but return the rule anyway if we don't find it. 215 | if p.tok.name != RPAREN { 216 | p.emitError(p.tok.loc, fmt.Sprintf("expected ')', got %v", p.tok.value)) 217 | p.synchronize() 218 | return r 219 | } 220 | 221 | // Consume ')' 222 | p.advance() 223 | return r 224 | case ERROR: 225 | p.emitError(p.tok.loc, p.tok.value) 226 | p.synchronize() 227 | } 228 | return nil 229 | } 230 | 231 | // synchronize consumes tokens until it finds a safe place to restart parsing. 232 | // It tries to find the next Node '=' where a new named rule can be defined. 
func (p *Parser) synchronize() {
	for !p.eof() {
		// A NODE followed by '=' is where a new top-level rule definition
		// begins -- a safe point to resume parsing after an error.
		if p.tok.name == NODE && p.nextTok.name == EQ {
			return
		}
		p.advance()
	}
}

// emitError records a parse error at the given location. Parsing continues
// after the error is recorded.
func (p *Parser) emitError(loc location, msg string) {
	p.errs.Add(fmt.Errorf("%s: %s", loc, msg))
}
--------------------------------------------------------------------------------
/parser_test.go:
--------------------------------------------------------------------------------
// Eli Bendersky [https://eli.thegreenplace.net]
// This code is in the public domain.

package ungrammar

import (
	"fmt"
	"os"
	"path/filepath"
	"slices"
	"sort"
	"strings"
	"testing"
)

// Tests parsing without errors
func TestParserTable(t *testing.T) {
	var tests = []struct {
		input     string
		wantRules []string
	}{
		// Basic rules
		{`x = mynode`, []string{`x: mynode`}},
		{`x = (mynode)`, []string{`x: mynode`}},
		{`x = mynode*`, []string{`x: Rep(mynode)`}},
		{`x = mynode?`, []string{`x: Opt(mynode)`}},
		{`x = 'atok'`, []string{`x: 'atok'`}},
		{`x = lab:mynode`, []string{`x: lab:mynode`}},
		{`x = node 'tok'`, []string{`x: Seq(node, 'tok')`}},
		{`x = foo | bar`, []string{`x: Alt(foo, bar)`}},

		// Multiple alts/seqs
		{`x = a | b | c | d | e | f`, []string{`x: Alt(a, b, c, d, e, f)`}},
		{`x = a b c d e f`, []string{`x: Seq(a, b, c, d, e, f)`}},

		// Precedence between Seq and Alt and using (...)
		{`x = n | t p`, []string{`x: Alt(n, Seq(t, p))`}},
		{`x = n i | t p | i b`, []string{`x: Alt(Seq(n, i), Seq(t, p), Seq(i, b))`}},
		{`x = (n | t) p`, []string{`x: Seq(Alt(n, t), p)`}},
		{`x = (n | t) p v w | y`, []string{`x: Alt(Seq(Alt(n, t), p, v, w), y)`}},
		{`x = (n | t)? p`, []string{`x: Seq(Opt(Alt(n, t)), p)`}},
		{`x = (n | t)? p *`, []string{`x: Seq(Opt(Alt(n, t)), Rep(p))`}},

		// Misc. nesting
		{`x = (lab:Path '::')? labb:Seg`, []string{`x: Seq(Opt(Seq(lab:Path, '::')), labb:Seg)`}},
		{`x = '=='? 't' (n (',' n)* ','?)? 't'`, []string{`x: Seq(Opt('=='), 't', Opt(Seq(n, Rep(Seq(',', n)), Opt(','))), 't')`}},

		// Multiple rules
		{`x = a b y = d`, []string{`x: Seq(a, b)`, `y: d`}},
		{`x = a b c
		y = d | t
		z = 'tok'`,
			[]string{`x: Seq(a, b, c)`, `y: Alt(d, t)`, `z: 'tok'`}},
		{`x =
		lab:Rule 'tok'

		Rule =
		'tok'
		| Rule '*'`,
			[]string{`x: Seq(lab:Rule, 'tok')`, `Rule: Alt('tok', Seq(Rule, '*'))`}},

		// Expected parsing of ungrammar.ungrammar
		{
			readFileOrPanic(filepath.Join("testdata", "ungrammar.ungrammar")),
			[]string{
				`Grammar: Rep(Node)`,
				`Node: Seq(name:'ident', '=', Rule)`,
				`Rule: Alt('ident', 'token_ident', Rep(Rule), Seq(Rule, Rep(Seq('|', Rule))), Seq(Rule, '?'), Seq(Rule, '*'), Seq('(', Rule, ')'), Seq(label:'ident', ':', Rule))`,
			},
		},

		{
			readFileOrPanic(filepath.Join("testdata", "exprlang.ungrammar")),
			[]string{
				`AssignStmt: Seq('set', 'ident', '=', Expr)`,
				`BinExpr: Seq(lhs:Expr, op:Alt('+', '-', '*', '/', '%'), rhs:Expr)`,
				`Expr: Alt(Literal, UnaryExpr, ParenExpr, BinExpr)`,
				`Literal: Alt('int_literal', 'ident')`,
				`ParenExpr: Seq('(', Expr, ')')`,
				`Program: Rep(Stmt)`,
				`Stmt: Alt(AssignStmt, Expr)`,
				`UnaryExpr: Seq(op:Alt('+', '-'), Expr)`,
			},
		},
	}

	for _, tt := range tests {
		t.Run(tt.input, func(t *testing.T) {
			p := NewParser(tt.input)
			g, err := p.ParseGrammar()
			if err != nil {
				t.Error(err)
			}
			gotRules := grammarToStrings(g)

			// grammarToStrings returns sorted output, so sort wantRules to match.
			sort.Strings(tt.wantRules)
			if !slices.Equal(gotRules, tt.wantRules) {
				t.Errorf("mismatch got != want:\n%v", displaySliceDiff(gotRules, tt.wantRules))
			}
		})
	}
}

// Check that we can read/parse the full rust.ungrammar without errors, and
// perform basic sanity checking.
checking. 106 | func TestRustUngrammarFile(t *testing.T) { 107 | contents := readFileOrPanic(filepath.Join("testdata", "rust.ungrammar")) 108 | p := NewParser(string(contents)) 109 | g, err := p.ParseGrammar() 110 | if err != nil { 111 | t.Error(err) 112 | } 113 | rules := grammarToStrings(g) 114 | 115 | // Sanity check: the expected number of rules, and the first and last rules 116 | // match (note that they are first/last in string-sorted order). 117 | if len(rules) != 143 { 118 | t.Errorf("grammar got %v rules, want 143", len(g.Rules)) 119 | } 120 | 121 | want0 := `Abi: Seq('extern', Opt('string'))` 122 | if rules[0] != want0 { 123 | t.Errorf("rule 0 got %v, want %v", rules[0], want0) 124 | } 125 | want142 := `YieldExpr: Seq(Rep(Attr), 'yield', Opt(Expr))` 126 | if rules[142] != want142 { 127 | t.Errorf("rule 142 got %v, want %v", rules[142], want142) 128 | } 129 | } 130 | 131 | func TestLocations(t *testing.T) { 132 | input := ` 133 | x = foo | bar 134 | y = a b?` 135 | 136 | p := NewParser(input) 137 | g, err := p.ParseGrammar() 138 | if err != nil { 139 | t.Error(err) 140 | } 141 | 142 | xrule := g.Rules["x"] 143 | xalt := xrule.(*Alt) 144 | yrule := g.Rules["y"] 145 | yseq := yrule.(*Seq) 146 | yseq1opt := yseq.Rules[1].(*Opt) 147 | 148 | var tests = []struct { 149 | name string 150 | loc location 151 | wantLocString string 152 | }{ 153 | {"x name", g.NameLoc["x"], "2:1"}, 154 | {"x rule", xrule.Location(), "2:5"}, 155 | {"y name", g.NameLoc["y"], "3:1"}, 156 | {"x alt 0", xalt.Rules[0].Location(), "2:5"}, 157 | {"x alt 1", xalt.Rules[1].Location(), "2:11"}, 158 | {"y seq 0", yseq.Rules[0].Location(), "3:5"}, 159 | {"y seq 1", yseq.Rules[1].Location(), "3:7"}, 160 | {"y seq 1 opt", yseq1opt.Location(), "3:7"}, 161 | {"y seq 1 opt rule", yseq1opt.Rule.Location(), "3:7"}, 162 | } 163 | 164 | for _, tt := range tests { 165 | t.Run(tt.name, func(t *testing.T) { 166 | if tt.loc.String() != tt.wantLocString { 167 | t.Errorf("got %v, want %v", tt.loc.String(), 
tt.wantLocString) 168 | } 169 | }) 170 | } 171 | } 172 | 173 | // Test error handling and parser recovery. The parser will try to make progress 174 | // even in face of errors, returning partial results while errors persist. 175 | func TestParseErrors(t *testing.T) { 176 | var tests = []struct { 177 | input string 178 | wantRules []string 179 | wantErrors []string 180 | }{ 181 | // Missing a named rule 182 | {`foo bar`, []string{}, []string{"1:1: expected named rule, got foo"}}, 183 | 184 | // Missing alternation content, partial tree created with error 185 | {`x = a | | b`, []string{`x: Alt(a, )`}, []string{"1:9: expected rule, got |"}}, 186 | 187 | // Missing closing ')' before new rule, but both rules created 188 | {`x = ( a b t = foo`, []string{`t: foo`, `x: Seq(a, b)`}, []string{"1:11: expected ')', got t"}}, 189 | 190 | // Recovery after spurious '=' 191 | {`x = = foo`, []string{}, []string{"1:5: expected rule, got ="}}, 192 | {`x = = foo = y`, []string{`foo: y`}, []string{"1:5: expected rule, got ="}}, 193 | 194 | // Duplicate rule name 195 | {`x = a b x = y z`, []string{`x: Seq(y, z)`}, []string{`1:11: duplicate rule name x`}}, 196 | 197 | // Lexer errors 198 | {`x = a @ y = t`, []string{`x: a`, `y: t`}, []string{"1:7: unknown token starting with '@'"}}, 199 | {`x = a b 'two y = t`, []string{`x: Seq(a, b)`}, []string{"1:9: unterminated token literal"}}, 200 | 201 | // Multiple errors 202 | {`x = a @ y = t z = ( k`, []string{`x: a`, `y: t`, `z: k`}, []string{`1:7: unknown token starting with '@'`, `1:21: expected ')', got `}}, 203 | } 204 | 205 | for _, tt := range tests { 206 | t.Run(tt.input, func(t *testing.T) { 207 | p := NewParser(tt.input) 208 | g, err := p.ParseGrammar() 209 | gotRules := grammarToStrings(g) 210 | 211 | sort.Strings(tt.wantRules) 212 | if !slices.Equal(gotRules, tt.wantRules) { 213 | t.Errorf("rules mismatch got != want:\n%v", displaySliceDiff(gotRules, tt.wantRules)) 214 | } 215 | 216 | if err == nil { 217 | t.Error("expected errors, 
got nil") 218 | } 219 | errlist := err.(ErrorList) 220 | var gotErrors []string 221 | for _, err := range errlist { 222 | gotErrors = append(gotErrors, err.Error()) 223 | } 224 | 225 | if !slices.Equal(gotErrors, tt.wantErrors) { 226 | fmt.Println(gotErrors, tt.wantErrors) 227 | t.Errorf("errors mismatch got != want:\n%v", displaySliceDiff(gotErrors, tt.wantErrors)) 228 | } 229 | }) 230 | } 231 | } 232 | 233 | // Test the message received when multiple errors are present 234 | func TestMultipleErrorsMessage(t *testing.T) { 235 | // This has two errors: 236 | // - encountering the first | 237 | // - unterminated '(' 238 | input := ` 239 | foo = | 240 | bar = ( joe 241 | x = y` 242 | 243 | p := NewParser(input) 244 | _, err := p.ParseGrammar() 245 | wantErr := "2:7: expected rule, got | (and 1 more errors)" 246 | if err.Error() != wantErr { 247 | t.Errorf("got %v, want %v", err.Error(), wantErr) 248 | } 249 | } 250 | 251 | // A single isolated test useful for debugging the parser. 252 | func TestIsolated(t *testing.T) { 253 | input := `x = = foo = x` 254 | p := NewParser(input) 255 | g, err := p.ParseGrammar() 256 | 257 | if len(g.Rules) != 1 { 258 | t.Errorf("got %v rules, want 1", len(g.Rules)) 259 | } 260 | if err == nil { 261 | t.Error("got no error, want error") 262 | } 263 | } 264 | 265 | func TestIsolatedErrors(t *testing.T) { 266 | input := ` 267 | foo = @ 268 | bar = ( joe 269 | x = y` 270 | p := NewParser(input) 271 | g, err := p.ParseGrammar() 272 | 273 | gotRules := grammarToStrings(g) 274 | 275 | if len(gotRules) != 2 { 276 | t.Errorf("got %v rules, want 2", len(gotRules)) 277 | } 278 | errlist := err.(ErrorList) 279 | var gotErrors []string 280 | for _, err := range errlist { 281 | gotErrors = append(gotErrors, err.Error()) 282 | } 283 | if len(errlist) != 3 { 284 | t.Errorf("got %v errors, want 3", len(errlist)) 285 | } 286 | } 287 | 288 | // grammarToStrings takes a Grammar's string representation and splits it into 289 | // a sorted slice of strings 
(one per top-level rule) suitable for testing. 290 | func grammarToStrings(g *Grammar) []string { 291 | if len(g.String()) == 0 { 292 | return []string{} 293 | } 294 | ss := strings.Split(strings.TrimRight(g.String(), "\n"), "\n") 295 | sort.Strings(ss) 296 | return ss 297 | } 298 | 299 | // readFileOrPanic reads the given file's contents and returns them as a string. 300 | // In case of an error, it panics. 301 | func readFileOrPanic(filename string) string { 302 | contents, err := os.ReadFile(filename) 303 | if err != nil { 304 | panic(err) 305 | } 306 | return string(contents) 307 | } 308 | 309 | // displaySliceDiff displays a diff between two slices in a way that's 310 | // readable in test output. 311 | func displaySliceDiff[T any](got []T, want []T) string { 312 | maxLen := 0 313 | for _, g := range got { 314 | gs := fmt.Sprintf("%v", g) 315 | maxLen = max(maxLen + 1, len(gs)) 316 | } 317 | 318 | var sb strings.Builder 319 | fmt.Fprintf(&sb, "%-*v %v\n", maxLen, "got", "want") 320 | 321 | for i := 0; i < max(len(got), len(want)); i++ { 322 | var sgot string 323 | if i < len(got) { 324 | sgot = fmt.Sprintf("%v", got[i]) 325 | } 326 | 327 | var swant string 328 | if i < len(want) { 329 | swant = fmt.Sprintf("%v", want[i]) 330 | } 331 | 332 | sign := " " 333 | if swant != sgot { 334 | sign = "!=" 335 | } 336 | fmt.Fprintf(&sb, "%-*v %v %v\n", maxLen, sgot, sign, swant) 337 | } 338 | return sb.String() 339 | } 340 | -------------------------------------------------------------------------------- /testdata/rust.ungrammar: -------------------------------------------------------------------------------- 1 | /// copied from https://github.com/rust-analyzer/ungrammar/ 2 | 3 | // Rust Un-Grammar. 4 | // 5 | // This grammar specifies the structure of Rust's concrete syntax tree. 6 | // It does not specify parsing rules (ambiguities, precedence, etc are out of scope). 7 | // Tokens are processed -- contextual keywords are recognised, compound operators glued. 
8 | // 9 | // Legend: 10 | // 11 | // // -- comment 12 | // Name = -- non-terminal definition 13 | // 'ident' -- token (terminal) 14 | // A B -- sequence 15 | // A | B -- alternation 16 | // A* -- zero or more repetition 17 | // A? -- zero or one repetition 18 | // (A) -- same as A 19 | // label:A -- suggested name for field of AST node 20 | 21 | //*************************// 22 | // Names, Paths and Macros // 23 | //*************************// 24 | 25 | Name = 26 | 'ident' | 'self' 27 | 28 | NameRef = 29 | 'ident' | 'int_number' | 'self' | 'super' | 'crate' | 'Self' 30 | 31 | Lifetime = 32 | 'lifetime_ident' 33 | 34 | Path = 35 | (qualifier:Path '::')? segment:PathSegment 36 | 37 | PathSegment = 38 | '::'? NameRef 39 | | NameRef GenericArgList? 40 | | NameRef ParamList RetType? 41 | | '<' PathType ('as' PathType)? '>' 42 | 43 | GenericArgList = 44 | '::'? '<' (GenericArg (',' GenericArg)* ','?)? '>' 45 | 46 | GenericArg = 47 | TypeArg 48 | | AssocTypeArg 49 | | LifetimeArg 50 | | ConstArg 51 | 52 | TypeArg = 53 | Type 54 | 55 | AssocTypeArg = 56 | NameRef GenericParamList? (':' TypeBoundList | '=' Type) 57 | 58 | LifetimeArg = 59 | Lifetime 60 | 61 | ConstArg = 62 | Expr 63 | 64 | MacroCall = 65 | Attr* Path '!' TokenTree ';'? 66 | 67 | TokenTree = 68 | '(' ')' 69 | | '{' '}' 70 | | '[' ']' 71 | 72 | MacroItems = 73 | Item* 74 | 75 | MacroStmts = 76 | statements:Stmt* 77 | Expr? 78 | 79 | //*************************// 80 | // Items // 81 | //*************************// 82 | 83 | SourceFile = 84 | 'shebang'? 85 | Attr* 86 | Item* 87 | 88 | Item = 89 | Const 90 | | Enum 91 | | ExternBlock 92 | | ExternCrate 93 | | Fn 94 | | Impl 95 | | MacroCall 96 | | MacroRules 97 | | MacroDef 98 | | Module 99 | | Static 100 | | Struct 101 | | Trait 102 | | TypeAlias 103 | | Union 104 | | Use 105 | 106 | MacroRules = 107 | Attr* Visibility? 108 | 'macro_rules' '!' Name 109 | TokenTree 110 | 111 | MacroDef = 112 | Attr* Visibility? 113 | 'macro' Name args:TokenTree? 
114 | body:TokenTree 115 | 116 | Module = 117 | Attr* Visibility? 118 | 'mod' Name 119 | (ItemList | ';') 120 | 121 | ItemList = 122 | '{' Attr* Item* '}' 123 | 124 | ExternCrate = 125 | Attr* Visibility? 126 | 'extern' 'crate' NameRef Rename? ';' 127 | 128 | Rename = 129 | 'as' (Name | '_') 130 | 131 | Use = 132 | Attr* Visibility? 133 | 'use' UseTree ';' 134 | 135 | UseTree = 136 | (Path? '::')? ('*' | UseTreeList) 137 | | Path Rename? 138 | 139 | UseTreeList = 140 | '{' (UseTree (',' UseTree)* ','?)? '}' 141 | 142 | Fn = 143 | Attr* Visibility? 144 | 'default'? 'const'? 'async'? 'unsafe'? Abi? 145 | 'fn' Name GenericParamList? ParamList RetType? WhereClause? 146 | (body:BlockExpr | ';') 147 | 148 | Abi = 149 | 'extern' 'string'? 150 | 151 | ParamList = 152 | '('( 153 | SelfParam 154 | | (SelfParam ',')? (Param (',' Param)* ','?)? 155 | )')' 156 | | '|' (Param (',' Param)* ','?)? '|' 157 | 158 | SelfParam = 159 | Attr* ( 160 | ('&' Lifetime?)? 'mut'? Name 161 | | 'mut'? Name ':' Type 162 | ) 163 | 164 | Param = 165 | Attr* ( 166 | Pat (':' Type)? 167 | | Type 168 | | '...' 169 | ) 170 | 171 | RetType = 172 | '->' Type 173 | 174 | TypeAlias = 175 | Attr* Visibility? 176 | 'default'? 177 | 'type' Name GenericParamList? (':' TypeBoundList?)? WhereClause? 178 | ('=' Type)? ';' 179 | 180 | Struct = 181 | Attr* Visibility? 182 | 'struct' Name GenericParamList? ( 183 | WhereClause? (RecordFieldList | ';') 184 | | TupleFieldList WhereClause? ';' 185 | ) 186 | 187 | RecordFieldList = 188 | '{' fields:(RecordField (',' RecordField)* ','?)? '}' 189 | 190 | RecordField = 191 | Attr* Visibility? 192 | Name ':' Type 193 | 194 | TupleFieldList = 195 | '(' fields:(TupleField (',' TupleField)* ','?)? ')' 196 | 197 | TupleField = 198 | Attr* Visibility? 199 | Type 200 | 201 | FieldList = 202 | RecordFieldList 203 | | TupleFieldList 204 | 205 | Enum = 206 | Attr* Visibility? 207 | 'enum' Name GenericParamList? WhereClause? 
208 | VariantList 209 | 210 | VariantList = 211 | '{' (Variant (',' Variant)* ','?)? '}' 212 | 213 | Variant = 214 | Attr* Visibility? 215 | Name FieldList? ('=' Expr)? 216 | 217 | Union = 218 | Attr* Visibility? 219 | 'union' Name GenericParamList? WhereClause? 220 | RecordFieldList 221 | 222 | // A Data Type. 223 | // 224 | // Not used directly in the grammar, but handy to have anyway. 225 | Adt = 226 | Enum 227 | | Struct 228 | | Union 229 | 230 | Const = 231 | Attr* Visibility? 232 | 'default'? 233 | 'const' (Name | '_') ':' Type 234 | ('=' body:Expr)? ';' 235 | 236 | Static = 237 | Attr* Visibility? 238 | 'static' 'mut'? Name ':' Type 239 | ('=' body:Expr)? ';' 240 | 241 | Trait = 242 | Attr* Visibility? 243 | 'unsafe'? 'auto'? 244 | 'trait' Name GenericParamList? (':' TypeBoundList?)? WhereClause? 245 | AssocItemList 246 | 247 | AssocItemList = 248 | '{' Attr* AssocItem* '}' 249 | 250 | AssocItem = 251 | Const 252 | | Fn 253 | | MacroCall 254 | | TypeAlias 255 | 256 | Impl = 257 | Attr* Visibility? 258 | 'default'? 'unsafe'? 259 | 'impl' GenericParamList? ('const'? '!'? trait:Type 'for')? self_ty:Type WhereClause? 260 | AssocItemList 261 | 262 | ExternBlock = 263 | Attr* 'unsafe'? Abi ExternItemList 264 | 265 | ExternItemList = 266 | '{' Attr* ExternItem* '}' 267 | 268 | ExternItem = 269 | Fn 270 | | MacroCall 271 | | Static 272 | | TypeAlias 273 | 274 | GenericParamList = 275 | '<' (GenericParam (',' GenericParam)* ','?)? '>' 276 | 277 | GenericParam = 278 | ConstParam 279 | | LifetimeParam 280 | | TypeParam 281 | 282 | TypeParam = 283 | Attr* Name (':' TypeBoundList?)? 284 | ('=' default_type:Type)? 285 | 286 | ConstParam = 287 | Attr* 'const' Name ':' Type 288 | ('=' default_val:Expr)? 289 | 290 | LifetimeParam = 291 | Attr* Lifetime (':' TypeBoundList?)? 292 | 293 | WhereClause = 294 | 'where' predicates:(WherePred (',' WherePred)* ','?) 295 | 296 | WherePred = 297 | ('for' GenericParamList)? (Lifetime | Type) ':' TypeBoundList? 
298 | 299 | Visibility = 300 | 'pub' ('(' 'in'? Path ')')? 301 | 302 | Attr = 303 | '#' '!'? '[' Meta ']' 304 | 305 | Meta = 306 | Path ('=' Expr | TokenTree)? 307 | 308 | //****************************// 309 | // Statements and Expressions // 310 | //****************************// 311 | 312 | Stmt = 313 | ';' 314 | | ExprStmt 315 | | Item 316 | | LetStmt 317 | 318 | LetStmt = 319 | Attr* 'let' Pat (':' Type)? 320 | '=' initializer:Expr 321 | LetElse? 322 | ';' 323 | 324 | LetElse = 325 | 'else' BlockExpr 326 | 327 | ExprStmt = 328 | Expr ';'? 329 | 330 | Expr = 331 | ArrayExpr 332 | | AwaitExpr 333 | | BinExpr 334 | | BlockExpr 335 | | BoxExpr 336 | | BreakExpr 337 | | CallExpr 338 | | CastExpr 339 | | ClosureExpr 340 | | ContinueExpr 341 | | FieldExpr 342 | | ForExpr 343 | | IfExpr 344 | | IndexExpr 345 | | Literal 346 | | LoopExpr 347 | | MacroCall 348 | | MacroStmts 349 | | MatchExpr 350 | | MethodCallExpr 351 | | ParenExpr 352 | | PathExpr 353 | | PrefixExpr 354 | | RangeExpr 355 | | RecordExpr 356 | | RefExpr 357 | | ReturnExpr 358 | | TryExpr 359 | | TupleExpr 360 | | WhileExpr 361 | | YieldExpr 362 | | LetExpr 363 | | UnderscoreExpr 364 | 365 | Literal = 366 | Attr* value:( 367 | 'int_number' | 'float_number' 368 | | 'string' | 'raw_string' 369 | | 'byte_string' | 'raw_byte_string' 370 | | 'true' | 'false' 371 | | 'char' | 'byte' 372 | ) 373 | 374 | PathExpr = 375 | Attr* Path 376 | 377 | StmtList = 378 | '{' 379 | Attr* 380 | statements:Stmt* 381 | tail_expr:Expr? 382 | '}' 383 | 384 | RefExpr = 385 | Attr* '&' ('raw' | 'mut' | 'const') Expr 386 | 387 | TryExpr = 388 | Attr* Expr '?' 389 | 390 | BlockExpr = 391 | Attr* Label? ('try' | 'unsafe' | 'async' | 'const') StmtList 392 | 393 | PrefixExpr = 394 | Attr* op:('-' | '!' 
| '*') Expr 395 | 396 | BinExpr = 397 | Attr* 398 | lhs:Expr 399 | op:( 400 | '||' | '&&' 401 | | '==' | '!=' | '<=' | '>=' | '<' | '>' 402 | | '+' | '*' | '-' | '/' | '%' | '<<' | '>>' | '^' | '|' | '&' 403 | | '=' | '+=' | '/=' | '*=' | '%=' | '>>=' | '<<=' | '-=' | '|=' | '&=' | '^=' 404 | ) 405 | rhs:Expr 406 | 407 | CastExpr = 408 | Attr* Expr 'as' Type 409 | 410 | ParenExpr = 411 | Attr* '(' Attr* Expr ')' 412 | 413 | ArrayExpr = 414 | Attr* '[' Attr* ( 415 | (Expr (',' Expr)* ','?)? 416 | | Expr ';' Expr 417 | ) ']' 418 | 419 | IndexExpr = 420 | Attr* base:Expr '[' index:Expr ']' 421 | 422 | TupleExpr = 423 | Attr* '(' Attr* fields:(Expr (',' Expr)* ','?)? ')' 424 | 425 | RecordExpr = 426 | Path RecordExprFieldList 427 | 428 | RecordExprFieldList = 429 | '{' 430 | Attr* 431 | fields:(RecordExprField (',' RecordExprField)* ','?)? 432 | ('..' spread:Expr?)? 433 | '}' 434 | 435 | RecordExprField = 436 | Attr* (NameRef ':')? Expr 437 | 438 | CallExpr = 439 | Attr* Expr ArgList 440 | 441 | ArgList = 442 | '(' args:(Expr (',' Expr)* ','?)? ')' 443 | 444 | MethodCallExpr = 445 | Attr* receiver:Expr '.' NameRef GenericArgList? ArgList 446 | 447 | FieldExpr = 448 | Attr* Expr '.' NameRef 449 | 450 | ClosureExpr = 451 | Attr* 'static'? 'async'? 'move'? ParamList RetType? 452 | body:Expr 453 | 454 | IfExpr = 455 | Attr* 'if' condition:Expr then_branch:BlockExpr 456 | ('else' else_branch:(IfExpr | BlockExpr))? 457 | 458 | LoopExpr = 459 | Attr* Label? 'loop' 460 | loop_body:BlockExpr 461 | 462 | ForExpr = 463 | Attr* Label? 'for' Pat 'in' iterable:Expr 464 | loop_body:BlockExpr 465 | 466 | WhileExpr = 467 | Attr* Label? 'while' condition:Expr 468 | loop_body:BlockExpr 469 | 470 | Label = 471 | Lifetime ':' 472 | 473 | BreakExpr = 474 | Attr* 'break' Lifetime? Expr? 475 | 476 | ContinueExpr = 477 | Attr* 'continue' Lifetime? 478 | 479 | RangeExpr = 480 | Attr* start:Expr? op:('..' | '..=') end:Expr? 
481 | 482 | MatchExpr = 483 | Attr* 'match' Expr MatchArmList 484 | 485 | MatchArmList = 486 | '{' 487 | Attr* 488 | arms:MatchArm* 489 | '}' 490 | 491 | MatchArm = 492 | Attr* Pat guard:MatchGuard? '=>' Expr ','? 493 | 494 | MatchGuard = 495 | 'if' condition:Expr 496 | 497 | ReturnExpr = 498 | Attr* 'return' Expr? 499 | 500 | YieldExpr = 501 | Attr* 'yield' Expr? 502 | 503 | LetExpr = 504 | Attr* 'let' Pat '=' Expr 505 | 506 | UnderscoreExpr = 507 | Attr* '_' 508 | 509 | AwaitExpr = 510 | Attr* Expr '.' 'await' 511 | 512 | BoxExpr = 513 | Attr* 'box' Expr 514 | 515 | //*************************// 516 | // Types // 517 | //*************************// 518 | 519 | Type = 520 | ArrayType 521 | | DynTraitType 522 | | FnPtrType 523 | | ForType 524 | | ImplTraitType 525 | | InferType 526 | | MacroType 527 | | NeverType 528 | | ParenType 529 | | PathType 530 | | PtrType 531 | | RefType 532 | | SliceType 533 | | TupleType 534 | 535 | ParenType = 536 | '(' Type ')' 537 | 538 | NeverType = 539 | '!' 540 | 541 | MacroType = 542 | MacroCall 543 | 544 | PathType = 545 | Path 546 | 547 | TupleType = 548 | '(' fields:(Type (',' Type)* ','?)? ')' 549 | 550 | PtrType = 551 | '*' ('const' | 'mut') Type 552 | 553 | RefType = 554 | '&' Lifetime? 'mut'? Type 555 | 556 | ArrayType = 557 | '[' Type ';' Expr ']' 558 | 559 | SliceType = 560 | '[' Type ']' 561 | 562 | InferType = 563 | '_' 564 | 565 | FnPtrType = 566 | 'const'? 'async'? 'unsafe'? Abi? 'fn' ParamList RetType? 567 | 568 | ForType = 569 | 'for' GenericParamList Type 570 | 571 | ImplTraitType = 572 | 'impl' TypeBoundList 573 | 574 | DynTraitType = 575 | 'dyn' TypeBoundList 576 | 577 | TypeBoundList = 578 | bounds:(TypeBound ('+' TypeBound)* '+'?) 579 | 580 | TypeBound = 581 | Lifetime 582 | | ('?' | '~' 'const')? 
Type 583 | 584 | //************************// 585 | // Patterns // 586 | //************************// 587 | 588 | Pat = 589 | IdentPat 590 | | BoxPat 591 | | RestPat 592 | | LiteralPat 593 | | MacroPat 594 | | OrPat 595 | | ParenPat 596 | | PathPat 597 | | WildcardPat 598 | | RangePat 599 | | RecordPat 600 | | RefPat 601 | | SlicePat 602 | | TuplePat 603 | | TupleStructPat 604 | | ConstBlockPat 605 | 606 | LiteralPat = 607 | Literal 608 | 609 | IdentPat = 610 | Attr* 'ref'? 'mut'? Name ('@' Pat)? 611 | 612 | WildcardPat = 613 | '_' 614 | 615 | RangePat = 616 | // 1.. 617 | start:Pat op:('..' | '..=') 618 | // 1..2 619 | | start:Pat op:('..' | '..=') end:Pat 620 | // ..2 621 | | op:('..' | '..=') end:Pat 622 | 623 | RefPat = 624 | '&' 'mut'? Pat 625 | 626 | RecordPat = 627 | Path RecordPatFieldList 628 | 629 | RecordPatFieldList = 630 | '{' 631 | fields:(RecordPatField (',' RecordPatField)* ','?)? 632 | RestPat? 633 | '}' 634 | 635 | RecordPatField = 636 | Attr* (NameRef ':')? Pat 637 | 638 | TupleStructPat = 639 | Path '(' fields:(Pat (',' Pat)* ','?)? ')' 640 | 641 | TuplePat = 642 | '(' fields:(Pat (',' Pat)* ','?)? ')' 643 | 644 | ParenPat = 645 | '(' Pat ')' 646 | 647 | SlicePat = 648 | '[' (Pat (',' Pat)* ','?)? ']' 649 | 650 | PathPat = 651 | Path 652 | 653 | OrPat = 654 | (Pat ('|' Pat)* '|'?) 655 | 656 | BoxPat = 657 | 'box' Pat 658 | 659 | RestPat = 660 | Attr* '..' 661 | 662 | MacroPat = 663 | MacroCall 664 | 665 | ConstBlockPat = 666 | 'const' BlockExpr 667 | --------------------------------------------------------------------------------