├── .gitignore ├── LICENSE ├── README.md ├── ast.go ├── cmd └── peglint │ ├── README.md │ └── peglint.go ├── examples_test.go ├── expr.go ├── ope.go ├── ope_test.go ├── parser.go ├── parser_test.go ├── rule.go └── visitor.go /.gitignore: -------------------------------------------------------------------------------- 1 | *.swp 2 | .vscode 3 | go-peg.test 4 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2016 yhirose 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | go-peg 2 | ====== 3 | 4 | **NOTE**: This library is **deprecated**. 
Please use [**cpp-peglib**](https://github.com/yhirose/cpp-peglib) instead. 5 | 6 | Yet another [PEG](http://en.wikipedia.org/wiki/Parsing_expression_grammar) (Parsing Expression Grammars) parser generator for Go. 7 | 8 | If you need a PEG grammar checker, you may want to check [**peglint**](https://github.com/yhirose/go-peg/tree/master/cmd/peglint). 9 | 10 | If you need a C++ version, please see [*cpp-peglib*](https://github.com/yhirose/cpp-peglib). 11 | 12 | ### Extended features 13 | 14 | * Token operator: `<` `>` 15 | * Automatic whitespace skipping: `%whitespace` 16 | * Expression parsing for binary operators ([precedence climbing method](https://en.wikipedia.org/wiki/Operator-precedence_parser#Precedence_climbing_method)) 17 | * Parameterized rule or Macro 18 | * Word expression: `%word` 19 | * AST generation 20 | 21 | ### Usage 22 | 23 | ```go 24 | // Create a PEG parser 25 | parser, _ := NewParser(` 26 | # Simple calculator 27 | EXPR ← ATOM (BINOP ATOM)* 28 | ATOM ← NUMBER / '(' EXPR ')' 29 | BINOP ← < [-+/*] > 30 | NUMBER ← < [0-9]+ > 31 | %whitespace ← [ \t]* 32 | --- 33 | # Expression parsing option 34 | %expr = EXPR # Rule to apply 'precedence climbing method' to 35 | %binop = L + - # Precedence level 1 36 | %binop = L * / # Precedence level 2 37 | `) 38 | 39 | // Setup semantic actions 40 | g := parser.Grammar 41 | g["EXPR"].Action = func(v *Values, d Any) (Any, error) { 42 | val := v.ToInt(0) 43 | if v.Len() > 1 { 44 | ope := v.ToStr(1) 45 | rhs := v.ToInt(2) 46 | switch ope { 47 | case "+": val += rhs 48 | case "-": val -= rhs 49 | case "*": val *= rhs 50 | case "/": val /= rhs 51 | } 52 | } 53 | return val, nil 54 | } 55 | g["BINOP"].Action = func(v *Values, d Any) (Any, error) { 56 | return v.Token(), nil 57 | } 58 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { 59 | return strconv.Atoi(v.Token()) 60 | } 61 | 62 | // Parse 63 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 64 | val, _ := parser.ParseAndGetValue(input, nil) 65 | 66 | 
fmt.Println(val) // Output: -3 67 | ``` 68 | 69 | Parameterized Rule or Macro 70 | --------------------------- 71 | 72 | ```peg 73 | # Syntax 74 | Start ← _ Expr 75 | Expr ← Sum 76 | Sum ← List(Product, SumOpe) 77 | Product ← List(Value, ProOpe) 78 | Value ← Number / T('(') Expr T(')') 79 | 80 | # Token 81 | SumOpe ← T('+' / '-') 82 | ProOpe ← T('*' / '/') 83 | Number ← T([0-9]+) 84 | ~_ ← [ \t\r\n]* 85 | 86 | # Macro 87 | List(I, D) ← I (D I)* 88 | T(x) ← < x > _ 89 | ``` 90 | 91 | Word expression 92 | --------------- 93 | 94 | ```go 95 | parser, _ := NewParser(` 96 | ROOT ← 'hello' 'world' 97 | %whitespace ← [ \t\r\n]* 98 | %word ← [a-z]+ 99 | `) 100 | 101 | parser.Parse("hello world", nil) # OK 102 | parser.Parse("helloworld", nil) # NG 103 | ``` 104 | 105 | AST generation 106 | -------------- 107 | 108 | ```go 109 | // Create a PEG parser 110 | parser, _ := NewParser(` 111 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 112 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 113 | FACTOR <- NUMBER / '(' EXPRESSION ')' 114 | TERM_OPERATOR <- < [-+] > 115 | FACTOR_OPERATOR <- < [/*] > 116 | NUMBER <- < [0-9]+ > 117 | %whitespace <- [ \t\r\n]* 118 | `) 119 | 120 | // Evaluator 121 | var eval func(ast *Ast) int 122 | eval = func(ast *Ast) int { 123 | if ast.Name == "NUMBER" { 124 | val, _ := strconv.Atoi(ast.Token) 125 | return val 126 | } else { 127 | nodes := ast.Nodes 128 | val := eval(nodes[0]) 129 | for i := 1; i < len(nodes); i += 2 { 130 | num := eval(nodes[i+1]) 131 | ope := nodes[i].Token[0] 132 | switch ope { 133 | case '+': 134 | val += num 135 | break 136 | case '-': 137 | val -= num 138 | break 139 | case '*': 140 | val *= num 141 | break 142 | case '/': 143 | val /= num 144 | break 145 | } 146 | } 147 | return val 148 | } 149 | } 150 | 151 | // Generate AST 152 | parser.EnableAst() 153 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 154 | ret, _ := parser.ParseAndGetValue(input, nil) 155 | ast := ret.(*Ast) 156 | 157 | // Optimize AST 158 | opt := 
// Ast is a node of the syntax tree produced by the actions that EnableAst
// installs on every grammar rule.
type Ast struct {
	//Path string

	// Ln and Col are the line and column returned by lineInfo for the
	// position at which the rule matched.
	Ln  int
	Col int
	// S is copied from Values.S of the match that produced this node.
	S string
	// Name is the grammar rule name the node was created for.
	Name string
	// Token is Values.Token() of the match; EnableAst only sets it for
	// rules that report isToken(), otherwise it is empty.
	Token string
	// Nodes holds the child nodes; EnableAst only fills it for non-token
	// rules, from the semantic values of the match.
	Nodes []*Ast
	// Parent is the enclosing node (nil for a root). It is assigned by
	// EnableAst's rule action and by AstOptimizer.Optimize.
	Parent *Ast
	// Data is never set by the code in this file; astToS prints it when it
	// is non-nil. Presumably a slot for caller-attached data.
	Data interface{}
}
append(nodes, val.(*Ast)) 63 | } 64 | 65 | ast := &Ast{Ln: ln, Col: col, S: v.S, Name: nm, Nodes: nodes} 66 | for _, node := range nodes { 67 | node.Parent = ast 68 | } 69 | 70 | return ast, nil 71 | } 72 | } 73 | } 74 | 75 | return err 76 | } 77 | 78 | func (p *Parser) ParseAndGetAst(s string, d Any) (*Ast, error) { 79 | val, err := p.ParseAndGetValue(s, d) 80 | if err != nil { 81 | return nil, err 82 | } 83 | return val.(*Ast), nil 84 | } 85 | 86 | type AstOptimizer struct { 87 | exceptions []string 88 | } 89 | 90 | func NewAstOptimizer(exceptions []string) *AstOptimizer { 91 | return &AstOptimizer{exceptions} 92 | } 93 | 94 | func (o *AstOptimizer) Optimize(org *Ast, par *Ast) *Ast { 95 | opt := true 96 | for _, name := range o.exceptions { 97 | if name == org.Name { 98 | opt = false 99 | } 100 | } 101 | 102 | if opt && len(org.Nodes) == 1 { 103 | chl := o.Optimize(org.Nodes[0], par) 104 | return chl 105 | } 106 | 107 | ast := &Ast{ 108 | Ln: org.Ln, 109 | Col: org.Col, 110 | S: org.S, 111 | Name: org.Name, 112 | Token: org.Token, 113 | Parent: par, 114 | Data: org.Data, 115 | } 116 | for _, node := range org.Nodes { 117 | chl := o.Optimize(node, ast) 118 | ast.Nodes = append(ast.Nodes, chl) 119 | } 120 | return ast 121 | } 122 | -------------------------------------------------------------------------------- /cmd/peglint/README.md: -------------------------------------------------------------------------------- 1 | peglint 2 | ------- 3 | 4 | The lint utility for PEG. 5 | 6 | ``` 7 | usage: peglint [-ast] [-opt] [-trace] [-f path] [-s string] [grammar path] 8 | ``` 9 | 10 | peglint checks syntax of a given PEG grammar file and reports errors. If the check is successful and a user gives a source file for the grammar, it will also check syntax of the source file. 11 | 12 | The -ast flag prints the AST (abstract syntax tree) of the source file. 13 | 14 | The -opt flag prints the optimized AST (abstract syntax tree) of the source file. 
15 | 16 | The -trace flag can be used with the source file. It prints names of rules and operators that the PEG parser detects on standard error. 17 | 18 | The -f 'path' specifies a file path to the source text. 19 | 20 | The -s 'string' specifies the source text. 21 | -------------------------------------------------------------------------------- /cmd/peglint/peglint.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "runtime/pprof" 9 | 10 | "github.com/yhirose/go-peg" 11 | ) 12 | 13 | var usageMessage = `usage: peglint [-ast] [-opt] [-trace] [-f path] [-s string] [grammar path] 14 | 15 | peglint checks syntax of a given PEG grammar file and reports errors. If the check is successful and a user gives a source file for the grammar, it will also check syntax of the source file. 16 | 17 | The -ast flag prints the AST (abstract syntax tree) of the source file. 18 | 19 | The -opt flag prints the optimized AST (abstract syntax tree) of the source file. 20 | 21 | The -trace flag can be used with the source file. It prints names of rules and operators that the PEG parser detects on standard error. 22 | 23 | The -f 'path' specifies a file path to the source text. 24 | 25 | The -s 'string' specifies the source text. 
26 | ` 27 | 28 | func usage() { 29 | fmt.Fprintf(os.Stderr, usageMessage) 30 | os.Exit(1) 31 | } 32 | 33 | var ( 34 | astFlag = flag.Bool("ast", false, "show ast") 35 | optFlag = flag.Bool("opt", false, "show optimized ast") 36 | traceFlag = flag.Bool("trace", false, "show trace message") 37 | sourceFilePath = flag.String("f", "", "source file path") 38 | sourceString = flag.String("s", "", "source string") 39 | profPath = flag.String("prof", "", "write cpu profile to file") 40 | ) 41 | 42 | func check(err error) { 43 | if err != nil { 44 | fmt.Fprintln(os.Stderr, err) 45 | os.Exit(1) 46 | } 47 | } 48 | 49 | func pcheck(err error) { 50 | if perr, ok := err.(*peg.Error); ok { 51 | for _, d := range perr.Details { 52 | fmt.Println(d) 53 | } 54 | os.Exit(1) 55 | } 56 | } 57 | 58 | func SetupTracer(p *peg.Parser) { 59 | indent := func(level int) string { 60 | s := "" 61 | for level > 0 { 62 | s = s + " " 63 | level-- 64 | } 65 | return s 66 | } 67 | 68 | fmt.Println("pos:lev\trule/ope") 69 | fmt.Println("-------\t--------") 70 | 71 | level := 0 72 | prevPos := 0 73 | 74 | p.TracerEnter = func(name string, s string, v *peg.Values, d peg.Any, p int) { 75 | var backtrack string 76 | if p < prevPos { 77 | backtrack = "*" 78 | } 79 | fmt.Printf("%d:%d%s\t%s%s\n", p, level, backtrack, indent(level), name) 80 | prevPos = p 81 | level++ 82 | } 83 | 84 | p.TracerLeave = func(name string, s string, v *peg.Values, d peg.Any, p int, l int) { 85 | level-- 86 | } 87 | } 88 | 89 | func main() { 90 | flag.Usage = usage 91 | flag.Parse() 92 | args := flag.Args() 93 | 94 | if len(args) < 1 { 95 | usage() 96 | } 97 | 98 | dat, err := ioutil.ReadFile(args[0]) 99 | check(err) 100 | 101 | parser, err := peg.NewParser(string(dat)) 102 | pcheck(err) 103 | 104 | var source string 105 | 106 | if *sourceFilePath != "" { 107 | if *sourceFilePath == "-" { 108 | dat, err := ioutil.ReadAll(os.Stdin) 109 | check(err) 110 | source = string(dat) 111 | } else { 112 | dat, err := 
ioutil.ReadFile(*sourceFilePath) 113 | check(err) 114 | source = string(dat) 115 | } 116 | } 117 | 118 | if *sourceString != "" { 119 | source = *sourceString 120 | } 121 | 122 | if len(source) > 0 { 123 | if *traceFlag { 124 | SetupTracer(parser) 125 | } 126 | 127 | if *astFlag || *optFlag { 128 | parser.EnableAst() 129 | } 130 | 131 | if *profPath != "" { 132 | f, err := os.Create(*profPath) 133 | check(err) 134 | pprof.StartCPUProfile(f) 135 | defer pprof.StopCPUProfile() 136 | } 137 | 138 | val, err := parser.ParseAndGetValue(source, nil) 139 | pcheck(err) 140 | 141 | if *astFlag || *optFlag { 142 | ast := val.(*peg.Ast) 143 | if *optFlag { 144 | opt := peg.NewAstOptimizer(nil) 145 | ast = opt.Optimize(ast, nil) 146 | } 147 | fmt.Println(ast) 148 | } 149 | } 150 | } 151 | -------------------------------------------------------------------------------- /examples_test.go: -------------------------------------------------------------------------------- 1 | package peg_test 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | 7 | . "github.com/yhirose/go-peg" 8 | ) 9 | 10 | func Example() { 11 | // Create a PEG parser 12 | parser, _ := NewParser(` 13 | # Grammar for simple calculator... 
14 | EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 15 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 16 | FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ 17 | TERM_OPERATOR <- < [-+] > _ 18 | FACTOR_OPERATOR <- < [/*] > _ 19 | NUMBER <- < [0-9]+ > _ 20 | ~_ <- [ \t]* 21 | `) 22 | 23 | // Setup actions 24 | reduce := func(v *Values, d Any) (Any, error) { 25 | val := v.ToInt(0) 26 | for i := 1; i < len(v.Vs); i += 2 { 27 | num := v.ToInt(i + 1) 28 | switch v.ToStr(i) { 29 | case "+": 30 | val += num 31 | case "-": 32 | val -= num 33 | case "*": 34 | val *= num 35 | case "/": 36 | val /= num 37 | } 38 | } 39 | return val, nil 40 | } 41 | 42 | g := parser.Grammar 43 | g["EXPRESSION"].Action = reduce 44 | g["TERM"].Action = reduce 45 | g["TERM_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 46 | g["FACTOR_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 47 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) } 48 | 49 | // Parse 50 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 51 | val, _ := parser.ParseAndGetValue(input, nil) 52 | 53 | fmt.Println(val) 54 | // Output: -3 55 | } 56 | 57 | func Example_combinators() { 58 | // Grammar 59 | var EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER Rule 60 | 61 | EXPRESSION.Ope = Seq(&TERM, Zom(Seq(&TERM_OPERATOR, &TERM))) 62 | TERM.Ope = Seq(&FACTOR, Zom(Seq(&FACTOR_OPERATOR, &FACTOR))) 63 | FACTOR.Ope = Cho(&NUMBER, Seq(Lit("("), &EXPRESSION, Lit(")"))) 64 | TERM_OPERATOR.Ope = Seq(Tok(Cls("-+"))) 65 | FACTOR_OPERATOR.Ope = Seq(Tok(Cls("/*"))) 66 | NUMBER.Ope = Seq(Tok(Oom(Cls("0-9")))) 67 | 68 | EXPRESSION.WhitespaceOpe = Zom(Cls(" \t")) 69 | 70 | // Actions 71 | reduce := func(v *Values, d Any) (Any, error) { 72 | ret := v.ToInt(0) 73 | for i := 1; i < len(v.Vs); i += 2 { 74 | ope := v.ToStr(i) 75 | n := v.ToInt(i + 1) 76 | switch ope { 77 | case "+": 78 | ret += n 79 | case "-": 80 | ret -= n 81 | case "*": 82 | ret 
*= n 83 | case "/": 84 | ret /= n 85 | } 86 | } 87 | return ret, nil 88 | } 89 | 90 | EXPRESSION.Action = reduce 91 | TERM.Action = reduce 92 | TERM_OPERATOR.Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 93 | FACTOR_OPERATOR.Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 94 | NUMBER.Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) } 95 | 96 | // Parse 97 | l, v, _ := EXPRESSION.Parse(" (1 + 2 * (3 + 4)) / 5 - 6 ", nil) 98 | 99 | fmt.Println(l) 100 | fmt.Println(v) 101 | // Output: 102 | // 27 103 | // -3 104 | } 105 | 106 | func Example_whitespace() { 107 | // Create a PEG parser 108 | parser, _ := NewParser(` 109 | # Grammar for simple calculator... 110 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 111 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 112 | FACTOR <- NUMBER / '(' EXPRESSION ')' 113 | TERM_OPERATOR <- < [-+] > 114 | FACTOR_OPERATOR <- < [/*] > 115 | NUMBER <- < [0-9]+ > 116 | %whitespace <- [ \t]* 117 | `) 118 | 119 | // Setup actions 120 | reduce := func(v *Values, d Any) (Any, error) { 121 | val := v.ToInt(0) 122 | for i := 1; i < len(v.Vs); i += 2 { 123 | num := v.ToInt(i + 1) 124 | switch v.ToStr(i) { 125 | case "+": 126 | val += num 127 | case "-": 128 | val -= num 129 | case "*": 130 | val *= num 131 | case "/": 132 | val /= num 133 | } 134 | } 135 | return val, nil 136 | } 137 | 138 | g := parser.Grammar 139 | g["EXPRESSION"].Action = reduce 140 | g["TERM"].Action = reduce 141 | g["TERM_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 142 | g["FACTOR_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 143 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) } 144 | 145 | // Parse 146 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 147 | val, _ := parser.ParseAndGetValue(input, nil) 148 | 149 | fmt.Println(val) 150 | // Output: -3 151 | } 152 | 153 | func 
Example_expressionParsing() { 154 | // Create a PEG parser 155 | parser, _ := NewParser(` 156 | # Grammar for simple calculator... 157 | EXPRESSION <- ATOM (BINOP ATOM)* 158 | ATOM <- NUMBER / '(' EXPRESSION ')' 159 | BINOP <- < [-+/*] > 160 | NUMBER <- < [0-9]+ > 161 | %whitespace <- [ \t]* 162 | --- 163 | # Expression parsing 164 | %expr = EXPRESSION 165 | %binop = L + - # level 1 166 | %binop = L * / # level 2 167 | `) 168 | 169 | // Setup actions 170 | g := parser.Grammar 171 | g["EXPRESSION"].Action = func(v *Values, d Any) (Any, error) { 172 | val := v.ToInt(0) 173 | if v.Len() > 1 { 174 | rhs := v.ToInt(2) 175 | ope := v.ToStr(1) 176 | switch ope { 177 | case "+": 178 | val += rhs 179 | case "-": 180 | val -= rhs 181 | case "*": 182 | val *= rhs 183 | case "/": 184 | val /= rhs 185 | } 186 | } 187 | return val, nil 188 | } 189 | g["BINOP"].Action = func(v *Values, d Any) (Any, error) { 190 | return v.Token(), nil 191 | } 192 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { 193 | return strconv.Atoi(v.Token()) 194 | } 195 | 196 | // Parse 197 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 198 | val, _ := parser.ParseAndGetValue(input, nil) 199 | 200 | fmt.Println(val) 201 | // Output: -3 202 | } 203 | 204 | func Example_AST() { 205 | // Create a PEG parser 206 | parser, _ := NewParser(` 207 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 208 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 209 | FACTOR <- NUMBER / '(' EXPRESSION ')' 210 | TERM_OPERATOR <- < [-+] > 211 | FACTOR_OPERATOR <- < [/*] > 212 | NUMBER <- < [0-9]+ > 213 | %whitespace <- [ \t\r\n]* 214 | `) 215 | 216 | // Evaluator 217 | var eval func(ast *Ast) int 218 | eval = func(ast *Ast) int { 219 | if ast.Name == "NUMBER" { 220 | val, _ := strconv.Atoi(ast.Token) 221 | return val 222 | } else { 223 | nodes := ast.Nodes 224 | val := eval(nodes[0]) 225 | for i := 1; i < len(nodes); i += 2 { 226 | num := eval(nodes[i+1]) 227 | ope := nodes[i].Token[0] 228 | switch ope { 229 | case '+': 230 | val += 
num 231 | break 232 | case '-': 233 | val -= num 234 | break 235 | case '*': 236 | val *= num 237 | break 238 | case '/': 239 | val /= num 240 | break 241 | } 242 | } 243 | return val 244 | } 245 | } 246 | 247 | // Generate AST 248 | parser.EnableAst() 249 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 250 | ret, _ := parser.ParseAndGetValue(input, nil) 251 | ast := ret.(*Ast) 252 | 253 | // Optimize AST 254 | opt := NewAstOptimizer(nil) 255 | ast = opt.Optimize(ast, nil) 256 | 257 | // Evaluate AST 258 | val := eval(ast) 259 | 260 | fmt.Println(val) 261 | // Output: -3 262 | } 263 | -------------------------------------------------------------------------------- /expr.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | const ( 4 | assocNone = iota 5 | assocLeft 6 | assocRight 7 | ) 8 | 9 | type BinOpeInfo map[string]struct { 10 | level int 11 | assoc int 12 | } 13 | 14 | // Expression parsing 15 | type expression struct { 16 | opeBase 17 | atom operator 18 | binop operator 19 | bopinf BinOpeInfo 20 | action *Action 21 | } 22 | 23 | func (o *expression) parseExpr(s string, p int, v *Values, c *context, d Any, minPrec int) (l int) { 24 | l = o.atom.parse(s, p, v, c, d) 25 | if fail(l) { 26 | return 27 | } 28 | 29 | var tok string 30 | r := o.binop.(*reference).rule 31 | action := r.Action 32 | r.Action = func(v *Values, d Any) (val Any, err error) { 33 | tok = v.Token() 34 | if action != nil { 35 | val, err = action(v, d) 36 | } else if len(v.Vs) > 0 { 37 | val = v.Vs[0] 38 | } 39 | return val, err 40 | } 41 | defer func() { r.Action = action }() 42 | 43 | saveErrorPos := c.errorPos 44 | 45 | for p+l < len(s) { 46 | saveVs := v.Vs 47 | saveTs := v.Ts 48 | 49 | chv := c.push() 50 | chl := o.binop.parse(s, p+l, chv, c, d) 51 | c.pop() 52 | 53 | if fail(chl) { 54 | c.errorPos = saveErrorPos 55 | break 56 | } 57 | 58 | inf, ok := o.bopinf[tok] 59 | if !ok || inf.level < minPrec { 60 | break 61 | } 62 | 63 | v.Vs = 
append(v.Vs, chv.Vs[0]) 64 | l += chl 65 | 66 | nextMinPrec := inf.level 67 | if inf.assoc == assocLeft { 68 | nextMinPrec = inf.level + 1 69 | } 70 | 71 | chv = c.push() 72 | chl = o.parseExpr(s, p+l, chv, c, d, nextMinPrec) 73 | c.pop() 74 | 75 | if fail(chl) { 76 | v.Vs = saveVs 77 | v.Ts = saveTs 78 | c.errorPos = saveErrorPos 79 | break 80 | } 81 | 82 | v.Vs = append(v.Vs, chv.Vs[0]) 83 | l += chl 84 | 85 | var val Any 86 | if *o.action != nil { 87 | v.S = s[p : p+l] 88 | v.Pos = p 89 | 90 | var err error 91 | if val, err = (*o.action)(v, d); err != nil { 92 | if c.messagePos < p { 93 | c.messagePos = p 94 | c.message = err.Error() 95 | } 96 | l = -1 97 | v.Vs = saveVs 98 | v.Ts = saveTs 99 | c.errorPos = saveErrorPos 100 | break 101 | } 102 | } else if len(v.Vs) > 0 { 103 | val = v.Vs[0] 104 | } 105 | 106 | v.Vs = []Any{val} 107 | } 108 | 109 | return 110 | } 111 | 112 | func (o *expression) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 113 | l = o.parseExpr(s, p, v, c, d, 0) 114 | return 115 | } 116 | 117 | func (o *expression) accept(v visitor) { 118 | v.visitExpression(o) 119 | } 120 | 121 | func Exp(atom operator, binop operator, bopinf BinOpeInfo, action *Action) operator { 122 | o := &expression{atom: atom, binop: binop, bopinf: bopinf, action: action} 123 | o.derived = o 124 | return o 125 | } 126 | 127 | func EnableExpressionParsing(p *Parser, name string, bopinf BinOpeInfo) error { 128 | if r, ok := p.Grammar[name]; ok { 129 | seq := r.Ope.(*sequence) 130 | atom := seq.opes[0].(*reference) 131 | opes := seq.opes[1].(*zeroOrMore).ope.(*sequence).opes 132 | atom1 := opes[1].(*reference) 133 | binop := opes[0].(*reference) 134 | 135 | if atom.name != atom1.name { 136 | err := &Error{} 137 | ln, col := lineInfo(r.SS, r.Pos) 138 | msg := "expression syntax error" 139 | err.Details = append(err.Details, ErrorDetail{ln, col, msg}) 140 | return err 141 | } 142 | 143 | r.Ope = Exp(atom, binop, bopinf, &r.Action) 144 | r.disableAction = 
// Values carries the semantic values and match information that operators
// accumulate while parsing and that rule actions receive.
type Values struct {
	// SS is the context's source string (context.push copies c.s into it);
	// it is passed to lineInfo together with Pos.
	SS string
	// Vs holds the semantic values collected so far; the To* accessors and
	// Len index into it.
	Vs []Any
	// Pos and S describe the current match; expression parsing sets them to
	// the start position p and to s[p:p+l] before invoking the action.
	Pos int
	S   string
	// Choice is set by prioritizedChoice to the zero-based index of the
	// alternative that matched.
	Choice int
	// Ts lists the tokens recorded by token boundaries; Token() returns the
	// text of the first one, falling back to S when there is none.
	Ts []Token
}
Values{SS: c.s} 101 | c.svStack = append(c.svStack, v) 102 | return &c.svStack[len(c.svStack)-1] 103 | } 104 | 105 | func (c *context) pop() { 106 | c.svStack = c.svStack[:len(c.svStack)-1] 107 | } 108 | 109 | func (c *context) pushArgs(args []operator) { 110 | c.argsStack = append(c.argsStack, args) 111 | } 112 | 113 | func (c *context) popArgs() { 114 | c.argsStack = c.argsStack[:len(c.argsStack)-1] 115 | } 116 | 117 | func (c *context) topArg() []operator { 118 | if len(c.argsStack) == 0 { 119 | return nil 120 | } 121 | return c.argsStack[len(c.argsStack)-1] 122 | } 123 | 124 | // parse 125 | func parse(o operator, s string, p int, v *Values, c *context, d Any) (l int) { 126 | if c.tracerEnter != nil { 127 | c.tracerEnter(o.Label(), s, v, d, p) 128 | } 129 | 130 | l = o.parseCore(s, p, v, c, d) 131 | 132 | if c.tracerLeave != nil { 133 | c.tracerLeave(o.Label(), s, v, d, p, l) 134 | } 135 | return 136 | } 137 | 138 | // Operator 139 | type operator interface { 140 | Label() string 141 | parse(s string, p int, v *Values, c *context, d Any) int 142 | parseCore(s string, p int, v *Values, c *context, d Any) int 143 | accept(v visitor) 144 | } 145 | 146 | // Operator base 147 | type opeBase struct { 148 | derived operator 149 | } 150 | 151 | func (o *opeBase) Label() string { 152 | return reflect.TypeOf(o.derived).String()[5:] 153 | } 154 | 155 | func (o *opeBase) parse(s string, p int, v *Values, c *context, d Any) int { 156 | return parse(o.derived, s, p, v, c, d) 157 | } 158 | 159 | // Sequence 160 | type sequence struct { 161 | opeBase 162 | opes []operator 163 | } 164 | 165 | func (o *sequence) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 166 | l = 0 167 | for _, ope := range o.opes { 168 | chl := ope.parse(s, p+l, v, c, d) 169 | if fail(chl) { 170 | l = -1 171 | return 172 | } 173 | l += chl 174 | } 175 | return 176 | } 177 | 178 | func (o *sequence) accept(v visitor) { 179 | v.visitSequence(o) 180 | } 181 | 182 | // Prioritized Choice 
183 | type prioritizedChoice struct { 184 | opeBase 185 | opes []operator 186 | } 187 | 188 | func (o *prioritizedChoice) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 189 | id := 0 190 | for _, ope := range o.opes { 191 | chv := c.push() 192 | l = ope.parse(s, p, chv, c, d) 193 | c.pop() 194 | if success(l) { 195 | v.Vs = append(v.Vs, chv.Vs...) 196 | v.Pos = chv.Pos 197 | v.S = chv.S 198 | v.Choice = id 199 | v.Ts = append(v.Ts, chv.Ts...) 200 | return 201 | } 202 | id++ 203 | } 204 | l = -1 205 | return 206 | } 207 | 208 | func (o *prioritizedChoice) accept(v visitor) { 209 | v.visitPrioritizedChoice(o) 210 | } 211 | 212 | // Zero or More 213 | type zeroOrMore struct { 214 | opeBase 215 | ope operator 216 | } 217 | 218 | func (o *zeroOrMore) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 219 | saveErrorPos := c.errorPos 220 | l = 0 221 | for p+l < len(s) { 222 | saveVs := v.Vs 223 | saveTs := v.Ts 224 | chl := o.ope.parse(s, p+l, v, c, d) 225 | if fail(chl) { 226 | v.Vs = saveVs 227 | v.Ts = saveTs 228 | c.errorPos = saveErrorPos 229 | break 230 | } 231 | l += chl 232 | } 233 | return 234 | } 235 | 236 | func (o *zeroOrMore) accept(v visitor) { 237 | v.visitZeroOrMore(o) 238 | } 239 | 240 | // One or More 241 | type oneOrMore struct { 242 | opeBase 243 | ope operator 244 | } 245 | 246 | func (o *oneOrMore) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 247 | l = o.ope.parse(s, p, v, c, d) 248 | if fail(l) { 249 | return 250 | } 251 | saveErrorPos := c.errorPos 252 | for p+l < len(s) { 253 | saveVs := v.Vs 254 | saveTs := v.Ts 255 | chl := o.ope.parse(s, p+l, v, c, d) 256 | if fail(chl) { 257 | v.Vs = saveVs 258 | v.Ts = saveTs 259 | c.errorPos = saveErrorPos 260 | break 261 | } 262 | l += chl 263 | } 264 | return 265 | } 266 | 267 | func (o *oneOrMore) accept(v visitor) { 268 | v.visitOneOrMore(o) 269 | } 270 | 271 | // Option 272 | type option struct { 273 | opeBase 274 | ope operator 275 | } 276 | 
277 | func (o *option) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 278 | saveErrorPos := c.errorPos 279 | saveVs := v.Vs 280 | saveTs := v.Ts 281 | l = o.ope.parse(s, p, v, c, d) 282 | if fail(l) { 283 | v.Vs = saveVs 284 | v.Ts = saveTs 285 | c.errorPos = saveErrorPos 286 | l = 0 287 | } 288 | return 289 | } 290 | 291 | func (o *option) accept(v visitor) { 292 | v.visitOption(o) 293 | } 294 | 295 | // And Predicate 296 | type andPredicate struct { 297 | opeBase 298 | ope operator 299 | } 300 | 301 | func (o *andPredicate) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 302 | chv := c.push() 303 | chl := o.ope.parse(s, p, chv, c, d) 304 | c.pop() 305 | 306 | if success(chl) { 307 | l = 0 308 | } else { 309 | l = -1 310 | } 311 | return 312 | } 313 | 314 | func (o *andPredicate) accept(v visitor) { 315 | v.visitAndPredicate(o) 316 | } 317 | 318 | // Not Predicate 319 | type notPredicate struct { 320 | opeBase 321 | ope operator 322 | } 323 | 324 | func (o *notPredicate) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 325 | saveErrorPos := c.errorPos 326 | 327 | chv := c.push() 328 | chl := o.ope.parse(s, p, chv, c, d) 329 | c.pop() 330 | 331 | if success(chl) { 332 | c.setErrorPos(p) 333 | l = -1 334 | } else { 335 | c.errorPos = saveErrorPos 336 | l = 0 337 | } 338 | return 339 | } 340 | 341 | func (o *notPredicate) accept(v visitor) { 342 | v.visitNotPredicate(o) 343 | } 344 | 345 | // Literal String 346 | type literalString struct { 347 | opeBase 348 | lit string 349 | initIsWord sync.Once 350 | isWord bool 351 | } 352 | 353 | func (o *literalString) parseCore(s string, p int, v *Values, c *context, d Any) int { 354 | l := 0 355 | for ; l < len(o.lit); l++ { 356 | if p+l == len(s) || s[p+l] != o.lit[l] { 357 | c.setErrorPos(p) 358 | return -1 359 | } 360 | } 361 | 362 | // Word check 363 | o.initIsWord.Do(func() { 364 | if c.wordOpe != nil { 365 | len := c.wordOpe.parse(o.lit, 0, &Values{}, &context{s: 
s}, nil) 366 | o.isWord = success(len) 367 | } 368 | }) 369 | if o.isWord { 370 | len := Npd(c.wordOpe).parse(s, p+l, v, &context{s: s}, nil) 371 | if fail(len) { 372 | return -1 373 | } 374 | l += len 375 | } 376 | 377 | // Skip whiltespace 378 | if c.inToken == false { 379 | if c.whitespaceOpe != nil { 380 | len := c.whitespaceOpe.parse(s, p+l, v, c, d) 381 | if fail(len) { 382 | return -1 383 | } 384 | l += len 385 | } 386 | } 387 | return l 388 | } 389 | 390 | func (o *literalString) accept(v visitor) { 391 | v.visitLiteralString(o) 392 | } 393 | 394 | // Character Class 395 | type characterClass struct { 396 | opeBase 397 | chars string 398 | } 399 | 400 | func (o *characterClass) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 401 | // TODO: UTF8 support 402 | if len(s)-p < 1 { 403 | c.setErrorPos(p) 404 | l = -1 405 | return 406 | } 407 | ch := s[p] 408 | i := 0 409 | for i < len(o.chars) { 410 | if i+2 < len(o.chars) && o.chars[i+1] == '-' { 411 | if o.chars[i] <= ch && ch <= o.chars[i+2] { 412 | l = 1 413 | return 414 | } 415 | i += 3 416 | } else { 417 | if o.chars[i] == ch { 418 | l = 1 419 | return 420 | } 421 | i++ 422 | } 423 | } 424 | c.setErrorPos(p) 425 | l = -1 426 | return 427 | } 428 | 429 | func (o *characterClass) accept(v visitor) { 430 | v.visitCharacterClass(o) 431 | } 432 | 433 | // Any Character 434 | type anyCharacter struct { 435 | opeBase 436 | } 437 | 438 | func (o *anyCharacter) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 439 | // TODO: UTF8 support 440 | if len(s)-p < 1 { 441 | c.setErrorPos(p) 442 | l = -1 443 | return 444 | } 445 | l = 1 446 | return 447 | } 448 | 449 | func (o *anyCharacter) accept(v visitor) { 450 | v.visitAnyCharacter(o) 451 | } 452 | 453 | // Token Boundary 454 | type tokenBoundary struct { 455 | opeBase 456 | ope operator 457 | } 458 | 459 | func (o *tokenBoundary) parseCore(s string, p int, v *Values, c *context, d Any) int { 460 | c.inToken = true 461 | l := 
o.ope.parse(s, p, v, c, d) 462 | c.inToken = false 463 | if success(l) { 464 | v.Ts = append(v.Ts, Token{p, s[p : p+l]}) 465 | 466 | // Skip whiltespace 467 | if c.whitespaceOpe != nil { 468 | len := c.whitespaceOpe.parse(s, p+l, v, c, d) 469 | if fail(len) { 470 | return -1 471 | } 472 | l += len 473 | } 474 | } 475 | return l 476 | } 477 | 478 | func (o *tokenBoundary) accept(v visitor) { 479 | v.visitTokenBoundary(o) 480 | } 481 | 482 | // Ignore 483 | type ignore struct { 484 | opeBase 485 | ope operator 486 | } 487 | 488 | func (o *ignore) parseCore(s string, p int, v *Values, c *context, d Any) int { 489 | chv := c.push() 490 | l := o.ope.parse(s, p, chv, c, d) 491 | c.pop() 492 | return l 493 | } 494 | 495 | func (o *ignore) accept(v visitor) { 496 | v.visitIgnore(o) 497 | } 498 | 499 | // User 500 | type user struct { 501 | opeBase 502 | fn func(s string, p int, v *Values, d Any) int 503 | } 504 | 505 | func (o *user) parseCore(s string, p int, v *Values, c *context, d Any) int { 506 | return o.fn(s, p, v, d) 507 | } 508 | 509 | func (o *user) accept(v visitor) { 510 | v.visitUser(o) 511 | } 512 | 513 | // Reference 514 | type reference struct { 515 | opeBase 516 | name string 517 | iarg int 518 | args []operator 519 | pos int 520 | rule *Rule 521 | } 522 | 523 | func (o *reference) parseCore(s string, p int, v *Values, c *context, d Any) (l int) { 524 | if o.rule != nil { 525 | // Reference rule 526 | if o.rule.Parameters == nil { 527 | // Definition 528 | l = o.rule.parse(s, p, v, c, d) 529 | } else { 530 | // Macro 531 | vis := &findReference{ 532 | args: c.topArg(), 533 | params: o.rule.Parameters, 534 | } 535 | 536 | // Collect arguments 537 | var args []operator 538 | for _, arg := range o.args { 539 | arg.accept(vis) 540 | args = append(args, vis.ope) 541 | } 542 | 543 | c.pushArgs(args) 544 | l = o.rule.parse(s, p, v, c, d) 545 | c.popArgs() 546 | } 547 | } else { 548 | // Reference parameter in macro 549 | args := c.topArg() 550 | l = 
args[o.iarg].parse(s, p, v, c, d) 551 | } 552 | return 553 | } 554 | 555 | func (o *reference) accept(v visitor) { 556 | v.visitReference(o) 557 | } 558 | 559 | // Whitespace 560 | type whitespace struct { 561 | opeBase 562 | ope operator 563 | } 564 | 565 | func (o *whitespace) parseCore(s string, p int, v *Values, c *context, d Any) int { 566 | if c.inWhitespace { 567 | return 0 568 | } else { 569 | c.inWhitespace = true 570 | l := o.ope.parse(s, p, v, c, d) 571 | c.inWhitespace = false 572 | return l 573 | } 574 | } 575 | 576 | func (o *whitespace) accept(v visitor) { 577 | v.visitWhitespace(o) 578 | } 579 | 580 | func SeqCore(opes []operator) operator { 581 | o := &sequence{opes: opes} 582 | o.derived = o 583 | return o 584 | } 585 | func Seq(opes ...operator) operator { 586 | return SeqCore(opes) 587 | } 588 | func ChoCore(opes []operator) operator { 589 | o := &prioritizedChoice{opes: opes} 590 | o.derived = o 591 | return o 592 | } 593 | func Cho(opes ...operator) operator { 594 | return ChoCore(opes) 595 | } 596 | func Zom(ope operator) operator { 597 | o := &zeroOrMore{ope: ope} 598 | o.derived = o 599 | return o 600 | } 601 | func Oom(ope operator) operator { 602 | o := &oneOrMore{ope: ope} 603 | o.derived = o 604 | return o 605 | } 606 | func Opt(ope operator) operator { 607 | o := &option{ope: ope} 608 | o.derived = o 609 | return o 610 | } 611 | func Apd(ope operator) operator { 612 | o := &andPredicate{ope: ope} 613 | o.derived = o 614 | return o 615 | } 616 | func Npd(ope operator) operator { 617 | o := ¬Predicate{ope: ope} 618 | o.derived = o 619 | return o 620 | } 621 | func Lit(lit string) operator { 622 | o := &literalString{lit: lit} 623 | o.derived = o 624 | return o 625 | } 626 | func Cls(chars string) operator { 627 | o := &characterClass{chars: chars} 628 | o.derived = o 629 | return o 630 | } 631 | func Dot() operator { 632 | o := &anyCharacter{} 633 | o.derived = o 634 | return o 635 | } 636 | func Tok(ope operator) operator { 637 | o := 
&tokenBoundary{ope: ope} 638 | o.derived = o 639 | return o 640 | } 641 | func Ign(ope operator) operator { 642 | o := &ignore{ope: ope} 643 | o.derived = o 644 | return o 645 | } 646 | func Usr(fn func(s string, p int, v *Values, d Any) int) operator { 647 | o := &user{fn: fn} 648 | o.derived = o 649 | return o 650 | } 651 | func Ref(ident string, args []operator, pos int) operator { 652 | o := &reference{name: ident, args: args, pos: pos} 653 | o.derived = o 654 | return o 655 | } 656 | func Wsp(ope operator) operator { 657 | o := &whitespace{ope: Ign(ope)} 658 | o.derived = o 659 | return o 660 | } 661 | -------------------------------------------------------------------------------- /ope_test.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import "testing" 4 | 5 | type Cases []struct { 6 | input string 7 | want int 8 | } 9 | 10 | func run(name string, t *testing.T, ope operator, cases Cases) { 11 | for _, cs := range cases { 12 | v := &Values{} 13 | c := &context{} 14 | if got := ope.parse(cs.input, 0, v, c, nil); got != cs.want { 15 | t.Errorf("[%s] input:%q want:%d got:%d", name, cs.input, cs.want, got) 16 | } 17 | } 18 | } 19 | 20 | func TestSequence(t *testing.T) { 21 | ope := Seq( 22 | Lit("日本語"), 23 | Lit("も"), 24 | Lit("OK"), 25 | Lit("です。"), 26 | ) 27 | cases := Cases{ 28 | {"日本語もOKです。", 23}, 29 | {"日本語OKです。", -1}, 30 | } 31 | run("Sequence", t, ope, cases) 32 | } 33 | 34 | func TestPrioritizedChoice(t *testing.T) { 35 | ope := Cho( 36 | Lit("English"), 37 | Lit("日本語"), 38 | ) 39 | cases := Cases{ 40 | {"日本語", 9}, 41 | {"English", 7}, 42 | {"Go", -1}, 43 | } 44 | run("PrioritizedChoice", t, ope, cases) 45 | } 46 | 47 | func TestZeroOrMore(t *testing.T) { 48 | ope := Zom( 49 | Lit("abc"), 50 | ) 51 | cases := Cases{ 52 | {"", 0}, 53 | {"a", 0}, 54 | {"b", 0}, 55 | {"ab", 0}, 56 | {"abc", 3}, 57 | {"abca", 3}, 58 | {"abcabc", 6}, 59 | } 60 | run("ZeroOrMore", t, ope, cases) 61 | } 62 | 63 | func 
TestOneOrMore(t *testing.T) { 64 | ope := Oom( 65 | Lit("abc"), 66 | ) 67 | cases := Cases{ 68 | {"", -1}, 69 | {"a", -1}, 70 | {"b", -1}, 71 | {"ab", -1}, 72 | {"abc", 3}, 73 | {"abca", 3}, 74 | {"abcabc", 6}, 75 | } 76 | run("OneOrMore", t, ope, cases) 77 | } 78 | 79 | func TestOption(t *testing.T) { 80 | ope := Opt( 81 | Lit("abc"), 82 | ) 83 | cases := Cases{ 84 | {"", 0}, 85 | {"a", 0}, 86 | {"b", 0}, 87 | {"ab", 0}, 88 | {"abc", 3}, 89 | {"abca", 3}, 90 | {"abcabc", 3}, 91 | } 92 | run("Option", t, ope, cases) 93 | } 94 | 95 | func TestAndPredicate(t *testing.T) { 96 | ope := Apd( 97 | Lit("abc"), 98 | ) 99 | cases := Cases{ 100 | {"", -1}, 101 | {"a", -1}, 102 | {"b", -1}, 103 | {"ab", -1}, 104 | {"abc", 0}, 105 | {"abca", 0}, 106 | {"abcabc", 0}, 107 | } 108 | run("AndPredicate", t, ope, cases) 109 | } 110 | 111 | func TestNotPredicate(t *testing.T) { 112 | ope := Npd( 113 | Lit("abc"), 114 | ) 115 | cases := Cases{ 116 | {"", 0}, 117 | {"a", 0}, 118 | {"b", 0}, 119 | {"ab", 0}, 120 | {"abc", -1}, 121 | {"abca", -1}, 122 | {"abcabc", -1}, 123 | } 124 | run("NotPredicate", t, ope, cases) 125 | } 126 | 127 | func TestLiteralString(t *testing.T) { 128 | ope := Lit("日本語") 129 | cases := Cases{ 130 | {"", -1}, 131 | {"日", -1}, 132 | {"日本語", 9}, 133 | {"日本語です。", 9}, 134 | {"English", -1}, 135 | } 136 | run("LiteralString", t, ope, cases) 137 | } 138 | 139 | func TestCharacterClass(t *testing.T) { 140 | ope := Cls("a-zA-Z0-9_") 141 | cases := Cases{ 142 | {"", -1}, 143 | {"a", 1}, 144 | {"b", 1}, 145 | {"z", 1}, 146 | {"A", 1}, 147 | {"B", 1}, 148 | {"Z", 1}, 149 | {"0", 1}, 150 | {"1", 1}, 151 | {"9", 1}, 152 | {"_", 1}, 153 | {"-", -1}, 154 | {" ", -1}, 155 | } 156 | run("CharacterClass", t, ope, cases) 157 | } 158 | 159 | func TestTokenBoundary(t *testing.T) { 160 | ope := Seq(Tok(Lit("hello")), Lit(" ")) 161 | v := &Values{} 162 | c := &context{} 163 | input := "hello " 164 | 165 | want := len(input) 166 | if got := ope.parse(input, 0, v, c, nil); got != want 
{ 167 | t.Errorf("[%s] input:%q want:%d got:%d", "TokenBoundary", input, want, got) 168 | } 169 | 170 | tok := "hello" 171 | if len(v.Ts) == 0 || v.Ts[0].S != tok { 172 | t.Errorf("[%s] input:%q want:%s got:%s", "TokenBoundary", input, tok, v.Ts[0].S) 173 | } 174 | } 175 | 176 | func TestIgnore(t *testing.T) { 177 | var NUMBER, WS Rule 178 | NUMBER.Ope = Seq(Tok(Oom(Cls("0-9"))), Ign(&WS)) 179 | WS.Ope = Zom(Cls(" \t")) 180 | 181 | input := "123 " 182 | 183 | NUMBER.Action = func(v *Values, d Any) (Any, error) { 184 | n := 0 185 | if len(v.Vs) != n { 186 | t.Errorf("[%s] input:%q want:%d got:%d", "Ignore", input, n, len(v.Vs)) 187 | } 188 | return nil, nil 189 | } 190 | 191 | want := len(input) 192 | if l, _, _ := NUMBER.Parse(input, nil); l != want { 193 | t.Errorf("[%s] input:%q want:%d got:%d", "Ignore", input, want, l) 194 | } 195 | } 196 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | import "strings" 4 | 5 | const ( 6 | WhitespceRuleName = "%whitespace" 7 | WordRuleName = "%word" 8 | OptExpressionRule = "%expr" 9 | OptBinaryOperator = "%binop" 10 | ) 11 | 12 | // PEG parser generator 13 | type duplicate struct { 14 | name string 15 | pos int 16 | } 17 | 18 | type data struct { 19 | grammar map[string]*Rule 20 | start string 21 | duplicates []duplicate 22 | options map[string][]string 23 | } 24 | 25 | func newData() *data { 26 | return &data{ 27 | grammar: make(map[string]*Rule), 28 | options: make(map[string][]string), 29 | } 30 | } 31 | 32 | var rStart, rDefinition, rExpression, 33 | rSequence, rPrefix, rSuffix, rPrimary, 34 | rIdentifier, rIdentCont, rIdentStart, rIdentRest, 35 | rLiteral, rClass, rRange, rChar, 36 | rLEFTARROW, rSLASH, rAND, rNOT, rQUESTION, rSTAR, rPLUS, rOPEN, rCLOSE, rDOT, 37 | rSpacing, rComment, rSpace, rEndOfLine, rEndOfFile, rBeginTok, rEndTok, 38 | rIgnore, rIGNORE, 39 | 
rParameters, rArguments, rCOMMA, 40 | rOption, rOptionValue, rOptionComment, rASSIGN, rSEPARATOR Rule 41 | 42 | func init() { 43 | // Setup PEG syntax parser 44 | rStart.Ope = Seq( 45 | &rSpacing, 46 | Oom(&rDefinition), 47 | Opt(Seq(&rSEPARATOR, Oom(&rOption))), 48 | &rEndOfFile) 49 | 50 | rDefinition.Ope = Cho( 51 | Seq(&rIgnore, &rIdentCont, &rParameters, &rLEFTARROW, &rExpression), 52 | Seq(&rIgnore, &rIdentifier, &rLEFTARROW, &rExpression)) 53 | 54 | rExpression.Ope = Seq(&rSequence, Zom(Seq(&rSLASH, &rSequence))) 55 | rSequence.Ope = Zom(&rPrefix) 56 | rPrefix.Ope = Seq(Opt(Cho(&rAND, &rNOT)), &rSuffix) 57 | rSuffix.Ope = Seq(&rPrimary, Opt(Cho(&rQUESTION, &rSTAR, &rPLUS))) 58 | 59 | rPrimary.Ope = Cho( 60 | Seq(&rIgnore, &rIdentCont, &rArguments, Npd(&rLEFTARROW)), 61 | Seq(&rIgnore, &rIdentifier, Npd(Seq(Opt(&rParameters), &rLEFTARROW))), 62 | Seq(&rOPEN, &rExpression, &rCLOSE), 63 | Seq(&rBeginTok, &rExpression, &rEndTok), 64 | &rLiteral, 65 | &rClass, 66 | &rDOT) 67 | 68 | rIdentifier.Ope = Seq(&rIdentCont, &rSpacing) 69 | rIdentCont.Ope = Seq(&rIdentStart, Zom(&rIdentRest)) 70 | rIdentStart.Ope = Cls("a-zA-Z_\x80-\xff%") 71 | rIdentRest.Ope = Cho(&rIdentStart, Cls("0-9")) 72 | 73 | rLiteral.Ope = Cho( 74 | Seq(Lit("'"), Tok(Zom(Seq(Npd(Lit("'")), &rChar))), Lit("'"), &rSpacing), 75 | Seq(Lit("\""), Tok(Zom(Seq(Npd(Lit("\"")), &rChar))), Lit("\""), &rSpacing)) 76 | 77 | rClass.Ope = Seq(Lit("["), Tok(Zom(Seq(Npd(Lit("]")), &rRange))), Lit("]"), &rSpacing) 78 | 79 | rRange.Ope = Cho(Seq(&rChar, Lit("-"), &rChar), &rChar) 80 | rChar.Ope = Cho( 81 | Seq(Lit("\\"), Cls("nrtfv'\"[]\\")), 82 | Seq(Lit("\\"), Cls("0-3"), Cls("0-7"), Cls("0-7")), 83 | Seq(Lit("\\"), Cls("0-7"), Opt(Cls("0-7"))), 84 | Seq(Lit("\\x"), Cls("0-9a-fA-F"), Opt(Cls("0-9a-fA-F"))), 85 | Seq(Npd(Lit("\\")), Dot())) 86 | 87 | rLEFTARROW.Ope = Seq(Cho(Lit("<-"), Lit("←")), &rSpacing) 88 | rSLASH.Ope = Seq(Lit("/"), &rSpacing) 89 | rSLASH.Ignore = true 90 | rAND.Ope = Seq(Lit("&"), 
&rSpacing) 91 | rNOT.Ope = Seq(Lit("!"), &rSpacing) 92 | rQUESTION.Ope = Seq(Lit("?"), &rSpacing) 93 | rSTAR.Ope = Seq(Lit("*"), &rSpacing) 94 | rPLUS.Ope = Seq(Lit("+"), &rSpacing) 95 | rOPEN.Ope = Seq(Lit("("), &rSpacing) 96 | rOPEN.Ignore = true 97 | rCLOSE.Ope = Seq(Lit(")"), &rSpacing) 98 | rCLOSE.Ignore = true 99 | rDOT.Ope = Seq(Lit("."), &rSpacing) 100 | 101 | rSpacing.Ope = Zom(Cho(&rSpace, &rComment)) 102 | rComment.Ope = Seq(Lit("#"), Zom(Seq(Npd(&rEndOfLine), Dot())), &rEndOfLine) 103 | rSpace.Ope = Cho(Lit(" "), Lit("\t"), &rEndOfLine) 104 | rEndOfLine.Ope = Cho(Lit("\r\n"), Lit("\n"), Lit("\r")) 105 | rEndOfFile.Ope = Npd(Dot()) 106 | 107 | rBeginTok.Ope = Seq(Lit("<"), &rSpacing) 108 | rBeginTok.Ignore = true 109 | rEndTok.Ope = Seq(Lit(">"), &rSpacing) 110 | rEndTok.Ignore = true 111 | 112 | rIGNORE.Ope = Lit("~") 113 | rSEPARATOR.Ope = Seq(Lit("---"), &rSpacing) 114 | 115 | rIgnore.Ope = Opt(&rIGNORE) 116 | 117 | rParameters.Ope = Seq(&rOPEN, &rIdentifier, Zom(Seq(&rCOMMA, &rIdentifier)), &rCLOSE) 118 | rArguments.Ope = Seq(&rOPEN, &rExpression, Zom(Seq(&rCOMMA, &rExpression)), &rCLOSE) 119 | rCOMMA.Ope = Seq(Lit(","), &rSpacing) 120 | rCOMMA.Ignore = true 121 | 122 | rOption.Ope = Seq(&rIdentifier, &rASSIGN, &rOptionValue) 123 | rOptionComment.Ope = Seq(Zom(Cho(Lit(" "), Lit("\t"))), Cho(&rComment, &rEndOfLine)) 124 | rOptionValue.Ope = Seq(Tok(Zom(Seq(Npd(&rOptionComment), Dot()))), &rOptionComment, &rSpacing) 125 | rASSIGN.Ope = Seq(Lit("="), &rSpacing) 126 | rSEPARATOR.Ope = Seq(Lit("---"), &rSpacing) 127 | 128 | // Setup actions 129 | rDefinition.Action = func(v *Values, d Any) (val Any, err error) { 130 | var ignore bool 131 | var name string 132 | var params []string 133 | var ope operator 134 | 135 | switch v.Choice { 136 | case 0: // Macro 137 | ignore = v.ToBool(0) 138 | name = v.ToStr(1) 139 | params = v.Vs[2].([]string) 140 | ope = v.ToOpe(4) 141 | case 1: // Rule 142 | ignore = v.ToBool(0) 143 | name = v.ToStr(1) 144 | ope = v.ToOpe(3) 
145 | } 146 | 147 | data := d.(*data) 148 | _, ok := data.grammar[name] 149 | if ok { 150 | data.duplicates = append(data.duplicates, duplicate{name, v.Pos}) 151 | } else { 152 | data.grammar[name] = &Rule{ 153 | Ope: ope, 154 | Name: name, 155 | SS: v.SS, 156 | Pos: v.Pos, 157 | Ignore: ignore, 158 | Parameters: params, 159 | } 160 | if len(data.start) == 0 { 161 | data.start = name 162 | } 163 | } 164 | return 165 | } 166 | 167 | rParameters.Action = func(v *Values, d Any) (val Any, err error) { 168 | var params []string 169 | for i := 0; i < len(v.Vs); i++ { 170 | params = append(params, v.ToStr(i)) 171 | } 172 | val = params 173 | return 174 | } 175 | 176 | rArguments.Action = func(v *Values, d Any) (val Any, err error) { 177 | var exprs []operator 178 | for i := 0; i < len(v.Vs); i++ { 179 | exprs = append(exprs, v.ToOpe(i)) 180 | } 181 | val = exprs 182 | return 183 | } 184 | 185 | rExpression.Action = func(v *Values, d Any) (val Any, err error) { 186 | if len(v.Vs) == 1 { 187 | val = v.ToOpe(0) 188 | } else { 189 | var opes []operator 190 | for i := 0; i < len(v.Vs); i++ { 191 | opes = append(opes, v.ToOpe(i)) 192 | } 193 | val = Cho(opes...) 194 | } 195 | return 196 | } 197 | 198 | rSequence.Action = func(v *Values, d Any) (val Any, err error) { 199 | if len(v.Vs) == 1 { 200 | val = v.ToOpe(0) 201 | } else { 202 | var opes []operator 203 | for i := 0; i < len(v.Vs); i++ { 204 | opes = append(opes, v.ToOpe(i)) 205 | } 206 | val = Seq(opes...) 
207 | } 208 | return 209 | } 210 | 211 | rPrefix.Action = func(v *Values, d Any) (val Any, err error) { 212 | if len(v.Vs) == 1 { 213 | val = v.ToOpe(0) 214 | } else { 215 | tok := v.ToStr(0) 216 | ope := v.ToOpe(1) 217 | switch tok { 218 | case "&": 219 | val = Apd(ope) 220 | case "!": 221 | val = Npd(ope) 222 | } 223 | } 224 | return 225 | } 226 | 227 | rSuffix.Action = func(v *Values, d Any) (val Any, err error) { 228 | ope := v.ToOpe(0) 229 | if len(v.Vs) == 1 { 230 | val = ope 231 | } else { 232 | tok := v.ToStr(1) 233 | switch tok { 234 | case "?": 235 | val = Opt(ope) 236 | case "*": 237 | val = Zom(ope) 238 | case "+": 239 | val = Oom(ope) 240 | } 241 | } 242 | return 243 | } 244 | 245 | rPrimary.Action = func(v *Values, d Any) (val Any, err error) { 246 | switch v.Choice { 247 | case 0 /* Macro Reference */, 1: /* Reference */ 248 | ignore := v.ToBool(0) 249 | ident := v.ToStr(1) 250 | 251 | var args []operator 252 | if v.Choice == 0 { 253 | args = v.Vs[2].([]operator) 254 | } 255 | 256 | if ignore { 257 | val = Ign(Ref(ident, args, v.Pos)) 258 | } else { 259 | val = Ref(ident, args, v.Pos) 260 | } 261 | case 2: // Expression 262 | val = v.ToOpe(0) 263 | case 3: // TokenBoundary 264 | val = Tok(v.ToOpe(0)) 265 | default: 266 | val = v.ToOpe(0) 267 | } 268 | return 269 | } 270 | 271 | rIdentCont.Action = func(v *Values, d Any) (Any, error) { 272 | return v.S, nil 273 | } 274 | 275 | rLiteral.Action = func(v *Values, d Any) (Any, error) { 276 | return Lit(resolveEscapeSequence(v.Ts[0].S)), nil 277 | } 278 | 279 | rClass.Action = func(v *Values, d Any) (Any, error) { 280 | return Cls(resolveEscapeSequence(v.Ts[0].S)), nil 281 | } 282 | 283 | rAND.Action = func(v *Values, d Any) (Any, error) { 284 | return v.S[:1], nil 285 | } 286 | rNOT.Action = func(v *Values, d Any) (Any, error) { 287 | return v.S[:1], nil 288 | } 289 | rQUESTION.Action = func(v *Values, d Any) (Any, error) { 290 | return v.S[:1], nil 291 | } 292 | rSTAR.Action = func(v *Values, d Any) 
// isHex reports whether c is a hexadecimal digit and, if so, its value (0-15).
func isHex(c byte) (v int, ok bool) {
	switch {
	case '0' <= c && c <= '9':
		return int(c - '0'), true
	case 'a' <= c && c <= 'f':
		return int(c-'a') + 10, true
	case 'A' <= c && c <= 'F':
		return int(c-'A') + 10, true
	}
	return 0, false
}

// isDigit reports whether c is a decimal digit and, if so, its value (0-9).
func isDigit(c byte) (v int, ok bool) {
	if '0' <= c && c <= '9' {
		return int(c - '0'), true
	}
	return 0, false
}

// parseHexNumber decodes the hexadecimal digits of a `\x` escape starting at
// s[i]. It reads at most two digits, the maximum the Char rule of the PEG
// grammar accepts, so that in `\x41b` the trailing `b` stays an ordinary
// character. It returns the decoded byte and the index of the first byte that
// was not consumed; when s[i] is not a hex digit it returns (0, i).
func parseHexNumber(s string, i int) (byte, int) {
	const maxDigits = 2

	ret := 0
	for end := i + maxDigits; i < len(s) && i < end; i++ {
		val, ok := isHex(s[i])
		if !ok {
			break
		}
		ret = ret*16 + val
	}
	return byte(ret), i
}

// parseOctNumber decodes the octal digits of a `\ooo` escape starting at s[i].
// Only the digits 0-7 are accepted, at most three of them, and a third digit
// is taken only while the value still fits in a byte. That mirrors the
// grammar's `\[0-3][0-7][0-7]` / `\[0-7][0-7]?` alternatives: `\1012` is 'A'
// followed by '2', and `\477` is `\47` followed by '7'. It returns the decoded
// byte and the index of the first byte that was not consumed.
func parseOctNumber(s string, i int) (byte, int) {
	const maxDigits = 3

	ret := 0
	for end := i + maxDigits; i < len(s) && i < end; i++ {
		c := s[i]
		if c < '0' || '7' < c {
			break
		}
		next := ret*8 + int(c-'0')
		if next > 0xff {
			break
		}
		ret = next
	}
	return byte(ret), i
}

// resolveEscapeSequence replaces the backslash escapes in s with the bytes
// they denote: \n \r \t \f \v, the quoted characters \' \" \[ \] \\, one or
// two hex digits after \x, and one to three octal digits.
//
// A backslash that does not start one of those sequences (including a
// backslash at the very end of s) is copied through unchanged instead of
// panicking or producing a NUL byte.
func resolveEscapeSequence(s string) string {
	n := len(s)
	b := make([]byte, 0, n)

	for i := 0; i < n; {
		ch := s[i]
		i++
		if ch != '\\' || i == n {
			// Ordinary byte, or a lone trailing backslash kept verbatim.
			b = append(b, ch)
			continue
		}

		switch esc := s[i]; esc {
		case 'n':
			b = append(b, '\n')
			i++
		case 'r':
			b = append(b, '\r')
			i++
		case 't':
			b = append(b, '\t')
			i++
		case 'f':
			b = append(b, '\f')
			i++
		case 'v':
			b = append(b, '\v')
			i++
		case '\'', '"', '[', ']', '\\':
			b = append(b, esc)
			i++
		case 'x':
			if i+1 < n {
				if _, ok := isHex(s[i+1]); ok {
					ch, i = parseHexNumber(s, i+1)
					b = append(b, ch)
					continue
				}
			}
			// `\x` without digits: keep the backslash; the 'x' is copied
			// by the next iteration.
			b = append(b, '\\')
		default:
			if '0' <= esc && esc <= '7' {
				ch, i = parseOctNumber(s, i)
				b = append(b, ch)
			} else {
				// Unknown escape: keep the backslash; the following byte
				// is copied by the next iteration.
				b = append(b, '\\')
			}
		}
	}

	return string(b)
}
477 | 478 | _, _, err = rStart.Parse(s, data) 479 | if err != nil { 480 | return nil, err 481 | } 482 | 483 | // User provided rules 484 | for name, ope := range rules { 485 | ignore := false 486 | 487 | if len(name) > 0 && name[0] == '~' { 488 | ignore = true 489 | name = name[1:] 490 | } 491 | 492 | if len(name) > 0 { 493 | data.grammar[name] = &Rule{ 494 | Ope: ope, 495 | Name: name, 496 | Ignore: ignore, 497 | } 498 | } 499 | } 500 | 501 | // Check duplicated definitions 502 | if len(data.duplicates) > 0 { 503 | err = &Error{} 504 | for _, dup := range data.duplicates { 505 | ln, col := lineInfo(s, dup.pos) 506 | msg := "'" + dup.name + "' is already defined." 507 | err.(*Error).Details = append(err.(*Error).Details, ErrorDetail{ln, col, msg}) 508 | } 509 | } 510 | 511 | // Check missing definitions 512 | for _, r := range data.grammar { 513 | v := &referenceChecker{ 514 | grammar: data.grammar, 515 | params: r.Parameters, 516 | errorPos: make(map[string]int), 517 | errorMsg: make(map[string]string), 518 | } 519 | r.accept(v) 520 | for name, pos := range v.errorPos { 521 | if err == nil { 522 | err = &Error{} 523 | } 524 | ln, col := lineInfo(s, pos) 525 | msg := v.errorMsg[name] 526 | err.(*Error).Details = append(err.(*Error).Details, ErrorDetail{ln, col, msg}) 527 | } 528 | } 529 | 530 | if err != nil { 531 | return nil, err 532 | } 533 | 534 | // Link references 535 | for _, r := range data.grammar { 536 | v := &linkReferences{ 537 | parameters: r.Parameters, 538 | grammar: data.grammar, 539 | } 540 | r.accept(v) 541 | } 542 | 543 | // Check left recursion 544 | for name, r := range data.grammar { 545 | v := &detectLeftRecursion{ 546 | pos: -1, 547 | name: name, 548 | params: r.Parameters, 549 | refs: make(map[string]bool), 550 | done: false, 551 | } 552 | r.accept(v) 553 | if v.pos != -1 { 554 | if err == nil { 555 | err = &Error{} 556 | } 557 | ln, col := lineInfo(s, v.pos) 558 | msg := "'" + name + "' is left recursive." 
// assert marks the test as failed (without stopping it) when ok is false.
func assert(t *testing.T, ok bool) {
	// Report the failure at the caller's line, not inside this helper.
	t.Helper()
	if !ok {
		t.Error("error...")
	}
}
parser.Grammar["TAG_NAME"].Action = func(sv *Values, d Any) (v Any, err error) { 42 | tags = append(tags, sv.S) 43 | return 44 | } 45 | 46 | assert(t, parser.Parse(" [tag1] [tag:2] [tag-3] ", nil) == nil) 47 | assert(t, len(tags) == 3) 48 | assert(t, tags[0] == "tag1") 49 | assert(t, tags[1] == "tag:2") 50 | assert(t, tags[2] == "tag-3") 51 | } 52 | 53 | /* 54 | TEST_CASE("String capture test with match", "[general]") 55 | { 56 | peg::match m; 57 | auto ret = peg::peg_match( 58 | " ROOT <- _ ('[' $< TAG_NAME > ']' _)* " 59 | " TAG_NAME <- (!']' .)+ " 60 | " _ <- [ \t]* ", 61 | " [tag1] [tag:2] [tag-3] ", 62 | m); 63 | 64 | REQUIRE(ret == true); 65 | REQUIRE(m.size() == 4); 66 | REQUIRE(m.str(1) == "tag1"); 67 | REQUIRE(m.str(2) == "tag:2"); 68 | REQUIRE(m.str(3) == "tag-3"); 69 | } 70 | */ 71 | 72 | func TestStringCapture2(t *testing.T) { 73 | var tags []string 74 | 75 | var ROOT, TAG, TAG_NAME, WS Rule 76 | ROOT.Ope = Seq(&WS, Zom(&TAG)) 77 | TAG.Ope = Seq(Lit("["), &TAG_NAME, Lit("]"), &WS) 78 | TAG_NAME.Ope = Oom(Seq(Npd(Lit("]")), Dot())) 79 | WS.Ope = Zom(Cls(" \t")) 80 | 81 | TAG_NAME.Action = func(sv *Values, d Any) (v Any, err error) { 82 | tags = append(tags, sv.S) 83 | return 84 | } 85 | 86 | _, _, err := ROOT.Parse(" [tag1] [tag:2] [tag-3] ", nil) 87 | assert(t, err == nil) 88 | assert(t, len(tags) == 3) 89 | assert(t, tags[0] == "tag1") 90 | assert(t, tags[1] == "tag:2") 91 | assert(t, tags[2] == "tag-3") 92 | } 93 | 94 | func TestStringCapture3(t *testing.T) { 95 | syntax := ` 96 | ROOT <- _ TOKEN* 97 | TOKEN <- '[' < (!']' .)+ > ']' _ 98 | _ <- [ \t\r\n]* 99 | ` 100 | 101 | parser, _ := NewParser(syntax) 102 | 103 | var tags []string 104 | parser.Grammar["TOKEN"].Action = func(sv *Values, d Any) (v Any, err error) { 105 | tags = append(tags, sv.Token()) 106 | return 107 | } 108 | 109 | assert(t, parser.Parse(" [tag1] [tag:2] [tag-3] ", nil) == nil) 110 | assert(t, len(tags) == 3) 111 | assert(t, tags[0] == "tag1") 112 | assert(t, tags[1] == "tag:2") 
113 | assert(t, tags[2] == "tag-3") 114 | } 115 | 116 | /* 117 | TEST_CASE("Named capture test", "[general]") 118 | { 119 | peg::match m; 120 | 121 | auto ret = peg::peg_match( 122 | " ROOT <- _ ('[' $test< TAG_NAME > ']' _)* " 123 | " TAG_NAME <- (!']' .)+ " 124 | " _ <- [ \t]* ", 125 | " [tag1] [tag:2] [tag-3] ", 126 | m); 127 | 128 | auto cap = m.named_capture("test"); 129 | 130 | REQUIRE(ret == true); 131 | REQUIRE(m.size() == 4); 132 | REQUIRE(cap.size() == 3); 133 | REQUIRE(m.str(cap[2]) == "tag-3"); 134 | } 135 | 136 | TEST_CASE("String capture test with embedded match action", "[general]") 137 | { 138 | Definition ROOT, TAG, TAG_NAME, WS; 139 | 140 | vector tags; 141 | 142 | ROOT <= seq(WS, zom(TAG)); 143 | TAG <= seq(chr('['), 144 | cap(TAG_NAME, [&](const char* s, size_t n, size_t id, const std::string& name) { 145 | tags.push_back(string(s, n)); 146 | }), 147 | chr(']'), 148 | WS); 149 | TAG_NAME <= oom(seq(npd(chr(']')), dot())); 150 | WS <= zom(cls(" \t")); 151 | 152 | auto r = ROOT.parse(" [tag1] [tag:2] [tag-3] "); 153 | 154 | REQUIRE(r.ret == true); 155 | REQUIRE(tags.size() == 3); 156 | REQUIRE(tags[0] == "tag1"); 157 | REQUIRE(tags[1] == "tag:2"); 158 | REQUIRE(tags[2] == "tag-3"); 159 | } 160 | */ 161 | 162 | func TestSyclicGrammar(t *testing.T) { 163 | var PARENT, CHILD Rule 164 | PARENT.Ope = Seq(&CHILD) 165 | CHILD.Ope = Seq(&PARENT) 166 | } 167 | 168 | /* 169 | TEST_CASE("Visit test", "[general]") 170 | { 171 | Definition ROOT, TAG, TAG_NAME, WS; 172 | 173 | ROOT <= seq(WS, zom(TAG)); 174 | TAG <= seq(chr('['), TAG_NAME, chr(']'), WS); 175 | TAG_NAME <= oom(seq(npd(chr(']')), dot())); 176 | WS <= zom(cls(" \t")); 177 | 178 | AssignIDToDefinition defIds; 179 | ROOT.accept(defIds); 180 | 181 | REQUIRE(defIds.ids.size() == 4); 182 | } 183 | */ 184 | 185 | func TestTokenCheckTest(t *testing.T) { 186 | parser, _ := NewParser(` 187 | EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 188 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 189 | FACTOR <- NUMBER / 
'(' _ EXPRESSION ')' _ 190 | TERM_OPERATOR <- < [-+] > _ 191 | FACTOR_OPERATOR <- < [/*] > _ 192 | NUMBER <- < [0-9]+ > _ 193 | _ <- [ \t\r\n]* 194 | `) 195 | 196 | assert(t, parser.Grammar["EXPRESSION"].isToken() == false) 197 | assert(t, parser.Grammar["FACTOR"].isToken() == false) 198 | assert(t, parser.Grammar["FACTOR_OPERATOR"].isToken() == true) 199 | assert(t, parser.Grammar["NUMBER"].isToken() == true) 200 | assert(t, parser.Grammar["_"].isToken() == true) 201 | } 202 | 203 | func TestLambdaAction(t *testing.T) { 204 | parser, _ := NewParser(` 205 | START <- (CHAR)* 206 | CHAR <- . 207 | `) 208 | 209 | var ss string 210 | parser.Grammar["CHAR"].Action = func(sv *Values, d Any) (v Any, err error) { 211 | ss += sv.S 212 | return 213 | } 214 | 215 | assert(t, parser.Parse("hello", nil) == nil) 216 | assert(t, ss == "hello") 217 | } 218 | 219 | func TestEnterExitHandlers(t *testing.T) { 220 | parser, _ := NewParser(` 221 | START <- LTOKEN '=' RTOKEN 222 | LTOKEN <- TOKEN 223 | RTOKEN <- TOKEN 224 | TOKEN <- [A-Za-z]+ 225 | `) 226 | 227 | parser.Grammar["LTOKEN"].Enter = func(d Any) { 228 | *d.(*bool) = false 229 | } 230 | parser.Grammar["LTOKEN"].Leave = func(d Any) { 231 | *d.(*bool) = true 232 | } 233 | 234 | msg := "should be upper case string..." 
235 | 236 | parser.Grammar["TOKEN"].Action = func(sv *Values, d Any) (v Any, err error) { 237 | if *d.(*bool) { 238 | if sv.S != strings.ToUpper(sv.S) { 239 | err = errors.New(msg) 240 | } 241 | } 242 | return 243 | } 244 | 245 | requireUpperCase := false 246 | var d Any = &requireUpperCase 247 | assert(t, parser.Parse("hello=world", d) != nil) 248 | assert(t, parser.Parse("HELLO=world", d) != nil) 249 | assert(t, parser.Parse("hello=WORLD", d) == nil) 250 | assert(t, parser.Parse("HELLO=WORLD", d) == nil) 251 | 252 | var err error 253 | err = parser.Parse("hello=world", d) 254 | pegErr, ok := err.(*Error) 255 | assert(t, ok) 256 | assert(t, pegErr.Details[0].Ln == 1) 257 | assert(t, pegErr.Details[0].Col == 7) 258 | assert(t, pegErr.Details[0].Msg == msg) 259 | } 260 | 261 | func TestWhitespace(t *testing.T) { 262 | parser, _ := NewParser(` 263 | # Rules 264 | ROOT <- ITEM (',' ITEM)* 265 | ITEM <- WORD / PHRASE 266 | 267 | # Tokens 268 | WORD <- < [a-zA-Z0-9_]+ > 269 | PHRASE <- < '"' (!'"' .)* '"' > 270 | 271 | %whitespace <- [ \t\r\n]* 272 | `) 273 | 274 | err := parser.Parse(` one, "two, three", four `, nil) 275 | assert(t, err == nil) 276 | } 277 | 278 | func TestWhitespace2(t *testing.T) { 279 | parser, _ := NewParser(` 280 | # Rules 281 | ROOT <- ITEM (',' ITEM)* 282 | ITEM <- '[' < [a-zA-Z0-9_]+ > ']' 283 | 284 | %whitespace <- (SPACE / TAB)* 285 | SPACE <- ' ' 286 | TAB <- '\t' 287 | `) 288 | 289 | var items []string 290 | parser.Grammar["ITEM"].Action = func(sv *Values, d Any) (v Any, err error) { 291 | items = append(items, sv.Token()) 292 | return 293 | } 294 | 295 | err := parser.Parse(`[one], [two] ,[three] `, nil) 296 | assert(t, err == nil) 297 | assert(t, len(items) == 3) 298 | assert(t, items[0] == "one") 299 | assert(t, items[1] == "two") 300 | assert(t, items[2] == "three") 301 | } 302 | 303 | func TestWordExpression(t *testing.T) { 304 | parser, _ := NewParser(` 305 | ROOT <- 'hello' ','? 
'world' 306 | %whitespace <- [ \t\r\n]* 307 | %word <- [a-z]+ 308 | `) 309 | 310 | assert(t, parser.Parse(`helloworld`, nil) != nil) 311 | assert(t, parser.Parse(`hello world`, nil) == nil) 312 | assert(t, parser.Parse(`hello,world`, nil) == nil) 313 | assert(t, parser.Parse(`hello, world`, nil) == nil) 314 | assert(t, parser.Parse(`hello , world`, nil) == nil) 315 | } 316 | 317 | func TestSkipToken(t *testing.T) { 318 | parser, _ := NewParser(` 319 | ROOT <- _ ITEM (',' _ ITEM _)* 320 | ITEM <- ([a-z0-9])+ 321 | ~_ <- [ \t]* 322 | `) 323 | 324 | parser.Grammar["ROOT"].Action = func(sv *Values, d Any) (v Any, err error) { 325 | assert(t, len(sv.Vs) == 2) 326 | return 327 | } 328 | 329 | assert(t, parser.Parse(" item1, item2 ", nil) == nil) 330 | } 331 | 332 | func TestSkipToken2(t *testing.T) { 333 | parser, _ := NewParser(` 334 | ROOT <- ITEM (',' ITEM)* 335 | ITEM <- < ([a-z0-9])+ > 336 | %whitespace <- [ \t]* 337 | `) 338 | 339 | parser.Grammar["ROOT"].Action = func(sv *Values, d Any) (v Any, err error) { 340 | assert(t, len(sv.Vs) == 2) 341 | return 342 | } 343 | 344 | assert(t, parser.Parse(" item1, item2 ", nil) == nil) 345 | } 346 | 347 | /* 348 | TEST_CASE("Backtracking test", "[general]") 349 | { 350 | parser parser( 351 | " START <- PAT1 / PAT2 " 352 | " PAT1 <- HELLO ' One' " 353 | " PAT2 <- HELLO ' Two' " 354 | " HELLO <- 'Hello' " 355 | ); 356 | 357 | size_t count = 0; 358 | parser["HELLO"] = [&](const SemanticValues& sv) { 359 | count++; 360 | }; 361 | 362 | parser.enable_packrat_parsing(); 363 | 364 | bool ret = parser.parse("Hello Two"); 365 | REQUIRE(ret == true); 366 | REQUIRE(count == 1); // Skip second time 367 | } 368 | */ 369 | 370 | func TestBacktrackingWithAst(t *testing.T) { 371 | parser, _ := NewParser(` 372 | S <- A? 
B (A B)* A 373 | A <- 'a' 374 | B <- 'b' 375 | `) 376 | 377 | parser.EnableAst() 378 | val, err := parser.ParseAndGetValue("ba", nil) 379 | ast := val.(*Ast) 380 | 381 | assert(t, err == nil) 382 | assert(t, len(ast.Nodes) == 2) 383 | } 384 | 385 | func TestOctalHexValue(t *testing.T) { 386 | parser, _ := NewParser(` 387 | ROOT <- '\132\x7a' 388 | `) 389 | 390 | assert(t, parser.Parse("Zz", nil) == nil) 391 | } 392 | 393 | func TestSimpleCalculator(t *testing.T) { 394 | parser, _ := NewParser(` 395 | Additive <- Multitive '+' Additive / Multitive 396 | Multitive <- Primary '*' Multitive / Primary 397 | Primary <- '(' Additive ')' / Number 398 | Number <- [0-9]+ 399 | `) 400 | 401 | parser.Grammar["Additive"].Action = func(sv *Values, d Any) (v Any, err error) { 402 | switch sv.Choice { 403 | case 0: 404 | v = sv.ToInt(0) + sv.ToInt(1) 405 | default: 406 | v = sv.ToInt(0) 407 | } 408 | return 409 | } 410 | 411 | parser.Grammar["Multitive"].Action = func(sv *Values, d Any) (v Any, err error) { 412 | switch sv.Choice { 413 | case 0: 414 | v = sv.ToInt(0) * sv.ToInt(1) 415 | default: 416 | v = sv.ToInt(0) 417 | } 418 | return 419 | } 420 | 421 | parser.Grammar["Number"].Action = func(sv *Values, d Any) (v Any, err error) { 422 | return strconv.Atoi(sv.S) 423 | } 424 | 425 | val, err := parser.ParseAndGetValue("(1+2)*3", nil) 426 | 427 | assert(t, err == nil) 428 | assert(t, val == 9) 429 | } 430 | 431 | func TestCalculator(t *testing.T) { 432 | // Construct grammer 433 | var EXPRESSION, TERM, FACTOR, TERM_OPERATOR, FACTOR_OPERATOR, NUMBER Rule 434 | 435 | EXPRESSION.Ope = Seq(&TERM, Zom(Seq(&TERM_OPERATOR, &TERM))) 436 | TERM.Ope = Seq(&FACTOR, Zom(Seq(&FACTOR_OPERATOR, &FACTOR))) 437 | FACTOR.Ope = Cho(&NUMBER, Seq(Lit("("), &EXPRESSION, Lit(")"))) 438 | TERM_OPERATOR.Ope = Cls("+-") 439 | FACTOR_OPERATOR.Ope = Cls("/*") 440 | NUMBER.Ope = Oom(Cls("0-9")) 441 | 442 | // Setup actions 443 | reduce := func(sv *Values, d Any) (Any, error) { 444 | ret := sv.ToInt(0) 445 | 
for i := 1; i < len(sv.Vs); i += 2 { 446 | num := sv.ToInt(i + 1) 447 | ope := sv.ToStr(i) 448 | switch ope { 449 | case "+": 450 | ret += num 451 | case "-": 452 | ret -= num 453 | case "*": 454 | ret *= num 455 | case "/": 456 | ret /= num 457 | } 458 | } 459 | return ret, nil 460 | } 461 | 462 | EXPRESSION.Action = reduce 463 | TERM.Action = reduce 464 | TERM_OPERATOR.Action = func(sv *Values, d Any) (v Any, err error) { return sv.S, nil } 465 | FACTOR_OPERATOR.Action = func(sv *Values, d Any) (v Any, err error) { return sv.S, nil } 466 | NUMBER.Action = func(sv *Values, d Any) (v Any, err error) { return strconv.Atoi(sv.S) } 467 | 468 | // Parse 469 | _, val, err := EXPRESSION.Parse("1+2*3*(4-5+6)/7-8", nil) 470 | 471 | assert(t, err == nil) 472 | assert(t, val == -3) 473 | } 474 | 475 | func TestCalculator2(t *testing.T) { 476 | parser, _ := NewParser(` 477 | # Grammar for Calculator... 478 | EXPRESSION <- TERM (TERM_OPERATOR TERM)* 479 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 480 | FACTOR <- NUMBER / '(' EXPRESSION ')' 481 | TERM_OPERATOR <- [-+] 482 | FACTOR_OPERATOR <- [/*] 483 | NUMBER <- [0-9]+ 484 | `) 485 | 486 | // Setup actions 487 | reduce := func(sv *Values, d Any) (Any, error) { 488 | ret := sv.ToInt(0) 489 | for i := 1; i < len(sv.Vs); i += 2 { 490 | num := sv.ToInt(i + 1) 491 | ope := sv.ToStr(i) 492 | switch ope { 493 | case "+": 494 | ret += num 495 | case "-": 496 | ret -= num 497 | case "*": 498 | ret *= num 499 | case "/": 500 | ret /= num 501 | } 502 | } 503 | return ret, nil 504 | } 505 | 506 | g := parser.Grammar 507 | g["EXPRESSION"].Action = reduce 508 | g["TERM"].Action = reduce 509 | g["TERM_OPERATOR"].Action = func(sv *Values, d Any) (Any, error) { return sv.S, nil } 510 | g["FACTOR_OPERATOR"].Action = func(sv *Values, d Any) (Any, error) { return sv.S, nil } 511 | g["NUMBER"].Action = func(sv *Values, d Any) (Any, error) { return strconv.Atoi(sv.S) } 512 | 513 | // Parse 514 | val, err := 
parser.ParseAndGetValue("1+2*3*(4-5+6)/7-8", nil) 515 | 516 | assert(t, err == nil) 517 | assert(t, val == -3) 518 | } 519 | 520 | func TestCalculator3(t *testing.T) { 521 | parser, _ := NewParser(` 522 | # Grammar for simple calculator... 523 | EXPRESSION <- ATOM (BINOP ATOM)* 524 | ATOM <- NUMBER / '(' EXPRESSION ')' 525 | BINOP <- < [-+/*] > 526 | NUMBER <- < [0-9]+ > 527 | %whitespace <- [ \t]* 528 | --- 529 | # Expression parsing 530 | %expr = EXPRESSION # rule 531 | %binop = L + - # level 1 532 | %binop = L * / # level 2 533 | `) 534 | 535 | // Setup actions 536 | g := parser.Grammar 537 | g["EXPRESSION"].Action = func(v *Values, d Any) (Any, error) { 538 | val := v.ToInt(0) 539 | if v.Len() > 1 { 540 | rhs := v.ToInt(2) 541 | ope := v.ToStr(1) 542 | switch ope { 543 | case "+": 544 | val += rhs 545 | case "-": 546 | val -= rhs 547 | case "*": 548 | val *= rhs 549 | case "/": 550 | val /= rhs 551 | } 552 | } 553 | return val, nil 554 | } 555 | g["BINOP"].Action = func(v *Values, d Any) (Any, error) { 556 | return v.Token(), nil 557 | } 558 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { 559 | return strconv.Atoi(v.Token()) 560 | } 561 | 562 | // Parse 563 | val, err := parser.ParseAndGetValue("1+2*3*(4-5+6)/7-8", nil) 564 | 565 | assert(t, err == nil) 566 | assert(t, val == -3) 567 | 568 | val, err = parser.ParseAndGetValue(" 1 + 1 + 1 ", nil) 569 | 570 | assert(t, err == nil) 571 | assert(t, val == 3) 572 | } 573 | 574 | func TestCalculatorTestWithAST(t *testing.T) { 575 | parser, _ := NewParser(` 576 | EXPRESSION <- _ TERM (TERM_OPERATOR TERM)* 577 | TERM <- FACTOR (FACTOR_OPERATOR FACTOR)* 578 | FACTOR <- NUMBER / '(' _ EXPRESSION ')' _ 579 | TERM_OPERATOR <- < [-+] > _ 580 | FACTOR_OPERATOR <- < [/*] > _ 581 | NUMBER <- < [0-9]+ > _ 582 | ~_ <- [ \t\r\n]* 583 | `) 584 | 585 | var eval func(ast *Ast) int 586 | eval = func(ast *Ast) int { 587 | if ast.Name == "NUMBER" { 588 | val, _ := strconv.Atoi(ast.Token) 589 | return val 590 | } else { 591 
| nodes := ast.Nodes 592 | result := eval(nodes[0]) 593 | for i := 1; i < len(nodes); i += 2 { 594 | num := eval(nodes[i+1]) 595 | ope := nodes[i].Token[0] 596 | switch ope { 597 | case '+': 598 | result += num 599 | break 600 | case '-': 601 | result -= num 602 | break 603 | case '*': 604 | result *= num 605 | break 606 | case '/': 607 | result /= num 608 | break 609 | } 610 | } 611 | return result 612 | } 613 | } 614 | 615 | parser.EnableAst() 616 | val, err := parser.ParseAndGetValue("1+2*3*(4-5+6)/7-8", nil) 617 | 618 | ast := val.(*Ast) 619 | opt := NewAstOptimizer(nil) 620 | ast = opt.Optimize(ast, nil) 621 | ret := eval(ast) 622 | 623 | assert(t, err == nil) 624 | assert(t, ret == -3) 625 | } 626 | 627 | func TestIgnoreSemanticValue(t *testing.T) { 628 | parser, _ := NewParser(` 629 | START <- ~HELLO WORLD 630 | HELLO <- 'Hello' _ 631 | WORLD <- 'World' _ 632 | _ <- [ \t\r\n]* 633 | `) 634 | 635 | parser.EnableAst() 636 | ast, err := parser.ParseAndGetAst("Hello World", nil) 637 | 638 | assert(t, err == nil) 639 | assert(t, len(ast.Nodes) == 1) 640 | assert(t, ast.Nodes[0].Name == "WORLD") 641 | } 642 | 643 | func TestIgnoreSemanticValueOfORPredicate(t *testing.T) { 644 | parser, _ := NewParser(` 645 | START <- _ !DUMMY HELLO_WORLD '.' 646 | HELLO_WORLD <- HELLO 'World' _ 647 | HELLO <- 'Hello' _ 648 | DUMMY <- 'dummy' _ 649 | ~_ <- [ \t\r\n]* 650 | `) 651 | 652 | parser.EnableAst() 653 | ast, err := parser.ParseAndGetAst("Hello World.", nil) 654 | 655 | assert(t, err == nil) 656 | assert(t, len(ast.Nodes) == 1) 657 | assert(t, ast.Nodes[0].Name == "HELLO_WORLD") 658 | } 659 | 660 | func TestIgnoreSemanticValueOfANDPredicate(t *testing.T) { 661 | parser, _ := NewParser(` 662 | START <- _ &HELLO HELLO_WORLD '.' 
663 | HELLO_WORLD <- HELLO 'World' _ 664 | HELLO <- 'Hello' _ 665 | ~_ <- [ \t\r\n]* 666 | `) 667 | 668 | parser.EnableAst() 669 | ast, err := parser.ParseAndGetAst("Hello World.", nil) 670 | 671 | assert(t, err == nil) 672 | assert(t, len(ast.Nodes) == 1) 673 | assert(t, ast.Nodes[0].Name == "HELLO_WORLD") 674 | } 675 | 676 | func TestLiteralTokenOnAst1(t *testing.T) { 677 | parser, _ := NewParser(` 678 | STRING_LITERAL <- '"' (('\\"' / '\\t' / '\\n') / (!["] .))* '"' 679 | `) 680 | 681 | parser.EnableAst() 682 | ast, err := parser.ParseAndGetAst(`"a\tb"`, nil) 683 | 684 | assert(t, err == nil) 685 | assert(t, ast.Token == `"a\tb"`) 686 | assert(t, len(ast.Nodes) == 0) 687 | } 688 | 689 | func TestLiteralTokenOnAst2(t *testing.T) { 690 | parser, _ := NewParser(` 691 | STRING_LITERAL <- '"' (ESC / CHAR)* '"' 692 | ESC <- ('\\"' / '\\t' / '\\n') 693 | CHAR <- (!["] .) 694 | `) 695 | 696 | parser.EnableAst() 697 | ast, err := parser.ParseAndGetAst(`"a\tb"`, nil) 698 | 699 | assert(t, err == nil) 700 | assert(t, ast.Token == "") 701 | assert(t, len(ast.Nodes) == 3) 702 | } 703 | 704 | func TestLiteralTokenOnAst3(t *testing.T) { 705 | parser, _ := NewParser(` 706 | STRING_LITERAL <- < '"' (ESC / CHAR)* '"' > 707 | ESC <- ('\\"' / '\\t' / '\\n') 708 | CHAR <- (!["] .) 
// TestLeftRecursiveWithEmptyString expects NewParser to reject the grammar
// below, returning a nil parser together with a non-nil error.
// NOTE(review): the grammar text itself starts and ends with a double quote
// inside the raw string, so the rejection may come from a plain syntax error
// rather than from left-recursion detection — confirm whether the quotes are
// intended.
func TestLeftRecursiveWithEmptyString(t *testing.T) {
	parser, err := NewParser(`
        " A <- '' A"
    `)

	assert(t, parser == nil)
	assert(t, err != nil)
}
_ " 789 | 790 | rules := map[string]operator{ 791 | "NAME": Usr(func(s string, p int, sv *Values, d Any) int { 792 | names := []string{"PEG", "BNF"} 793 | for _, name := range names { 794 | if len(name) <= len(s)-p && name == s[p:p+len(name)] { 795 | return len(name) 796 | } 797 | } 798 | return -1 799 | }), 800 | "~_": Zom(Cls(" \t\r\n")), 801 | } 802 | 803 | parser, err := NewParserWithUserRules(syntax, rules) 804 | assert(t, err == nil) 805 | assert(t, parser.Parse(" Hello BNF! ", nil) == nil) 806 | } 807 | 808 | func TestSemanticPredicate(t *testing.T) { 809 | parser, _ := NewParser("NUMBER <- [0-9]+") 810 | 811 | parser.Grammar["NUMBER"].Action = func(sv *Values, d Any) (val Any, err error) { 812 | val, _ = strconv.Atoi(sv.S) 813 | if val != 100 { 814 | err = errors.New("value error!!") 815 | } 816 | return 817 | } 818 | 819 | val, err := parser.ParseAndGetValue("100", nil) 820 | assert(t, err == nil) 821 | assert(t, val == 100) 822 | 823 | val, err = parser.ParseAndGetValue("200", nil) 824 | assert(t, err != nil) 825 | } 826 | 827 | func TestJapaneseCharacter(t *testing.T) { 828 | parser, _ := NewParser(` 829 | 文 <- 修飾語? 
主語 述語 '。' 830 | 主語 <- 名詞 助詞 831 | 述語 <- 動詞 助詞 832 | 修飾語 <- 形容詞 833 | 名詞 <- 'サーバー' / 'クライアント' 834 | 形容詞 <- '古い' / '新しい' 835 | 動詞 <- '落ち' / '復旧し' 836 | 助詞 <- 'が' / 'を' / 'た' / 'ます' / 'に' 837 | `) 838 | 839 | assert(t, parser.Parse("サーバーを復旧します。", nil) == nil) 840 | } 841 | 842 | func TestLineInformation(t *testing.T) { 843 | parser, err := NewParser(` 844 | S <- _ (WORD _)+ 845 | WORD <- [A-Za-z]+ 846 | ~_ <- [ \t\r\n]+ 847 | `) 848 | 849 | type LineInfo struct { 850 | Ln int 851 | Col int 852 | } 853 | var locations []LineInfo 854 | 855 | parser.Grammar["WORD"].Action = func(sv *Values, d Any) (val Any, err error) { 856 | ln, col := lineInfo(sv.SS, sv.Pos) 857 | locations = append(locations, LineInfo{ln, col}) 858 | return 859 | } 860 | 861 | assert(t, err == nil) 862 | assert(t, parser.Parse(" Mon Tue Wed \nThu Fri Sat\nSun\n", nil) == nil) 863 | 864 | assert(t, locations[0] == LineInfo{1, 2}) 865 | assert(t, locations[1] == LineInfo{1, 6}) 866 | assert(t, locations[2] == LineInfo{1, 10}) 867 | assert(t, locations[3] == LineInfo{2, 1}) 868 | assert(t, locations[4] == LineInfo{2, 6}) 869 | assert(t, locations[5] == LineInfo{2, 11}) 870 | assert(t, locations[6] == LineInfo{3, 1}) 871 | } 872 | 873 | func TestMacroSimple(t *testing.T) { 874 | parser, err := NewParser(` 875 | S <- HELLO WORLD 876 | HELLO <- T('hello') 877 | WORLD <- T('world') 878 | T(a) <- a [ \t]* 879 | `) 880 | 881 | assert(t, err == nil) 882 | assert(t, parser.Parse("hello \tworld ", nil) == nil) 883 | } 884 | 885 | func TestMacroTwoParameters(t *testing.T) { 886 | parser, err := NewParser(` 887 | S <- HELLO_WORLD 888 | HELLO_WORLD <- T('hello', 'world') 889 | T(a, b) <- a [ \t]* b [ \t]* 890 | `) 891 | 892 | assert(t, err == nil) 893 | assert(t, parser.Parse("hello \tworld ", nil) == nil) 894 | } 895 | 896 | func TestMacroSyntaxError(t *testing.T) { 897 | _, err := NewParser(` 898 | S <- T('hello') 899 | T (a) <- a [ \t]* 900 | `) 901 | 902 | assert(t, err != nil) 903 | } 904 | 905 | func 
TestMacroMissingArgument(t *testing.T) { 906 | _, err := NewParser(` 907 | S <- T ('hello') 908 | T(a, b) <- a [ \t]* b 909 | `) 910 | 911 | assert(t, err != nil) 912 | } 913 | 914 | func TestMacroReferenceSyntaxError(t *testing.T) { 915 | _, err := NewParser(` 916 | S <- T ('hello') 917 | T(a) <- a [ \t]* 918 | `) 919 | 920 | assert(t, err != nil) 921 | } 922 | 923 | func TestInvalidMacroReferenceError(t *testing.T) { 924 | _, err := NewParser(` 925 | S <- T('hello') 926 | T <- 'world' 927 | `) 928 | 929 | assert(t, err != nil) 930 | } 931 | 932 | func TestMacroCalculator(t *testing.T) { 933 | // Create a PEG parser 934 | parser, _ := NewParser(` 935 | # Grammar for simple calculator... 936 | EXPRESSION <- _ LIST(TERM, TERM_OPERATOR) 937 | TERM <- LIST(FACTOR, FACTOR_OPERATOR) 938 | FACTOR <- NUMBER / T('(') EXPRESSION T(')') 939 | TERM_OPERATOR <- T([-+]) 940 | FACTOR_OPERATOR <- T([/*]) 941 | NUMBER <- T([0-9]+) 942 | ~_ <- [ \t]* 943 | LIST(I, D) <- I (D I)* 944 | T(S) <- < S > _ 945 | `) 946 | 947 | // Setup actions 948 | reduce := func(v *Values, d Any) (Any, error) { 949 | val := v.ToInt(0) 950 | for i := 1; i < len(v.Vs); i += 2 { 951 | num := v.ToInt(i + 1) 952 | switch v.ToStr(i) { 953 | case "+": 954 | val += num 955 | case "-": 956 | val -= num 957 | case "*": 958 | val *= num 959 | case "/": 960 | val /= num 961 | } 962 | } 963 | return val, nil 964 | } 965 | 966 | g := parser.Grammar 967 | g["EXPRESSION"].Action = reduce 968 | g["TERM"].Action = reduce 969 | g["TERM_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 970 | g["FACTOR_OPERATOR"].Action = func(v *Values, d Any) (Any, error) { return v.Token(), nil } 971 | g["NUMBER"].Action = func(v *Values, d Any) (Any, error) { return strconv.Atoi(v.Token()) } 972 | 973 | input := " 1 + 2 * 3 * (4 - 5 + 6) / 7 - 8 " 974 | val, err := parser.ParseAndGetValue(input, nil) 975 | 976 | assert(t, err == nil) 977 | assert(t, val == -3) 978 | } 979 | 980 | func 
TestMacroExpressionArguments(t *testing.T) { 981 | parser, err := NewParser(` 982 | S <- M('hello' / 'Hello', 'world' / 'World') 983 | M(arg0, arg1) <- arg0 [ \t]+ arg1 984 | `) 985 | 986 | assert(t, err == nil) 987 | assert(t, parser.Parse("Hello world", nil) == nil) 988 | } 989 | 990 | func TestMacroRecursive(t *testing.T) { 991 | parser, err := NewParser(` 992 | S <- M('abc') 993 | M(s) <- !s / s ' ' M(s / '123') / s 994 | `) 995 | 996 | assert(t, err == nil) 997 | assert(t, parser.Parse("", nil) == nil) 998 | assert(t, parser.Parse("abc", nil) == nil) 999 | assert(t, parser.Parse("abc abc", nil) == nil) 1000 | assert(t, parser.Parse("abc 123 abc", nil) == nil) 1001 | } 1002 | 1003 | func TestMacroRecursive2(t *testing.T) { 1004 | syntaxes := []string{ 1005 | `S <- M('abc') M(s) <- !s / s ' ' M(s* '-' '123') / s`, 1006 | `S <- M('abc') M(s) <- !s / s ' ' M(s+ '-' '123') / s`, 1007 | `S <- M('abc') M(s) <- !s / s ' ' M(s? '-' '123') / s`, 1008 | `S <- M('abc') M(s) <- !s / s ' ' M(&s s+ '-' '123') / s`, 1009 | `S <- M('abc') M(s) <- !s / s ' ' M(s '-' !s '123') / s`, 1010 | `S <- M('abc') M(s) <- !s / s ' ' M(< s > '-' '123') / s`, 1011 | `S <- M('abc') M(s) <- !s / s ' ' M(~s '-' '123') / s`, 1012 | } 1013 | for _, syntax := range syntaxes { 1014 | parser, err := NewParser(syntax) 1015 | assert(t, err == nil) 1016 | assert(t, parser.Parse("abc abc-123", nil) == nil) 1017 | } 1018 | } 1019 | 1020 | func TestMacroExclusiveModifiers(t *testing.T) { 1021 | parser, err := NewParser(` 1022 | S <- Modifiers(!"") _ 1023 | Modifiers(Appeared) <- (!Appeared) ( 1024 | Token('public') Modifiers(Appeared / 'public') / 1025 | Token('static') Modifiers(Appeared / 'static') / 1026 | Token('final') Modifiers(Appeared / 'final') / 1027 | "") 1028 | Token(t) <- t _ 1029 | _ <- [ \t\r\n]* 1030 | `) 1031 | 1032 | assert(t, err == nil) 1033 | assert(t, parser.Parse("public", nil) == nil) 1034 | assert(t, parser.Parse("static", nil) == nil) 1035 | assert(t, parser.Parse("final", nil) 
== nil) 1036 | assert(t, parser.Parse("public static final", nil) == nil) 1037 | assert(t, parser.Parse("public public", nil) != nil) 1038 | assert(t, parser.Parse("public static public", nil) != nil) 1039 | } 1040 | 1041 | func match(t *testing.T, r *Rule, s string, want bool) { 1042 | l, _, err := r.Parse(s, newData()) 1043 | ok := err == nil 1044 | if ok != want { 1045 | t.Errorf("syntax error: %d", l) 1046 | } 1047 | } 1048 | 1049 | func TestPegGrammar(t *testing.T) { 1050 | match(t, &rStart, " Definition <- a / ( b c ) / d \n rule2 <- [a-zA-Z][a-z0-9-]+ ", true) 1051 | } 1052 | 1053 | func TestPegDefinition(t *testing.T) { 1054 | match(t, &rDefinition, "Definition <- a / (b c) / d ", true) 1055 | match(t, &rDefinition, "Definition <- a / b c / d ", true) 1056 | match(t, &rDefinition, "Definition ← a ", true) 1057 | match(t, &rDefinition, "Definition ", false) 1058 | match(t, &rDefinition, " ", false) 1059 | match(t, &rDefinition, "", false) 1060 | match(t, &rDefinition, "Definition = a / (b c) / d ", false) 1061 | match(t, &rDefinition, "Macro(param) <- a ", true) 1062 | match(t, &rDefinition, "Macro (param) <- a ", false) 1063 | } 1064 | 1065 | func TestPegExpression(t *testing.T) { 1066 | match(t, &rExpression, "a / (b c) / d ", true) 1067 | match(t, &rExpression, "a / b c / d ", true) 1068 | match(t, &rExpression, "a b ", true) 1069 | match(t, &rExpression, "", true) 1070 | match(t, &rExpression, " ", false) 1071 | match(t, &rExpression, " a b ", false) 1072 | } 1073 | 1074 | func TestPegSequence(t *testing.T) { 1075 | match(t, &rSequence, "a b c d ", true) 1076 | match(t, &rSequence, "", true) 1077 | match(t, &rSequence, "!", false) 1078 | match(t, &rSequence, "<-", false) 1079 | match(t, &rSequence, " a", false) 1080 | } 1081 | 1082 | func TestPegPrefix(t *testing.T) { 1083 | match(t, &rPrefix, "&[a]", true) 1084 | match(t, &rPrefix, "![']", true) 1085 | match(t, &rPrefix, "-[']", false) 1086 | match(t, &rPrefix, "", false) 1087 | match(t, &rPrefix, " a", 
false) 1088 | } 1089 | 1090 | func TestPegSuffix(t *testing.T) { 1091 | match(t, &rSuffix, "aaa ", true) 1092 | match(t, &rSuffix, "aaa? ", true) 1093 | match(t, &rSuffix, "aaa* ", true) 1094 | match(t, &rSuffix, "aaa+ ", true) 1095 | match(t, &rSuffix, ". + ", true) 1096 | match(t, &rSuffix, "?", false) 1097 | match(t, &rSuffix, "", false) 1098 | match(t, &rPrefix, " a", false) 1099 | } 1100 | 1101 | func TestPegPrimary(t *testing.T) { 1102 | match(t, &rPrimary, "_Identifier0_ ", true) 1103 | match(t, &rPrimary, "_Identifier0_<-", false) 1104 | match(t, &rPrimary, "( _Identifier0_ _Identifier1_ )", true) 1105 | match(t, &rPrimary, "'Literal String'", true) 1106 | match(t, &rPrimary, "\"Literal String\"", true) 1107 | match(t, &rPrimary, "[a-zA-Z]", true) 1108 | match(t, &rPrimary, ".", true) 1109 | match(t, &rPrimary, "", false) 1110 | match(t, &rPrimary, " ", false) 1111 | match(t, &rPrimary, " a", false) 1112 | match(t, &rPrimary, "", false) 1113 | } 1114 | 1115 | func TestPegIdentifier(t *testing.T) { 1116 | match(t, &rIdentifier, "_Identifier0_ ", true) 1117 | match(t, &rIdentifier, "0Identifier_ ", false) 1118 | match(t, &rIdentifier, "Iden|t ", false) 1119 | match(t, &rIdentifier, " ", false) 1120 | match(t, &rIdentifier, " a", false) 1121 | match(t, &rIdentifier, "", false) 1122 | } 1123 | 1124 | func TestPegIdentStart(t *testing.T) { 1125 | match(t, &rIdentStart, "_", true) 1126 | match(t, &rIdentStart, "a", true) 1127 | match(t, &rIdentStart, "Z", true) 1128 | match(t, &rIdentStart, "", false) 1129 | match(t, &rIdentStart, " ", false) 1130 | match(t, &rIdentStart, "0", false) 1131 | } 1132 | 1133 | func TestPegIdentRest(t *testing.T) { 1134 | match(t, &rIdentRest, "_", true) 1135 | match(t, &rIdentRest, "a", true) 1136 | match(t, &rIdentRest, "Z", true) 1137 | match(t, &rIdentRest, "", false) 1138 | match(t, &rIdentRest, " ", false) 1139 | match(t, &rIdentRest, "0", true) 1140 | } 1141 | 1142 | func TestPegLiteral(t *testing.T) { 1143 | match(t, 
&rLiteral, "'abc' ", true) 1144 | match(t, &rLiteral, "'a\\nb\\tc' ", true) 1145 | match(t, &rLiteral, "'a\\277\tc' ", true) 1146 | match(t, &rLiteral, "'a\\77\tc' ", true) 1147 | match(t, &rLiteral, "'a\\80\tc' ", false) 1148 | match(t, &rLiteral, "'\n' ", true) 1149 | match(t, &rLiteral, "'a\\'b' ", true) 1150 | match(t, &rLiteral, "'a'b' ", false) 1151 | match(t, &rLiteral, "'a\"'b' ", false) 1152 | match(t, &rLiteral, "\"'\\\"abc\\\"'\" ", true) 1153 | match(t, &rLiteral, "\"'\"abc\"'\" ", false) 1154 | match(t, &rLiteral, "abc", false) 1155 | match(t, &rLiteral, "", false) 1156 | match(t, &rLiteral, "日本語", false) 1157 | } 1158 | 1159 | func TestPegClass(t *testing.T) { 1160 | match(t, &rClass, "[]", true) 1161 | match(t, &rClass, "[a]", true) 1162 | match(t, &rClass, "[a-z]", true) 1163 | match(t, &rClass, "[az]", true) 1164 | match(t, &rClass, "[a-zA-Z-]", true) 1165 | match(t, &rClass, "[a-zA-Z-0-9]", true) 1166 | match(t, &rClass, "[a-]", false) 1167 | match(t, &rClass, "[-a]", true) 1168 | match(t, &rClass, "[", false) 1169 | match(t, &rClass, "[a", false) 1170 | match(t, &rClass, "]", false) 1171 | match(t, &rClass, "a]", false) 1172 | match(t, &rClass, "あ-ん", false) 1173 | match(t, &rClass, "[-+]", true) 1174 | match(t, &rClass, "[+-]", false) 1175 | } 1176 | 1177 | func TestPegRange(t *testing.T) { 1178 | match(t, &rRange, "a", true) 1179 | match(t, &rRange, "a-z", true) 1180 | match(t, &rRange, "az", false) 1181 | match(t, &rRange, "", false) 1182 | match(t, &rRange, "a-", false) 1183 | match(t, &rRange, "-a", false) 1184 | } 1185 | 1186 | func TestPegChar(t *testing.T) { 1187 | match(t, &rChar, "\\n", true) 1188 | match(t, &rChar, "\\r", true) 1189 | match(t, &rChar, "\\t", true) 1190 | match(t, &rChar, "\\f", true) 1191 | match(t, &rChar, "\\v", true) 1192 | match(t, &rChar, "\\'", true) 1193 | match(t, &rChar, "\\\"", true) 1194 | match(t, &rChar, "\\[", true) 1195 | match(t, &rChar, "\\]", true) 1196 | match(t, &rChar, "\\\\", true) 1197 | match(t, 
&rChar, "\\000", true) 1198 | match(t, &rChar, "\\377", true) 1199 | match(t, &rChar, "\\477", false) 1200 | match(t, &rChar, "\\087", false) 1201 | match(t, &rChar, "\\079", false) 1202 | match(t, &rChar, "\\00", true) 1203 | match(t, &rChar, "\\77", true) 1204 | match(t, &rChar, "\\80", false) 1205 | match(t, &rChar, "\\08", false) 1206 | match(t, &rChar, "\\0", true) 1207 | match(t, &rChar, "\\7", true) 1208 | match(t, &rChar, "\\8", false) 1209 | match(t, &rChar, "a", true) 1210 | match(t, &rChar, ".", true) 1211 | match(t, &rChar, "0", true) 1212 | match(t, &rChar, "\\", false) 1213 | match(t, &rChar, " ", true) 1214 | match(t, &rChar, " ", false) 1215 | match(t, &rChar, "", false) 1216 | match(t, &rChar, "あ", false) 1217 | } 1218 | 1219 | func TestPegOperators(t *testing.T) { 1220 | match(t, &rLEFTARROW, "<-", true) 1221 | match(t, &rSLASH, "/ ", true) 1222 | match(t, &rAND, "& ", true) 1223 | match(t, &rNOT, "! ", true) 1224 | match(t, &rQUESTION, "? ", true) 1225 | match(t, &rSTAR, "* ", true) 1226 | match(t, &rPLUS, "+ ", true) 1227 | match(t, &rOPEN, "( ", true) 1228 | match(t, &rCLOSE, ") ", true) 1229 | match(t, &rDOT, ". 
// ErrorDetail describes one parse failure: the 1-based line and column at
// which it was detected and a human-readable message.
type ErrorDetail struct {
	Ln  int
	Col int
	Msg string
}

// String formats the detail as "line:column message".
func (d ErrorDetail) String() string {
	return fmt.Sprintf("%d:%d %s", d.Ln, d.Col, d.Msg)
}

// Error is the error type returned by parsing. It carries one or more
// ErrorDetail entries.
type Error struct {
	Details []ErrorDetail
}

// Error implements the error interface by formatting the first detail as
// "line:column message".
//
// Details is an exported field, so an Error with no details (or a nil *Error)
// can reach this method; a generic message is returned in that case instead
// of panicking on an out-of-range index.
func (e *Error) Error() string {
	if e == nil || len(e.Details) == 0 {
		return "unknown error"
	}
	return e.Details[0].String()
}
string, v *Values, d Any, p int) 46 | TracerLeave func(name string, s string, v *Values, d Any, p int, l int) 47 | 48 | tokenChecker *tokenChecker 49 | disableAction bool 50 | } 51 | 52 | func (r *Rule) Parse(s string, d Any) (l int, val Any, err error) { 53 | v := &Values{} 54 | c := &context{ 55 | s: s, 56 | errorPos: -1, 57 | messagePos: -1, 58 | whitespaceOpe: r.WhitespaceOpe, 59 | wordOpe: r.WordOpe, 60 | tracerEnter: r.TracerEnter, 61 | tracerLeave: r.TracerLeave, 62 | } 63 | 64 | var ope operator = r 65 | if r.WhitespaceOpe != nil { 66 | ope = Seq(r.WhitespaceOpe, r) // Skip whitespace at beginning 67 | } 68 | 69 | l = ope.parse(s, 0, v, c, d) 70 | 71 | if success(l) && len(v.Vs) > 0 && v.Vs[0] != nil { 72 | val = v.Vs[0] 73 | } 74 | 75 | if fail(l) || l != len(s) { 76 | var pos int 77 | var msg string 78 | if fail(l) { 79 | if c.messagePos > -1 { 80 | pos = c.messagePos 81 | msg = c.message 82 | } else { 83 | msg = "syntax error" 84 | pos = c.errorPos 85 | } 86 | } else { 87 | msg = "not exact match" 88 | pos = l 89 | } 90 | ln, col := lineInfo(s, pos) 91 | err = &Error{} 92 | err.(*Error).Details = append(err.(*Error).Details, ErrorDetail{ln, col, msg}) 93 | } 94 | 95 | return 96 | } 97 | 98 | func (o *Rule) Label() string { 99 | return fmt.Sprintf("[%s]", o.Name) 100 | } 101 | 102 | func (o *Rule) parse(s string, p int, v *Values, c *context, d Any) int { 103 | return parse(o, s, p, v, c, d) 104 | } 105 | 106 | func (r *Rule) parseCore(s string, p int, v *Values, c *context, d Any) int { 107 | // Macro reference 108 | if r.Parameters != nil { 109 | return r.Ope.parse(s, p, v, c, d) 110 | } 111 | 112 | if r.Enter != nil { 113 | r.Enter(d) 114 | } 115 | 116 | chv := c.push() 117 | 118 | l := r.Ope.parse(s, p, chv, c, d) 119 | 120 | // Invoke action 121 | var val Any 122 | 123 | if success(l) { 124 | if r.Action != nil && !r.disableAction { 125 | chv.S = s[p : p+l] 126 | chv.Pos = p 127 | 128 | var err error 129 | if val, err = r.Action(chv, d); err != nil { 
130 | if c.messagePos < p { 131 | c.messagePos = p 132 | c.message = err.Error() 133 | } 134 | l = -1 135 | } 136 | } else if len(chv.Vs) > 0 { 137 | val = chv.Vs[0] 138 | } 139 | } 140 | 141 | if success(l) { 142 | if r.Ignore == false { 143 | v.Vs = append(v.Vs, val) 144 | } 145 | } else { 146 | if r.Message != nil { 147 | if c.messagePos < p { 148 | c.messagePos = p 149 | c.message = r.Message() 150 | } 151 | } 152 | } 153 | 154 | c.pop() 155 | 156 | if r.Leave != nil { 157 | r.Leave(d) 158 | } 159 | 160 | return l 161 | } 162 | 163 | func (r *Rule) accept(v visitor) { 164 | v.visitRule(r) 165 | } 166 | 167 | func (r *Rule) isToken() bool { 168 | if r.tokenChecker == nil { 169 | r.tokenChecker = &tokenChecker{} 170 | r.Ope.accept(r.tokenChecker) 171 | } 172 | return r.tokenChecker.isToken() 173 | } 174 | 175 | // lineInfo 176 | func lineInfo(s string, curPos int) (ln int, col int) { 177 | pos := 0 178 | colStartPos := 0 179 | ln = 1 180 | 181 | for pos < curPos { 182 | if s[pos] == '\n' { 183 | ln++ 184 | colStartPos = pos + 1 185 | } 186 | pos++ 187 | } 188 | 189 | col = pos - colStartPos + 1 190 | return 191 | } 192 | -------------------------------------------------------------------------------- /visitor.go: -------------------------------------------------------------------------------- 1 | package peg 2 | 3 | // visitor 4 | type visitor interface { 5 | visitSequence(ope *sequence) 6 | visitPrioritizedChoice(ope *prioritizedChoice) 7 | visitZeroOrMore(ope *zeroOrMore) 8 | visitOneOrMore(ope *oneOrMore) 9 | visitOption(ope *option) 10 | visitAndPredicate(ope *andPredicate) 11 | visitNotPredicate(ope *notPredicate) 12 | visitLiteralString(ope *literalString) 13 | visitCharacterClass(ope *characterClass) 14 | visitAnyCharacter(ope *anyCharacter) 15 | visitTokenBoundary(ope *tokenBoundary) 16 | visitIgnore(ope *ignore) 17 | visitUser(ope *user) 18 | visitReference(ope *reference) 19 | visitRule(ope *Rule) 20 | visitWhitespace(ope *whitespace) 21 | 
visitExpression(ope *expression) 22 | } 23 | 24 | // visitorBase 25 | type visitorBase struct { 26 | } 27 | 28 | func (v *visitorBase) visitSequence(ope *sequence) {} 29 | func (v *visitorBase) visitPrioritizedChoice(ope *prioritizedChoice) {} 30 | func (v *visitorBase) visitZeroOrMore(ope *zeroOrMore) {} 31 | func (v *visitorBase) visitOneOrMore(ope *oneOrMore) {} 32 | func (v *visitorBase) visitOption(ope *option) {} 33 | func (v *visitorBase) visitAndPredicate(ope *andPredicate) {} 34 | func (v *visitorBase) visitNotPredicate(ope *notPredicate) {} 35 | func (v *visitorBase) visitLiteralString(ope *literalString) {} 36 | func (v *visitorBase) visitCharacterClass(ope *characterClass) {} 37 | func (v *visitorBase) visitAnyCharacter(ope *anyCharacter) {} 38 | func (v *visitorBase) visitTokenBoundary(ope *tokenBoundary) {} 39 | func (v *visitorBase) visitIgnore(ope *ignore) {} 40 | func (v *visitorBase) visitUser(ope *user) {} 41 | func (v *visitorBase) visitReference(ope *reference) {} 42 | func (v *visitorBase) visitRule(ope *Rule) {} 43 | func (v *visitorBase) visitWhitespace(ope *whitespace) {} 44 | func (v *visitorBase) visitExpression(ope *expression) {} 45 | 46 | // tokenChecker 47 | type tokenChecker struct { 48 | *visitorBase 49 | hasTokenBoundary bool 50 | hasRule bool 51 | } 52 | 53 | func (v *tokenChecker) visitSequence(ope *sequence) { 54 | for _, o := range ope.opes { 55 | o.accept(v) 56 | } 57 | } 58 | func (v *tokenChecker) visitPrioritizedChoice(ope *prioritizedChoice) { 59 | for _, o := range ope.opes { 60 | o.accept(v) 61 | } 62 | } 63 | func (v *tokenChecker) visitZeroOrMore(ope *zeroOrMore) { ope.ope.accept(v) } 64 | func (v *tokenChecker) visitOneOrMore(ope *oneOrMore) { ope.ope.accept(v) } 65 | func (v *tokenChecker) visitOption(ope *option) { ope.ope.accept(v) } 66 | func (v *tokenChecker) visitTokenBoundary(ope *tokenBoundary) { v.hasTokenBoundary = true } 67 | func (v *tokenChecker) visitIgnore(ope *ignore) { ope.ope.accept(v) } 68 | func (v 
*tokenChecker) visitReference(ope *reference) { 69 | if ope.args != nil { 70 | ope.rule.accept(v) 71 | for _, arg := range ope.args { 72 | arg.accept(v) 73 | } 74 | } else { 75 | v.hasRule = true 76 | } 77 | } 78 | func (v *tokenChecker) visitWhitespace(ope *whitespace) { ope.ope.accept(v) } 79 | func (v *tokenChecker) visitExpression(ope *expression) { ope.atom.accept(v) } 80 | 81 | func (v *tokenChecker) isToken() bool { 82 | return v.hasTokenBoundary || !v.hasRule 83 | } 84 | 85 | // detectLeftRecursion 86 | type detectLeftRecursion struct { 87 | *visitorBase 88 | pos int 89 | name string 90 | params []string 91 | refs map[string]bool 92 | done bool 93 | } 94 | 95 | func (v *detectLeftRecursion) visitSequence(ope *sequence) { 96 | for _, o := range ope.opes { 97 | o.accept(v) 98 | if v.done { 99 | break 100 | } else if v.pos != -1 { 101 | v.done = true 102 | break 103 | } 104 | } 105 | } 106 | func (v *detectLeftRecursion) visitPrioritizedChoice(ope *prioritizedChoice) { 107 | for _, o := range ope.opes { 108 | o.accept(v) 109 | if v.pos != -1 { 110 | v.done = true 111 | break 112 | } 113 | } 114 | } 115 | func (v *detectLeftRecursion) visitZeroOrMore(ope *zeroOrMore) { ope.ope.accept(v); v.done = false } 116 | func (v *detectLeftRecursion) visitOneOrMore(ope *oneOrMore) { ope.ope.accept(v); v.done = true } 117 | func (v *detectLeftRecursion) visitOption(ope *option) { ope.ope.accept(v); v.done = false } 118 | func (v *detectLeftRecursion) visitAndPredicate(ope *andPredicate) { ope.ope.accept(v); v.done = false } 119 | func (v *detectLeftRecursion) visitNotPredicate(ope *notPredicate) { ope.ope.accept(v); v.done = false } 120 | func (v *detectLeftRecursion) visitLiteralString(ope *literalString) { v.done = len(ope.lit) > 0 } 121 | func (v *detectLeftRecursion) visitCharacterClass(ope *characterClass) { v.done = true } 122 | func (v *detectLeftRecursion) visitAnyCharacter(ope *anyCharacter) { v.done = true } 123 | func (v *detectLeftRecursion) 
visitTokenBoundary(ope *tokenBoundary) { ope.ope.accept(v) } 124 | func (v *detectLeftRecursion) visitIgnore(ope *ignore) { ope.ope.accept(v) } 125 | func (v *detectLeftRecursion) visitReference(ope *reference) { 126 | if ope.name == v.name { 127 | v.pos = ope.pos 128 | } else if _, ok := v.refs[ope.name]; !ok { 129 | v.refs[ope.name] = true 130 | if ope.rule != nil { 131 | ope.rule.accept(v) 132 | if v.done == false { 133 | return 134 | } 135 | } 136 | } 137 | v.done = true 138 | } 139 | func (v *detectLeftRecursion) visitRule(ope *Rule) { ope.Ope.accept(v) } 140 | func (v *detectLeftRecursion) visitWhitespace(ope *whitespace) { ope.ope.accept(v) } 141 | func (v *detectLeftRecursion) visitExpression(ope *expression) { ope.atom.accept(v) } 142 | 143 | // referenceChecker 144 | type referenceChecker struct { 145 | *visitorBase 146 | grammar map[string]*Rule 147 | params []string 148 | errorPos map[string]int 149 | errorMsg map[string]string 150 | } 151 | 152 | func (v *referenceChecker) visitSequence(ope *sequence) { 153 | for _, o := range ope.opes { 154 | o.accept(v) 155 | } 156 | } 157 | func (v *referenceChecker) visitPrioritizedChoice(ope *prioritizedChoice) { 158 | for _, o := range ope.opes { 159 | o.accept(v) 160 | } 161 | } 162 | func (v *referenceChecker) visitZeroOrMore(ope *zeroOrMore) { ope.ope.accept(v) } 163 | func (v *referenceChecker) visitOneOrMore(ope *oneOrMore) { ope.ope.accept(v) } 164 | func (v *referenceChecker) visitOption(ope *option) { ope.ope.accept(v) } 165 | func (v *referenceChecker) visitAndPredicate(ope *andPredicate) { ope.ope.accept(v) } 166 | func (v *referenceChecker) visitNotPredicate(ope *notPredicate) { ope.ope.accept(v) } 167 | func (v *referenceChecker) visitTokenBoundary(ope *tokenBoundary) { ope.ope.accept(v) } 168 | func (v *referenceChecker) visitIgnore(ope *ignore) { ope.ope.accept(v) } 169 | func (v *referenceChecker) visitReference(ope *reference) { 170 | for _, param := range v.params { 171 | if param == ope.name { 
172 | return 173 | } 174 | } 175 | 176 | if r, ok := v.grammar[ope.name]; !ok { 177 | v.errorPos[ope.name] = ope.pos 178 | v.errorMsg[ope.name] = "'" + ope.name + "' is not defined." 179 | } else if r.Parameters != nil { 180 | if ope.args == nil || len(ope.args) != len(r.Parameters) { 181 | v.errorPos[ope.name] = ope.pos 182 | v.errorMsg[ope.name] = "incorrect number of arguments." 183 | } 184 | } else { 185 | if ope.args != nil { 186 | v.errorPos[ope.name] = ope.pos 187 | v.errorMsg[ope.name] = "'" + ope.name + "' is not macro." 188 | } 189 | } 190 | } 191 | func (v *referenceChecker) visitRule(ope *Rule) { ope.Ope.accept(v) } 192 | func (v *referenceChecker) visitWhitespace(ope *whitespace) { ope.ope.accept(v) } 193 | func (v *referenceChecker) visitExpression(ope *expression) { ope.atom.accept(v) } 194 | 195 | // linkReferences 196 | type linkReferences struct { 197 | *visitorBase 198 | parameters []string 199 | grammar map[string]*Rule 200 | } 201 | 202 | func (v *linkReferences) visitSequence(ope *sequence) { 203 | for _, o := range ope.opes { 204 | o.accept(v) 205 | } 206 | } 207 | func (v *linkReferences) visitPrioritizedChoice(ope *prioritizedChoice) { 208 | for _, o := range ope.opes { 209 | o.accept(v) 210 | } 211 | } 212 | func (v *linkReferences) visitZeroOrMore(ope *zeroOrMore) { ope.ope.accept(v) } 213 | func (v *linkReferences) visitOneOrMore(ope *oneOrMore) { ope.ope.accept(v) } 214 | func (v *linkReferences) visitOption(ope *option) { ope.ope.accept(v) } 215 | func (v *linkReferences) visitAndPredicate(ope *andPredicate) { ope.ope.accept(v) } 216 | func (v *linkReferences) visitNotPredicate(ope *notPredicate) { ope.ope.accept(v) } 217 | func (v *linkReferences) visitTokenBoundary(ope *tokenBoundary) { ope.ope.accept(v) } 218 | func (v *linkReferences) visitIgnore(ope *ignore) { ope.ope.accept(v) } 219 | func (v *linkReferences) visitReference(ope *reference) { 220 | if r, ok := v.grammar[ope.name]; ok { 221 | ope.rule = r 222 | } else { 223 | for 
i, param := range v.parameters { 224 | if param == ope.name { 225 | ope.iarg = i 226 | break 227 | } 228 | } 229 | } 230 | for _, arg := range ope.args { 231 | arg.accept(v) 232 | } 233 | } 234 | func (v *linkReferences) visitRule(ope *Rule) { ope.Ope.accept(v) } 235 | func (v *linkReferences) visitWhitespace(ope *whitespace) { ope.ope.accept(v) } 236 | func (v *linkReferences) visitExpression(ope *expression) { ope.atom.accept(v) } 237 | 238 | // findReference 239 | type findReference struct { 240 | *visitorBase 241 | args []operator 242 | params []string 243 | ope operator 244 | } 245 | 246 | func (v *findReference) visitSequence(ope *sequence) { 247 | var opes []operator 248 | for _, o := range ope.opes { 249 | o.accept(v) 250 | opes = append(opes, v.ope) 251 | } 252 | v.ope = SeqCore(opes) 253 | } 254 | func (v *findReference) visitPrioritizedChoice(ope *prioritizedChoice) { 255 | var opes []operator 256 | for _, o := range ope.opes { 257 | o.accept(v) 258 | opes = append(opes, v.ope) 259 | } 260 | v.ope = ChoCore(opes) 261 | } 262 | func (v *findReference) visitZeroOrMore(ope *zeroOrMore) { 263 | ope.ope.accept(v) 264 | v.ope = Zom(v.ope) 265 | } 266 | func (v *findReference) visitOneOrMore(ope *oneOrMore) { 267 | ope.ope.accept(v) 268 | v.ope = Oom(v.ope) 269 | } 270 | func (v *findReference) visitOption(ope *option) { 271 | ope.ope.accept(v) 272 | v.ope = Opt(v.ope) 273 | } 274 | func (v *findReference) visitAndPredicate(ope *andPredicate) { 275 | ope.ope.accept(v) 276 | v.ope = Apd(v.ope) 277 | } 278 | func (v *findReference) visitNotPredicate(ope *notPredicate) { 279 | ope.ope.accept(v) 280 | v.ope = Npd(v.ope) 281 | } 282 | func (v *findReference) visitLiteralString(ope *literalString) { 283 | v.ope = ope 284 | } 285 | func (v *findReference) visitCharacterClass(ope *characterClass) { 286 | v.ope = ope 287 | } 288 | func (v *findReference) visitAnyCharacter(ope *anyCharacter) { 289 | v.ope = ope 290 | } 291 | func (v *findReference) visitTokenBoundary(ope 
*tokenBoundary) { 292 | ope.ope.accept(v) 293 | v.ope = Tok(v.ope) 294 | } 295 | func (v *findReference) visitIgnore(ope *ignore) { 296 | ope.ope.accept(v) 297 | v.ope = Ign(v.ope) 298 | } 299 | func (v *findReference) visitUser(ope *user) { 300 | v.ope = ope 301 | } 302 | func (v *findReference) visitReference(ope *reference) { 303 | for i, arg := range v.args { 304 | name := v.params[i] 305 | if name == ope.name { 306 | v.ope = arg 307 | return 308 | } 309 | } 310 | v.ope = ope 311 | } 312 | func (v *findReference) visitWhitespace(ope *whitespace) { 313 | ope.ope.accept(v) 314 | v.ope = Wsp(v.ope) 315 | } 316 | func (v *findReference) visitExpression(ope *expression) { 317 | ope.atom.accept(v) 318 | v.ope = ope 319 | } 320 | --------------------------------------------------------------------------------