├── LICENSE ├── README.md ├── choice_test.go ├── cmb.go ├── cmb_test.go ├── go.mod ├── lambda_test.go ├── literal_test.go ├── optional_test.go ├── parselets.go ├── parser.go ├── parser_test.go ├── pattern_test.go ├── rule_test.go ├── sentence_test.go ├── sequence_test.go └── zeroOrMore_test.go /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Sean Wolcott 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # cmb 2 | 3 | A parser combinator library in Go 4 | 5 | This package allows you to build parsers with no code generation. 6 | Production rules are defined as function calls into the options of the parser constructor. 
7 | 8 | Installation 9 | ------------ 10 | Install using ```go get``` (no dependencies required): 11 | ``` 12 | go get github.com/mcvoid/cmb 13 | ``` 14 | 15 | 16 | Example 17 | ------- 18 | ``` 19 | package main 20 | 21 | import ( 22 | "regexp" 23 | . "github.com/mcvoid/cmb" 24 | ) 25 | 26 | func main() { 27 | // a grammar for a simple config file format 28 | parser := Cmb( 29 | "options", 30 | Define("options", ZeroOrMore(Rule("option"))), 31 | Define("option", Sequence(Rule("name"), Literal("="), Rule("value"), Literal("\n"))), 32 | Define("name", Pattern(regexp.MustCompile("[a-z_]+"))), 33 | Define("value", Pattern(regexp.MustCompile(".+"))), 34 | Ignore(" \t"), 35 | ) 36 | 37 | parseTree, err := parser.Parse(`option_one = abcdef 38 | option_two = ghijkl 39 | `) 40 | } 41 | ``` 42 | 43 | Parselets Available 44 | ------------------- 45 | * Sequence - Recognize several patterns in a row (concatenation) 46 | * Choice - Recognize one of several possible patterns (alternation) 47 | * Optional - Recognize zero or one instances of a rule 48 | * ZeroOrMore - Recognize zero or more instances of a rule 49 | * Literal - Recognize a given string 50 | * Pattern - Recognize a given regular expression 51 | * Rule - Recognize a rule with a given name, including itself 52 | 53 | Parse Tree Format 54 | ----------------- 55 | ``` 56 | // ParseTreeNode is a single node of the result of a parse. 57 | type ParseTreeNode struct { 58 | // A user- and structure- defined type identifier of the node. 59 | // It will either be a production name or the name of a sub-rule of a production. 60 | NodeType string 61 | // The text which this structure represents. 62 | Text []byte 63 | // The starting position of the text in the string. 64 | Start int 65 | // The ending position of the text in the string. 66 | End int 67 | // The entire string being parsed. 68 | BaseString []byte 69 | // Any child nodes. 
70 | Children []*ParseTreeNode 71 | } 72 | ``` 73 | 74 | License 75 | ------- 76 | MIT License, see [LICENSE](LICENSE) 77 | -------------------------------------------------------------------------------- /choice_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestChoice(t *testing.T) { 9 | parser := &Parser{} 10 | sourceString := []byte("abcdefghijkl") 11 | returnA := &ParseTreeNode{"a", []byte("abc"), 0, 3, sourceString, []*ParseTreeNode{}} 12 | returnB := &ParseTreeNode{"b", []byte("def"), 3, 6, sourceString, []*ParseTreeNode{}} 13 | returnC := &ParseTreeNode{"c", []byte("ghi"), 6, 9, sourceString, []*ParseTreeNode{}} 14 | errorA := fmt.Errorf("a") 15 | errorB := fmt.Errorf("b") 16 | errorC := fmt.Errorf("c") 17 | makeParselet := func(val *ParseTreeNode, err error) Parselet { 18 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { return val, err } 19 | } 20 | 21 | parselet := Choice() 22 | actualVal, actualErr := parselet(sourceString, 3, parser) 23 | if actualVal != nil { 24 | t.Errorf("Expected val to be nil, got %v", actualVal) 25 | } 26 | if actualErr == nil { 27 | t.Errorf("Expected err to be non-nil") 28 | } 29 | 30 | parselet = Choice( 31 | makeParselet(nil, errorA), 32 | makeParselet(nil, errorB), 33 | makeParselet(nil, errorC), 34 | ) 35 | actualVal, actualErr = parselet(sourceString, 3, parser) 36 | if actualVal != nil { 37 | t.Errorf("Expected val to be nil, got %v", actualVal) 38 | } 39 | if actualErr == nil { 40 | t.Errorf("Expected err to be non-nil") 41 | } 42 | 43 | parselet = Choice( 44 | makeParselet(returnA, nil), 45 | makeParselet(nil, errorA), 46 | makeParselet(nil, errorB), 47 | makeParselet(nil, errorC), 48 | ) 49 | actualVal, actualErr = parselet(sourceString, 0, parser) 50 | if actualErr != nil { 51 | t.Errorf("Expected err to be nil, got %v", actualErr) 52 | } 53 | if string(actualVal.Text) != 
"abc" { 54 | t.Errorf("val.Text: expected '%v', got '%v'", "abc", string(actualVal.Text)) 55 | } 56 | 57 | parselet = Choice( 58 | makeParselet(nil, errorA), 59 | makeParselet(nil, errorB), 60 | makeParselet(nil, errorC), 61 | makeParselet(returnA, nil), 62 | ) 63 | actualVal, actualErr = parselet(sourceString, 0, parser) 64 | if actualErr != nil { 65 | t.Errorf("Expected err to be nil, got %v", actualErr) 66 | } 67 | if string(actualVal.Text) != "abc" { 68 | t.Errorf("val.Text: expected '%v', got '%v'", "abc", string(actualVal.Text)) 69 | } 70 | 71 | parselet = Choice( 72 | makeParselet(nil, errorA), 73 | makeParselet(returnA, nil), 74 | makeParselet(nil, errorB), 75 | makeParselet(nil, errorC), 76 | ) 77 | actualVal, actualErr = parselet(sourceString, 0, parser) 78 | if actualErr != nil { 79 | t.Errorf("Expected err to be nil, got %v", actualErr) 80 | } 81 | if string(actualVal.Text) != "abc" { 82 | t.Errorf("val.Text: expected '%v', got '%v'", "abc", string(actualVal.Text)) 83 | } 84 | 85 | parselet = Choice( 86 | makeParselet(nil, errorA), 87 | makeParselet(returnA, nil), 88 | makeParselet(nil, errorB), 89 | makeParselet(returnB, nil), 90 | makeParselet(nil, errorC), 91 | makeParselet(returnC, nil), 92 | ) 93 | actualVal, actualErr = parselet(sourceString, 0, parser) 94 | if actualErr != nil { 95 | t.Errorf("Expected err to be nil, got %v", actualErr) 96 | } 97 | if string(actualVal.Text) != "abc" { 98 | t.Errorf("val.Text: expected '%v', got '%v'", "abc", string(actualVal.Text)) 99 | } 100 | 101 | parselet = Choice( 102 | makeParselet(nil, errorA), 103 | makeParselet(returnB, nil), 104 | makeParselet(nil, errorB), 105 | makeParselet(returnA, nil), 106 | makeParselet(nil, errorC), 107 | makeParselet(returnC, nil), 108 | ) 109 | actualVal, actualErr = parselet(sourceString, 0, parser) 110 | if actualErr != nil { 111 | t.Errorf("Expected err to be nil, got %v", actualErr) 112 | } 113 | if string(actualVal.Text) != "def" { 114 | t.Errorf("val.Text: expected '%v', 
got '%v'", "def", string(actualVal.Text)) 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /cmb.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | // Production defines a production rule for a parser's grammar. 4 | type Production func(*Parser) 5 | 6 | // Cmb creates a new parser with grammar defined by parser combinators. 7 | func Cmb(startRule string, prods ...Production) *Parser { 8 | p := Parser{ 9 | rules: map[string]Parselet{}, 10 | startRule: startRule, 11 | ignore: "", 12 | } 13 | for _, prod := range prods { 14 | prod(&p) 15 | } 16 | return &p 17 | } 18 | 19 | // Define produces a production. 20 | func Define(name string, rule Parselet) Production { 21 | return func(p *Parser) { 22 | p.rules[name] = rule 23 | } 24 | } 25 | 26 | // Ignore is a set of characters that should be skipped. 27 | func Ignore(s string) Production { 28 | return func(p *Parser) { 29 | p.ignore = p.ignore + s 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /cmb_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import "testing" 4 | 5 | func TestDefine(t *testing.T) { 6 | p := Parser{ 7 | rules: map[string]Parselet{}, 8 | startRule: "name", 9 | ignore: "", 10 | } 11 | parselet := func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { return nil, nil } 12 | Define("a", parselet)(&p) 13 | _, ok := p.rules["a"] 14 | if !ok { 15 | t.Errorf("Added rule not found") 16 | } 17 | } 18 | 19 | func TestIgnore(t *testing.T) { 20 | p := Parser{ 21 | rules: map[string]Parselet{}, 22 | startRule: "name", 23 | ignore: "abc", 24 | } 25 | Ignore("def")(&p) 26 | if p.ignore != "abcdef" { 27 | t.Errorf("Ignore: expected '%v' got '%v'", "abcdef", p.ignore) 28 | } 29 | } 30 | 31 | func TestCmb(t *testing.T) { 32 | p := Cmb("abcd") 33 | if p.startRule != "abcd" { 34 | 
t.Errorf("startrule: expected '%v' got '%v'", "abcd", p.startRule) 35 | } 36 | 37 | parselet := func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { return nil, nil } 38 | p = Cmb( 39 | "", 40 | Ignore("a"), 41 | Define("b", parselet), 42 | ) 43 | if p.ignore != "a" { 44 | t.Errorf("ignore: expected '%v' got '%v'", "a", p.ignore) 45 | } 46 | _, ok := p.rules["b"] 47 | if !ok { 48 | t.Errorf("Added rule not found") 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/mcvoid/cmb 2 | 3 | go 1.13 4 | -------------------------------------------------------------------------------- /lambda_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | ) 7 | 8 | func TestLambdaBug1(t *testing.T) { 9 | parser := Cmb( 10 | "application", 11 | Define("atom", Pattern(regexp.MustCompile("[a-zA-Z_][a-zA-Z0-9_]*"))), 12 | Define("listItem", Choice( 13 | Rule("atom"), 14 | )), 15 | Define("application", Sequence( 16 | Rule("listItem"), 17 | ZeroOrMore(Rule("listItem")), 18 | )), 19 | ) 20 | tree, _ := parser.Parse("a") 21 | if len(tree.Children) != 2 { 22 | t.Errorf("expected 2 children") 23 | } 24 | _, second := tree.Children[0], tree.Children[1] 25 | if len(second.Children) != 0 { 26 | t.Errorf("expected rest to be 0") 27 | } 28 | } 29 | 30 | func TestLambdaBug2(t *testing.T) { 31 | parser := Cmb( 32 | "application", 33 | Ignore(" "), 34 | Define("application", Sequence(Literal("a"), Literal("b"), Literal("c"), Literal("d"))), 35 | ) 36 | parser.Parse("abc") 37 | } 38 | 39 | func TestLambdaBug3(t *testing.T) { 40 | parser := Cmb( 41 | "application", 42 | Define("application", Literal("abc")), 43 | ) 44 | parser.Parse("") 45 | } 46 | -------------------------------------------------------------------------------- 
/literal_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "testing" 5 | ) 6 | 7 | func TestLiteral(t *testing.T) { 8 | parser := &Parser{ignore: ""} 9 | parselet := Literal("abc") 10 | actualValue, actualError := parselet([]byte("def"), 0, parser) 11 | if actualValue != nil { 12 | t.Errorf("Expected no match, got %v", actualValue) 13 | } 14 | if actualError == nil { 15 | t.Errorf("Expected non-nil error") 16 | } 17 | 18 | actualValue, actualError = parselet([]byte("abcdef"), 3, parser) 19 | if actualValue != nil { 20 | t.Errorf("Expected no match, got %v", actualValue) 21 | } 22 | if actualError == nil { 23 | t.Errorf("Expected non-nil error") 24 | } 25 | 26 | actualValue, actualError = parselet([]byte("abcdef"), 0, parser) 27 | if actualValue == nil { 28 | t.Errorf("Expected match, got nil") 29 | } 30 | if actualError != nil { 31 | t.Errorf("Expected nil error, go %v", actualError) 32 | } 33 | if actualValue.Start != 0 { 34 | t.Errorf("val.Start: expected %v go %v", 0, actualValue.Start) 35 | } 36 | if actualValue.End != 3 { 37 | t.Errorf("val.End: expected %v go %v", 3, actualValue.End) 38 | } 39 | 40 | actualValue, actualError = parselet([]byte("abcdef"), 3, parser) 41 | if actualValue != nil { 42 | t.Errorf("Expected no match, got %v", actualValue) 43 | } 44 | if actualError == nil { 45 | t.Errorf("Expected non-nil error") 46 | } 47 | 48 | actualValue, actualError = parselet([]byte("defabc"), 3, parser) 49 | if actualValue == nil { 50 | t.Errorf("Expected match, got nil") 51 | } 52 | if actualError != nil { 53 | t.Errorf("Expected nil error, go %v", actualError) 54 | } 55 | if actualValue.Start != 3 { 56 | t.Errorf("val.Start: expected %v go %v", 3, actualValue.Start) 57 | } 58 | if actualValue.End != 6 { 59 | t.Errorf("val.End: expected %v go %v", 6, actualValue.End) 60 | } 61 | } 62 | 63 | func TestLiteralWithIgnore(t *testing.T) { 64 | parser := &Parser{ignore: " \t\n"} 65 | 
parselet := Literal("abc") 66 | actualValue, actualError := parselet([]byte("def"), 0, parser) 67 | if actualValue != nil { 68 | t.Errorf("Expected no match, got %v", actualValue) 69 | } 70 | if actualError == nil { 71 | t.Errorf("Expected non-nil error") 72 | } 73 | 74 | actualValue, actualError = parselet([]byte("\t\t\tabc"), 0, parser) 75 | if actualValue == nil { 76 | t.Errorf("Expected match, got nil") 77 | } 78 | if actualError != nil { 79 | t.Errorf("Expected nil error, go %v", actualError) 80 | } 81 | if actualValue.Start != 0 { 82 | t.Errorf("val.Start: expected %v go %v", 3, actualValue.Start) 83 | } 84 | if actualValue.End != 6 { 85 | t.Errorf("val.End: expected %v go %v", 6, actualValue.End) 86 | } 87 | if string(actualValue.Text) != "abc" { 88 | t.Errorf("val.Text: expected %v go %v", "abc", string(actualValue.Text)) 89 | } 90 | } 91 | -------------------------------------------------------------------------------- /optional_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestOptional(t *testing.T) { 9 | parser := &Parser{} 10 | sourceString := []byte("abcdefghijkl") 11 | returnA := &ParseTreeNode{"a", []byte("abc"), 0, 3, sourceString, []*ParseTreeNode{}} 12 | errorA := fmt.Errorf("a") 13 | makeParselet := func(val *ParseTreeNode, err error) Parselet { 14 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { return val, err } 15 | } 16 | 17 | parselet := Optional(makeParselet(returnA, nil)) 18 | actualVal, actualErr := parselet(sourceString, 3, parser) 19 | if actualErr != nil { 20 | t.Errorf("Expected err to be nil, got %v", actualErr) 21 | } 22 | if string(actualVal.Text) != "abc" { 23 | t.Errorf("val.Text: expected '%v' got '%v'", "abc", string(actualVal.Text)) 24 | } 25 | if actualVal.Children[0] != returnA { 26 | t.Errorf("val.Children[0]: expected '%v' got '%v'", returnA, actualVal.Children[0]) 27 | } 28 | 29 
| parselet = Optional(makeParselet(nil, errorA)) 30 | actualVal, actualErr = parselet(sourceString, 3, parser) 31 | if actualErr != nil { 32 | t.Errorf("Expected err to be nil, got %v", actualErr) 33 | } 34 | if string(actualVal.Text) != "" { 35 | t.Errorf("val.Text: expected '%v' got '%v'", "", string(actualVal.Text)) 36 | } 37 | if actualVal.Start != 3 { 38 | t.Errorf("val.Start: expected '%v' got '%v'", 3, actualVal.Start) 39 | } 40 | if actualVal.End != 3 { 41 | t.Errorf("val.End: expected '%v' got '%v'", 3, actualVal.End) 42 | } 43 | if len(actualVal.Children) != 0 { 44 | t.Errorf("val.Children length: expected '%v' got '%v'", 0, len(actualVal.Children)) 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /parselets.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "math" 7 | "regexp" 8 | ) 9 | 10 | // Parselet is a single combinable recognizer of grammatical structure. 11 | type Parselet func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) 12 | 13 | // Rule matches a named production rule. 14 | func Rule(name string) Parselet { 15 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 16 | p, exists := parser.rules[name] 17 | if !exists { 18 | return nil, fmt.Errorf(`pos %d: rule "%s" not found`, pos, name) 19 | } 20 | 21 | memo, ok := parser.table[name][pos] 22 | if ok { 23 | return memo.val, memo.err 24 | } 25 | r, err := p(s, pos, parser) 26 | if err != nil { 27 | parser.table[name][pos] = &parseletResult{nil, err} 28 | return nil, err 29 | } 30 | val := &ParseTreeNode{name, r.Text, r.Start, r.End, s, r.Children} 31 | parser.table[name][pos] = &parseletResult{val, nil} 32 | return val, nil 33 | } 34 | } 35 | 36 | // Literal will match a substring in the parsed string. 
37 | func Literal(strToFind string) Parselet { 38 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 39 | cursor := ignore(s, pos, parser) 40 | strlen := len(strToFind) 41 | if pos+strlen > len(s) { 42 | return nil, fmt.Errorf("pos %d: Unexpected EOF", pos) 43 | } 44 | if !bytes.HasPrefix(s[cursor:], []byte(strToFind)) { 45 | return nil, fmt.Errorf("pos %d: Expected %s got %s", pos, strToFind, s[pos:pos+strlen]) 46 | } 47 | return &ParseTreeNode{"literal", s[cursor : cursor+strlen], pos, cursor + strlen, s, []*ParseTreeNode{}}, nil 48 | } 49 | } 50 | 51 | // Pattern will match a regex in the parsed string. 52 | func Pattern(regex *regexp.Regexp) Parselet { 53 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 54 | cursor := ignore(s, pos, parser) 55 | bounds := regex.FindIndex(s[cursor:]) 56 | if bounds == nil || bounds[0] != 0 { 57 | maxBounds := int(math.Min(float64(pos+10), float64(len(s)))) 58 | return nil, fmt.Errorf("pos %d: expected number, got %s", pos, s[pos:maxBounds]) 59 | } 60 | start, end := bounds[0]+cursor, bounds[1]+cursor 61 | return &ParseTreeNode{"pattern", s[start:end], pos, end, s, []*ParseTreeNode{}}, nil 62 | } 63 | } 64 | 65 | // Sequence matches multiple rules one after another. 66 | func Sequence(items ...Parselet) Parselet { 67 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 68 | cursor := pos 69 | children := []*ParseTreeNode{} 70 | for _, item := range items { 71 | r, err := item(s, cursor, parser) 72 | if err != nil { 73 | return nil, err 74 | } 75 | children = append(children, r) 76 | cursor = r.End 77 | } 78 | 79 | return &ParseTreeNode{"sequence", s[pos:cursor], pos, cursor, s, children}, nil 80 | } 81 | } 82 | 83 | // Choice matches several rules in the same place and returns whichever matches first. 
84 | func Choice(items ...Parselet) Parselet { 85 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 86 | for _, item := range items { 87 | r, err := item(s, pos, parser) 88 | if err == nil { 89 | return &ParseTreeNode{"choice", r.Text, r.Start, r.End, s, []*ParseTreeNode{r}}, nil 90 | } 91 | } 92 | 93 | return nil, fmt.Errorf("pos %d: none of the available options were valid", pos) 94 | } 95 | } 96 | 97 | // Optional matches a rule or the absence of the rule. 98 | func Optional(item Parselet) Parselet { 99 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 100 | r, err := item(s, pos, parser) 101 | if err != nil { 102 | return &ParseTreeNode{"optional", []byte{}, pos, pos, s, []*ParseTreeNode{}}, nil 103 | } 104 | return &ParseTreeNode{"optional", r.Text, r.Start, r.End, s, []*ParseTreeNode{r}}, nil 105 | } 106 | } 107 | 108 | // ZeroOrMore matches a rule multiple times. 109 | func ZeroOrMore(item Parselet) Parselet { 110 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 111 | children := []*ParseTreeNode{} 112 | cursor := pos 113 | for { 114 | r, err := item(s, cursor, parser) 115 | if err != nil { 116 | break 117 | } 118 | children = append(children, r) 119 | cursor = r.End 120 | } 121 | return &ParseTreeNode{"zeroOrMore", s[pos:cursor], pos, cursor, s, children}, nil 122 | } 123 | } 124 | -------------------------------------------------------------------------------- /parser.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "sync" 5 | ) 6 | 7 | // ParseTreeNode is a single node of the result of a parse. 8 | type ParseTreeNode struct { 9 | // A user- and structure- defined type identifier of the node. 10 | // It will either be a production name or the name of a sub-rule of a production. 11 | NodeType string 12 | // The text which this structure represents. 
13 | Text []byte 14 | // The starting position of the text in the string. 15 | Start int 16 | // The ending position of the text in the string. 17 | End int 18 | // The entire string being parsed. 19 | BaseString []byte 20 | // Any child nodes. 21 | Children []*ParseTreeNode 22 | } 23 | 24 | // Parser is a recognizer of grammar within a string. 25 | type Parser struct { 26 | rules map[string]Parselet 27 | startRule string 28 | ignore string 29 | table map[string]map[int]*parseletResult 30 | mux sync.Mutex 31 | } 32 | 33 | // Parse turns a string into a parse tree according to a grammar. 34 | func (p *Parser) Parse(s string) (*ParseTreeNode, error) { 35 | p.mux.Lock() 36 | p.table = map[string]map[int]*parseletResult{} 37 | for key := range p.rules { 38 | p.table[key] = map[int]*parseletResult{} 39 | } 40 | 41 | result, err := Rule(p.startRule)([]byte(s), 0, p) 42 | p.mux.Unlock() 43 | return result, err 44 | } 45 | 46 | type parseletResult struct { 47 | val *ParseTreeNode 48 | err error 49 | } 50 | 51 | func ignore(s []byte, pos int, parser *Parser) int { 52 | cursor := pos 53 | advanced := true 54 | for advanced { 55 | advanced = false 56 | for _, b := range []byte(parser.ignore) { 57 | if cursor >= len(s) { 58 | break 59 | } 60 | if s[cursor] == b { 61 | cursor++ 62 | advanced = true 63 | break 64 | } 65 | } 66 | } 67 | return cursor 68 | } 69 | -------------------------------------------------------------------------------- /parser_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestParserIgnore(t *testing.T) { 9 | sourceString := []byte("abcdef") 10 | parser := &Parser{ignore: " \t\n"} 11 | 12 | actual := ignore(sourceString, 0, parser) 13 | expected := 0 14 | if expected != actual { 15 | t.Errorf("Expected %v actual %v", expected, actual) 16 | } 17 | 18 | sourceString = []byte("\n\nabcdef") 19 | 20 | actual = ignore(sourceString, 0, parser) 21 
| expected = 2 22 | if expected != actual { 23 | t.Errorf("Expected %v actual %v", expected, actual) 24 | } 25 | 26 | sourceString = []byte("\n\n \t\n abcdef") 27 | 28 | actual = ignore(sourceString, 0, parser) 29 | expected = 7 30 | if expected != actual { 31 | t.Errorf("Expected %v actual %v", expected, actual) 32 | } 33 | 34 | parser = &Parser{ignore: ""} 35 | 36 | actual = ignore(sourceString, 0, parser) 37 | expected = 0 38 | if expected != actual { 39 | t.Errorf("Expected %v actual %v", expected, actual) 40 | } 41 | } 42 | 43 | func TestParse(t *testing.T) { 44 | invoked := false 45 | returnA := &ParseTreeNode{"a", []byte("abc"), 0, 3, []byte("sourceString"), []*ParseTreeNode{}} 46 | errorA := fmt.Errorf("a") 47 | p := Parser{ 48 | ignore: "", 49 | startRule: "a", 50 | rules: map[string]Parselet{ 51 | "a": func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 52 | invoked = true 53 | return nil, errorA 54 | }, 55 | }, 56 | } 57 | actualVal, actualErr := p.Parse("abcdefghijkl") 58 | valErr := p.table["a"][0].err 59 | if valErr != errorA { 60 | t.Errorf("table: failed to memoize") 61 | } 62 | 63 | if !invoked { 64 | t.Errorf("invoked: expected parselet to be invoked") 65 | } 66 | if actualVal != nil { 67 | t.Errorf("val: expected %v got %v", nil, actualVal) 68 | } 69 | if actualErr != errorA { 70 | t.Errorf("err: expected %v got %v", errorA, actualErr) 71 | } 72 | 73 | p = Parser{ 74 | ignore: "", 75 | startRule: "a", 76 | rules: map[string]Parselet{ 77 | "a": func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 78 | invoked = true 79 | return returnA, nil 80 | }, 81 | }, 82 | } 83 | actualVal, actualErr = p.Parse("abcdefghijkl") 84 | val := p.table["a"][0].val 85 | if string(val.Text) != "abc" { 86 | t.Errorf("table: failed to memoize") 87 | } 88 | if actualVal != val { 89 | t.Errorf("Memoized and returned values inconsistent: %v, %v", val, actualVal) 90 | } 91 | } 92 | 
-------------------------------------------------------------------------------- /pattern_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "regexp" 5 | "testing" 6 | ) 7 | 8 | func TestPattern(t *testing.T) { 9 | parser := &Parser{ignore: ""} 10 | parselet := Pattern(regexp.MustCompile("abc")) 11 | actualValue, actualError := parselet([]byte("def"), 0, parser) 12 | if actualValue != nil { 13 | t.Errorf("Expected no match, got %v", actualValue) 14 | } 15 | if actualError == nil { 16 | t.Errorf("Expected non-nil error") 17 | } 18 | 19 | actualValue, actualError = parselet([]byte("abcdef"), 3, parser) 20 | if actualValue != nil { 21 | t.Errorf("Expected no match, got %v", actualValue) 22 | } 23 | if actualError == nil { 24 | t.Errorf("Expected non-nil error") 25 | } 26 | 27 | actualValue, actualError = parselet([]byte("abcdef"), 0, parser) 28 | if actualValue == nil { 29 | t.Errorf("Expected match, got nil") 30 | } 31 | if actualError != nil { 32 | t.Errorf("Expected nil error, got %v", actualError) 33 | } 34 | if actualValue.Start != 0 { 35 | t.Errorf("val.Start: expected %v got %v", 0, actualValue.Start) 36 | } 37 | if actualValue.End != 3 { 38 | t.Errorf("val.End: expected %v got %v", 3, actualValue.End) 39 | } 40 | 41 | actualValue, actualError = parselet([]byte("abcdef"), 3, parser) 42 | if actualValue != nil { 43 | t.Errorf("Expected no match, got %v", actualValue) 44 | } 45 | if actualError == nil { 46 | t.Errorf("Expected non-nil error") 47 | } 48 | 49 | actualValue, actualError = parselet([]byte("defabc"), 3, parser) 50 | if actualValue == nil { 51 | t.Errorf("Expected match, got nil") 52 | } 53 | if actualError != nil { 54 | t.Errorf("Expected nil error, got %v", actualError) 55 | } 56 | if actualValue.Start != 3 { 57 | t.Errorf("val.Start: expected %v got %v", 3, actualValue.Start) 58 | } 59 | if actualValue.End != 6 { 60 | t.Errorf("val.End: expected %v got %v", 6, 
actualValue.End) 61 | } 62 | } 63 | 64 | func TestPatternWithIgnore(t *testing.T) { 65 | parser := &Parser{ignore: " \t\n"} 66 | parselet := Pattern(regexp.MustCompile("abc")) 67 | actualValue, actualError := parselet([]byte("def"), 0, parser) 68 | if actualValue != nil { 69 | t.Errorf("Expected no match, got %v", actualValue) 70 | } 71 | if actualError == nil { 72 | t.Errorf("Expected non-nil error") 73 | } 74 | 75 | actualValue, actualError = parselet([]byte("\t\t\tabc"), 0, parser) 76 | if actualValue == nil { 77 | t.Errorf("Expected match, got nil") 78 | } 79 | if actualError != nil { 80 | t.Errorf("Expected nil error, got %v", actualError) 81 | } 82 | if actualValue.Start != 0 { 83 | t.Errorf("val.Start: expected %v got %v", 3, actualValue.Start) 84 | } 85 | if actualValue.End != 6 { 86 | t.Errorf("val.End: expected %v got %v", 6, actualValue.End) 87 | } 88 | if string(actualValue.Text) != "abc" { 89 | t.Errorf("val.Text: expected %v got %v", "abc", string(actualValue.Text)) 90 | } 91 | } 92 | -------------------------------------------------------------------------------- /rule_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestRule(t *testing.T) { 9 | sourceString := []byte("abcdefg") 10 | parser := &Parser{ 11 | rules: map[string]Parselet{ 12 | "a": func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 13 | return &ParseTreeNode{"c", []byte("abc"), 5, 6, sourceString, []*ParseTreeNode{nil, nil}}, nil 14 | }, 15 | }, 16 | table: map[string]map[int]*parseletResult{ 17 | "a": map[int]*parseletResult{}, 18 | }, 19 | } 20 | 21 | parselet := Rule("a") 22 | actualValue, actualError := parselet(sourceString, 0, parser) 23 | expectedValue := ParseTreeNode{"a", []byte("abc"), 5, 6, sourceString, []*ParseTreeNode{nil, nil}} 24 | if actualValue.NodeType != expectedValue.NodeType { 25 | t.Errorf("Expected %v got %v", expectedValue.NodeType, 
actualValue.NodeType) 26 | } 27 | if string(actualValue.Text) != string(expectedValue.Text) { 28 | t.Errorf("Expected %v got %v", string(expectedValue.Text), string(actualValue.Text)) 29 | } 30 | if actualError != nil { 31 | t.Errorf("Expected %v got %v", nil, actualError) 32 | } 33 | if parser.table["a"][0].val != actualValue { 34 | t.Error("Expected result to be memoized") 35 | } 36 | if parser.table["a"][0].err != actualError { 37 | t.Error("Expected error to be memoized") 38 | } 39 | } 40 | 41 | func TestRuleDoesNotExist(t *testing.T) { 42 | parser := &Parser{ 43 | rules: map[string]Parselet{}, 44 | table: map[string]map[int]*parseletResult{ 45 | "a": map[int]*parseletResult{}, 46 | }, 47 | } 48 | parselet := Rule("a") 49 | actualValue, actualError := parselet([]byte("ghi"), 0, parser) 50 | if actualValue != nil { 51 | t.Errorf("Expected value to be nil, actual: %v", actualValue) 52 | } 53 | if actualError == nil { 54 | t.Errorf("Expected error to be non-nil") 55 | } 56 | } 57 | 58 | func TestRuleError(t *testing.T) { 59 | parser := &Parser{ 60 | rules: map[string]Parselet{}, 61 | table: map[string]map[int]*parseletResult{ 62 | "a": map[int]*parseletResult{}, 63 | }, 64 | } 65 | expectedError := fmt.Errorf("test") 66 | parser.rules["a"] = func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 67 | return nil, expectedError 68 | } 69 | parselet := Rule("a") 70 | actualValue, actualError := parselet([]byte("ghi"), 0, parser) 71 | if actualValue != nil { 72 | t.Errorf("Expected value to be nil, actual: %v", actualValue) 73 | } 74 | if actualError != expectedError { 75 | t.Errorf("Expected %v got %v", expectedError, actualError) 76 | } 77 | } 78 | 79 | func TestRuleMemoized(t *testing.T) { 80 | sourceString := []byte("sourceString") 81 | parser := &Parser{ 82 | rules: map[string]Parselet{ 83 | "a": func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 84 | return &ParseTreeNode{"c", []byte("abc"), 5, 6, sourceString, []*ParseTreeNode{nil, 
nil}}, nil 85 | }, 86 | }, 87 | table: map[string]map[int]*parseletResult{ 88 | "a": map[int]*parseletResult{ 89 | 0: &parseletResult{&ParseTreeNode{"c", []byte("abc"), 5, 6, sourceString, []*ParseTreeNode{nil, nil}}, nil}, 90 | }, 91 | }, 92 | } 93 | parselet := Rule("a") 94 | actualValue, actualError := parselet(sourceString, 0, parser) 95 | if actualValue != parser.table["a"][0].val { 96 | t.Errorf("Expected %v go %v", parser.table["a"][0].val, actualValue) 97 | } 98 | if actualError != parser.table["a"][0].err { 99 | t.Errorf("Expected %v go %v", parser.table["a"][0].err, actualError) 100 | } 101 | } 102 | -------------------------------------------------------------------------------- /sentence_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import "testing" 4 | 5 | func TestInvalidSentence(t *testing.T) { 6 | parser := Cmb( 7 | "sentence", 8 | Ignore(" "), 9 | Define("sentence", Sequence(Rule("subject"), Rule("verb"), Rule("object"))), 10 | Define("subject", Literal("Robots")), 11 | Define("verb", Literal("love")), 12 | Define("object", Literal("dogs")), 13 | ) 14 | tree, err := parser.Parse("Robots love dog") 15 | if err == nil { 16 | t.Errorf("Expected an error") 17 | } 18 | if tree != nil { 19 | t.Errorf("Expected no results") 20 | } 21 | } 22 | 23 | func TestSentence(t *testing.T) { 24 | parser := Cmb( 25 | "sentence", 26 | Ignore(" "), 27 | Define("sentence", Sequence(Rule("subject"), Rule("verb"), Rule("object"))), 28 | Define("subject", Literal("Robots")), 29 | Define("verb", Literal("love")), 30 | Define("object", Literal("dogs")), 31 | ) 32 | tree, err := parser.Parse("Robots love dogs") 33 | if err != nil { 34 | t.Errorf("Expected no errors") 35 | } 36 | validateSentence(tree, t) 37 | } 38 | 39 | func validateSentence(node *ParseTreeNode, t *testing.T) { 40 | if node.NodeType != "sentence" { 41 | t.Errorf("Expected %v, got %v", "sentence", node.NodeType) 42 | } 43 | 
validateSubject(node.Children[0], t) 44 | validateVerb(node.Children[1], t) 45 | validateObject(node.Children[2], t) 46 | } 47 | 48 | func validateSubject(node *ParseTreeNode, t *testing.T) { 49 | if node.NodeType != "subject" { 50 | t.Errorf("Expected %v, got %v", "subject", node.NodeType) 51 | } 52 | if string(node.Text) != "Robots" { 53 | t.Errorf("Expected %v, got %v", "Robots", node.Text) 54 | } 55 | } 56 | 57 | func validateVerb(node *ParseTreeNode, t *testing.T) { 58 | if node.NodeType != "verb" { 59 | t.Errorf("Expected %v, got %v", "subject", node.NodeType) 60 | } 61 | if string(node.Text) != "love" { 62 | t.Errorf("Expected %v, got %v", "love", node.Text) 63 | } 64 | } 65 | 66 | func validateObject(node *ParseTreeNode, t *testing.T) { 67 | if node.NodeType != "object" { 68 | t.Errorf("Expected %v, got %v", "subject", node.NodeType) 69 | } 70 | if string(node.Text) != "dogs" { 71 | t.Errorf("Expected %v, got %v", "dogs", node.Text) 72 | } 73 | } 74 | -------------------------------------------------------------------------------- /sequence_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestSequence(t *testing.T) { 9 | parser := &Parser{} 10 | sourceString := []byte("abcdefghijkl") 11 | returnA := &ParseTreeNode{"a", []byte("abc"), 0, 3, sourceString, []*ParseTreeNode{}} 12 | returnB := &ParseTreeNode{"b", []byte("def"), 3, 6, sourceString, []*ParseTreeNode{}} 13 | returnC := &ParseTreeNode{"c", []byte("ghi"), 6, 9, sourceString, []*ParseTreeNode{}} 14 | errorA := fmt.Errorf("a") 15 | makeParselet := func(val *ParseTreeNode, err error) Parselet { 16 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { return val, err } 17 | } 18 | 19 | parselet := Sequence() 20 | actualVal, actualErr := parselet(sourceString, 3, parser) 21 | if actualErr != nil { 22 | t.Errorf("Expected err to be nil, got %v", actualErr) 23 | } 24 | if 
string(actualVal.Text) != "" { 25 | t.Errorf("val.Text expected '%v' got '%v'", "", string(actualVal.Text)) 26 | } 27 | if actualVal.Start != 3 { 28 | t.Errorf("val.Start expected '%v' got '%v'", 3, actualVal.Start) 29 | } 30 | if actualVal.End != 3 { 31 | t.Errorf("val.End expected '%v' got '%v'", 3, actualVal.End) 32 | } 33 | 34 | parselet = Sequence( 35 | makeParselet(returnA, nil), 36 | ) 37 | actualVal, actualErr = parselet(sourceString, 0, parser) 38 | 39 | if actualErr != nil { 40 | t.Errorf("Expected err to be nil, got %v", actualErr) 41 | } 42 | if string(actualVal.Text) != "abc" { 43 | t.Errorf("val.Text expected '%v' got '%v'", "abc", string(actualVal.Text)) 44 | } 45 | if actualVal.Start != 0 { 46 | t.Errorf("val.Start expected '%v' got '%v'", 0, actualVal.Start) 47 | } 48 | if actualVal.End != 3 { 49 | t.Errorf("val.End expected '%v' got '%v'", 3, actualVal.End) 50 | } 51 | if actualVal.Children[0] != returnA { 52 | t.Errorf("val.Children[0] expected '%v' got '%v'", returnA, actualVal.Children[0]) 53 | } 54 | 55 | parselet = Sequence( 56 | makeParselet(returnA, nil), 57 | makeParselet(returnB, nil), 58 | makeParselet(returnC, nil), 59 | ) 60 | actualVal, actualErr = parselet(sourceString, 0, parser) 61 | 62 | if actualErr != nil { 63 | t.Errorf("Expected err to be nil, got %v", actualErr) 64 | } 65 | if string(actualVal.Text) != "abcdefghi" { 66 | t.Errorf("val.Text expected '%v' got '%v'", "abc", string(actualVal.Text)) 67 | } 68 | if actualVal.Start != 0 { 69 | t.Errorf("val.Start expected '%v' got '%v'", 0, actualVal.Start) 70 | } 71 | if actualVal.End != 9 { 72 | t.Errorf("val.End expected '%v' got '%v'", 3, actualVal.End) 73 | } 74 | if actualVal.Children[0] != returnA { 75 | t.Errorf("val.Children[0] expected '%v' got '%v'", returnA, actualVal.Children[0]) 76 | } 77 | if actualVal.Children[1] != returnB { 78 | t.Errorf("val.Children[1] expected '%v' got '%v'", returnB, actualVal.Children[1]) 79 | } 80 | if actualVal.Children[2] != returnC { 81 | 
t.Errorf("val.Children[2] expected '%v' got '%v'", returnC, actualVal.Children[2]) 82 | } 83 | 84 | parselet = Sequence( 85 | makeParselet(returnA, nil), 86 | makeParselet(returnB, nil), 87 | makeParselet(returnC, nil), 88 | makeParselet(nil, errorA), 89 | ) 90 | actualVal, actualErr = parselet(sourceString, 0, parser) 91 | if actualVal != nil { 92 | t.Errorf("val: expected nil got %v", actualVal) 93 | } 94 | if actualErr != errorA { 95 | t.Errorf("err: expected %v got %v", errorA, actualErr) 96 | } 97 | } 98 | -------------------------------------------------------------------------------- /zeroOrMore_test.go: -------------------------------------------------------------------------------- 1 | package cmb 2 | 3 | import ( 4 | "fmt" 5 | "testing" 6 | ) 7 | 8 | func TestZeroOrMore(t *testing.T) { 9 | parser := &Parser{} 10 | sourceString := []byte("abcdefghijklabcdefghijkl") 11 | makeParselet := func(times int) Parselet { 12 | count := 0 13 | return func(s []byte, pos int, parser *Parser) (*ParseTreeNode, error) { 14 | if count < times { 15 | count++ 16 | return &ParseTreeNode{"a", []byte("abc"), pos, pos + 3, sourceString, []*ParseTreeNode{}}, nil 17 | } 18 | return nil, fmt.Errorf("a") 19 | } 20 | } 21 | 22 | parselet := ZeroOrMore(makeParselet(0)) 23 | actualVal, actualErr := parselet(sourceString, 3, parser) 24 | if actualErr != nil { 25 | t.Errorf("err: expected nil, got %v", actualErr) 26 | } 27 | if string(actualVal.Text) != "" { 28 | t.Errorf("val.Text: expected '%v' got '%v'", "", string(actualVal.Text)) 29 | } 30 | if actualVal.Start != 3 { 31 | t.Errorf("val.Start: expected '%v' got '%v'", 3, actualVal.Start) 32 | } 33 | if actualVal.End != 3 { 34 | t.Errorf("val.End: expected '%v' got '%v'", 3, actualVal.End) 35 | } 36 | if len(actualVal.Children) != 0 { 37 | t.Errorf("val.End length: expected '%v' got '%v'", 0, len(actualVal.Children)) 38 | } 39 | 40 | parselet = ZeroOrMore(makeParselet(5)) 41 | actualVal, actualErr = parselet(sourceString, 3, parser) 
42 | if actualErr != nil { 43 | t.Errorf("err: expected nil, got %v", actualErr) 44 | } 45 | if string(actualVal.Text) != "defghijklabcdef" { 46 | t.Errorf("val.Text: expected '%v' got '%v'", "defghijklabcdef", string(actualVal.Text)) 47 | } 48 | if actualVal.Start != 3 { 49 | t.Errorf("val.Start: expected '%v' got '%v'", 3, actualVal.Start) 50 | } 51 | if actualVal.End != 18 { 52 | t.Errorf("val.End: expected '%v' got '%v'", 18, actualVal.End) 53 | } 54 | if len(actualVal.Children) != 5 { 55 | t.Errorf("val.End length: expected '%v' got '%v'", 5, len(actualVal.Children)) 56 | } 57 | } 58 | --------------------------------------------------------------------------------