├── .editorconfig ├── .gitignore ├── LICENSE ├── README.md ├── bench ├── edit.go ├── java.go └── main.go ├── capture_test.go ├── charset ├── charset.go └── charset_test.go ├── cmd └── gpeg │ └── main.go ├── go.mod ├── gpeg_test.go ├── grammars ├── arith.peg ├── c.peg ├── java.peg ├── java_memo.peg ├── json.peg ├── json_memo.peg ├── lpeg.peg ├── peg.peg └── re.peg ├── incremental_test.go ├── input ├── input.go ├── input_test.go ├── linerope │ ├── .gitignore │ ├── LICENSE │ ├── line.go │ ├── rope.go │ ├── rope_test.go │ └── util.go ├── reader.go └── reader_test.go ├── isa ├── checker.go └── isa.go ├── memo ├── capture.go ├── edit.go ├── entry.go ├── interval │ ├── interval_test.go │ ├── lazy │ │ ├── LICENSE-AVL │ │ ├── array.go │ │ ├── interval.go │ │ ├── interval_test.go │ │ └── tree.go │ ├── lazylog │ │ ├── interval.go │ │ └── tree.go │ └── map.go ├── none.go ├── table.go └── tree.go ├── pattern ├── compile.go ├── nodes.go ├── optimize.go ├── pattern.go └── string.go ├── re ├── grammar.go └── re.go ├── re_test.go ├── recover_test.go ├── rxconv ├── rxconv.go └── rxconv_test.go ├── testdata ├── ScriptRuntime.java ├── bible.txt ├── test.java └── test.json └── vm ├── code.go ├── code_test.go ├── op.go ├── stack.go └── vm.go /.editorconfig: -------------------------------------------------------------------------------- 1 | # See http://editorconfig.org 2 | 3 | # In Go files we indent with tabs but still 4 | # set indent_size to control the GitHub web viewer. 5 | [*.go] 6 | indent_size=4 7 | 8 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | *.out 2 | *.test 3 | main/ 4 | todo.txt 5 | capt.txt 6 | bench/bench 7 | gpeg 8 | !cmd/gpeg 9 | *.pdf 10 | *.java 11 | /flare/flare 12 | *.svg 13 | *.dat 14 | *.so 15 | /benchmarks/apply 16 | /benchmarks/apply_gpeg 17 | /benchmarks/fullparse 18 | /benchmarks/reparse 19 | /bench/bench 20 | testdata/* 21 | !testdata/ScriptRuntime.java 22 | !testdata/bible.txt 23 | !testdata/test.java 24 | !testdata/test.json 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020: Zachary Yedidia. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPeg 2 | 3 | [![Documentation](https://godoc.org/github.com/zyedidia/gpeg?status.svg)](http://godoc.org/github.com/zyedidia/gpeg) 4 | [![Go Report Card](https://goreportcard.com/badge/github.com/zyedidia/gpeg)](https://goreportcard.com/report/github.com/zyedidia/gpeg) 5 | [![MIT License](https://img.shields.io/badge/license-MIT-blue.svg)](https://github.com/zyedidia/gpeg/blob/master/LICENSE) 6 | 7 | GPeg is a tool for working with parsing expression grammars (PEGs). It is 8 | built with three primary goals in mind: 9 | 10 | * Efficient parsing for two use-cases. 11 | * Language grammars with AST construction (where PEGs serve as a CFG 12 | alternative) 13 | * Patterns (where PEGs serve as a regex alternative). 14 | * Incremental parsing. 15 | * Support for dynamically loading grammars (meaning parsers can be generated 16 | and used at runtime). 17 | 18 | GPeg uses the same general parsing techniques as Lua's LPeg library and is 19 | heavily inspired by LPeg. 20 | 21 | # Features 22 | 23 | * Fast incremental parsing. 24 | * Parsing virtual machine (parsers can be dynamically generated). 25 | * Pattern compiler with optimizations. 26 | * Support for the original PEG syntax with some extensions. 27 | * Parse more complex string data structures (via ReaderAt interface). 28 | * Support for back-references (context-sensitivity). 29 | * Can convert most Go regular expressions to PEGs (see the `rxconv` package). 30 | * Basic error recovery. 31 | * Syntax highlighting library ([zyedidia/flare](https://github.com/zyedidia/flare)). 32 | * Tools for visualizing grammars, ASTs, and memo tables ([zyedidia/gpeg-extra](https://github.com/zyedidia/gpeg-extra)). 33 | 34 | # Publications 35 | 36 | * Zachary Yedidia and Stephen Chong. "Fast Incremental PEG Parsing." Proceedings of the 14th ACM SIGPLAN International Conference on Software Language Engineering (SLE), October 2021. [Link](https://zyedidia.github.io/preprints/gpeg_sle21.pdf). 37 | * Zachary Yedidia. "Incremental PEG Parsing." Bachelor's thesis. [Link](https://zyedidia.github.io/notes/yedidia_thesis.pdf). 38 | 39 | # Related work 40 | 41 | * Ford, Bryan. "Parsing expression grammars: a recognition-based syntactic foundation." Proceedings of the 31st ACM SIGPLAN-SIGACT symposium on Principles of programming languages. 2004. [Link](https://bford.info/pub/lang/peg.pdf). 42 | * [LPeg](http://www.inf.puc-rio.br/~roberto/lpeg/). 43 | * Ierusalimschy, Roberto. "A text pattern‐matching tool based on Parsing 44 | Expression Grammars." Software: Practice and Experience 39.3 (2009): 45 | 221-258. [Link](http://www.inf.puc-rio.br/~roberto/docs/peg.pdf). 46 | * Medeiros, Sérgio, and Fabio Mascarenhas. "Syntax error recovery in 47 | parsing expression grammars." Proceedings of the 33rd Annual ACM 48 | Symposium on Applied Computing. 2018. 49 | [Link](https://arxiv.org/pdf/1806.11150.pdf). 50 | * Medeiros, Sérgio, Fabio Mascarenhas, and Roberto Ierusalimschy. "Left 51 | recursion in parsing expression grammars." Science of Computer 52 | Programming 96 (2014): 177-190. 53 | [Link](https://arxiv.org/pdf/1207.0443.pdf). 54 | * [NPeg](https://github.com/zevv/npeg). 55 | * [Papa Carlo](https://lakhin.com/projects/papa-carlo/). 56 | * Dubroy, Patrick, and Alessandro Warth. "Incremental packrat parsing." 
57 | Proceedings of the 10th ACM SIGPLAN International Conference on Software 58 | Language Engineering. 2017. 59 | [Link](https://ohmlang.github.io/pubs/sle2017/incremental-packrat-parsing.pdf). 60 | * Marcelo Oikawa, Roberto Ierusalimschy, Ana Lucia de Moura. "Converting regexes to Parsing Expression Grammars." [Link](http://www.inf.puc-rio.br/~roberto/docs/ry10-01.pdf). 61 | * [Tree Sitter](https://tree-sitter.github.io/tree-sitter/). 62 | 63 | -------------------------------------------------------------------------------- /bench/edit.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "math/rand" 5 | 6 | "github.com/zyedidia/gpeg/input/linerope" 7 | "github.com/zyedidia/gpeg/memo" 8 | p "github.com/zyedidia/gpeg/pattern" 9 | "github.com/zyedidia/gpeg/vm" 10 | ) 11 | 12 | type Edit struct { 13 | Start, End int 14 | Text []byte 15 | } 16 | 17 | func EditToEdits(e Edit) []Edit { 18 | var edits []Edit 19 | 20 | for i := e.Start; i < e.End; i++ { 21 | edits = append(edits, Edit{ 22 | Start: e.Start, 23 | End: e.Start + 1, 24 | Text: nil, 25 | }) 26 | } 27 | 28 | for i := 0; i < len(e.Text); i++ { 29 | edits = append(edits, Edit{ 30 | Start: e.Start + i, 31 | End: e.Start + i, 32 | Text: []byte{e.Text[i]}, 33 | }) 34 | } 35 | 36 | return edits 37 | } 38 | 39 | func ToSingleEdits(edits []Edit) []Edit { 40 | single := make([]Edit, 0) 41 | 42 | for _, e := range edits { 43 | single = append(single, EditToEdits(e)...) 44 | } 45 | 46 | return single 47 | } 48 | 49 | // strategies for generating edits to a Java file: 50 | // * insert newline at start of line 51 | // * change contents of comment 52 | // * delete single-line comment 53 | // * change function name 54 | // * change function qualifier (e.g., from 'private' to 'public') 55 | // * change contents of string 56 | 57 | type EditType int 58 | 59 | const ( 60 | EditInsertNewline EditType = iota 61 | EditRemoveNewline 62 | EditWhitespace 63 | EditChangeComment 64 | EditRemoveComment 65 | EditChangeFunc 66 | EditChangeFuncQual 67 | EditChangeString 68 | ) 69 | 70 | var editTypes = []EditType{ 71 | EditInsertNewline, 72 | EditRemoveNewline, 73 | EditChangeComment, 74 | EditRemoveComment, 75 | EditChangeFunc, 76 | EditChangeFuncQual, 77 | EditChangeString, 78 | } 79 | 80 | func GenerateEdits(data []byte, nedits int) []Edit { 81 | r := linerope.New(data) 82 | edits := make([]Edit, 0, nedits) 83 | 84 | prog := p.MustCompile(grammar) 85 | java := vm.Encode(prog) 86 | tbl := memo.NewTreeTable(512) 87 | 88 | for i := 0; i < nedits; { 89 | _, _, ast, _ := java.Exec(r, tbl) 90 | 91 | var e Edit 92 | typ := editTypes[rand.Intn(len(editTypes))] 93 | 94 | switch typ { 95 | case EditInsertNewline: 96 | line := rand.Intn(r.NumLines()) 97 | off := r.OffsetAt(line, 0) 98 | e = Edit{ 99 | Start: off, 100 | End: off, 101 | Text: []byte{'\n'}, 102 | } 103 | case EditRemoveNewline: 104 | candidates := make([]*memo.Capture, 0) 105 | it := ast.ChildIterator(0) 106 | for ch := it(); ch != nil; ch = it() { 107 | if ch.Id() == capNewline { 108 | candidates = append(candidates, ch) 109 | } 110 | } 111 | if len(candidates) == 0 { 112 | continue 113 | } 114 | ch := candidates[rand.Intn(len(candidates))] 115 | e = Edit{ 116 | Start: ch.Start(), 117 | End: ch.Start() + ch.Len(), 118 | Text: nil, 119 | } 120 | case EditRemoveComment: 121 | candidates := make([]*memo.Capture, 0) 122 | it := ast.ChildIterator(0) 123 | for ch := it(); ch != nil; ch = it() { 124 | if ch.Id() == capLineComment { 125 | 
candidates = append(candidates, ch) 126 | } 127 | } 128 | if len(candidates) == 0 { 129 | continue 130 | } 131 | ch := candidates[rand.Intn(len(candidates))] 132 | line, _ := r.LineColAt(ch.Start()) 133 | e = Edit{ 134 | Start: ch.Start(), 135 | End: r.OffsetAt(line+1, 0), 136 | Text: nil, 137 | } 138 | case EditChangeFunc: 139 | candidates := make([]*memo.Capture, 0) 140 | it := ast.ChildIterator(0) 141 | for ch := it(); ch != nil; ch = it() { 142 | if ch.Id() == capFuncName { 143 | candidates = append(candidates, ch) 144 | } 145 | } 146 | if len(candidates) == 0 { 147 | continue 148 | } 149 | ch := candidates[rand.Intn(len(candidates))] 150 | e = Edit{ 151 | Start: ch.Start(), 152 | End: ch.Start() + ch.Len(), 153 | Text: randID(rand.Intn(5) + 4), 154 | } 155 | case EditChangeFuncQual: 156 | candidates := make([]*memo.Capture, 0) 157 | it := ast.ChildIterator(0) 158 | for ch := it(); ch != nil; ch = it() { 159 | if ch.Id() == capFuncQual { 160 | candidates = append(candidates, ch) 161 | } 162 | } 163 | if len(candidates) == 0 { 164 | continue 165 | } 166 | ch := candidates[rand.Intn(len(candidates))] 167 | modifiers := []string{ 168 | "protected", 169 | "public", 170 | "private", 171 | } 172 | e = Edit{ 173 | Start: ch.Start(), 174 | End: ch.Start() + ch.Len(), 175 | Text: []byte(modifiers[rand.Intn(len(modifiers))]), 176 | } 177 | default: 178 | continue 179 | } 180 | 181 | r.Remove(e.Start, e.End) 182 | r.Insert(e.Start, e.Text) 183 | tbl.ApplyEdit(memo.Edit{ 184 | Start: e.Start, 185 | End: e.End, 186 | Len: len(e.Text), 187 | }) 188 | 189 | edits = append(edits, e) 190 | i++ 191 | } 192 | 193 | // r.WriteTo(os.Stdout) 194 | 195 | return edits 196 | } 197 | 198 | var rbytes = []byte("abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 199 | 200 | func randID(n int) []byte { 201 | id := make([]byte, n) 202 | for i := range id { 203 | id[i] = rbytes[rand.Intn(len(rbytes))] 204 | } 205 | return id 206 | } 207 | -------------------------------------------------------------------------------- /bench/java.go: -------------------------------------------------------------------------------- 1 | package bench 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/charset" 5 | "github.com/zyedidia/gpeg/isa" 6 | p "github.com/zyedidia/gpeg/pattern" 7 | ) 8 | 9 | var ( 10 | alpha = p.Set(charset.Range('A', 'Z').Add(charset.Range('a', 'z'))) 11 | alnum = p.Set(charset.Range('A', 'Z').Add(charset.Range('a', 'z')).Add(charset.Range('0', '9'))) 12 | 13 | word = p.Concat( 14 | p.Or(alpha, p.Literal("_")), 15 | p.Star(p.Or(alnum, p.Literal("_"))), 16 | ) 17 | ) 18 | 19 | func BlockPatt(start, end string, escape p.Pattern) p.Pattern { 20 | if escape != nil { 21 | return p.Concat( 22 | p.Literal(start), 23 | p.Star( 24 | p.Or( 25 | escape, 26 | p.Concat( 27 | p.Not(p.Literal(end)), 28 | p.Any(1), 29 | ), 30 | ), 31 | ), 32 | p.Literal(end), 33 | ) 34 | } 35 | 36 | return p.Concat( 37 | p.Literal(start), 38 | p.Star(p.Concat( 39 | p.Not(p.Literal(end)), 40 | p.Any(1), 41 | )), 42 | p.Literal(end), 43 | ) 44 | } 45 | 46 | func WordMatch(words ...string) p.Pattern { 47 | m := make(map[string]struct{}) 48 | 49 | for _, w := range words { 50 | m[w] = struct{}{} 51 | } 52 | 53 | return p.Check(word, isa.MapChecker(m)) 54 | } 55 | 56 | // mini java grammar for picking out important pieces 57 | 58 | var grammar = p.Grammar("S", map[string]p.Pattern{ 59 | "S": p.Star(p.Memo(p.Or( 60 | p.NonTerm("Token"), 61 | p.Concat( 62 | p.Any(1), 63 | p.Star(p.Concat( 64 | p.Not(p.NonTerm("Token")), 65 | p.Any(1), 66 | )), 67 | ), 
68 | ))), 69 | "Token": p.Or( 70 | p.NonTerm("Comment"), 71 | p.NonTerm("FuncQual"), 72 | p.NonTerm("FuncName"), 73 | p.NonTerm("String"), 74 | p.NonTerm("Newline"), 75 | ), 76 | "Comment": p.Or(p.NonTerm("LineComment"), p.NonTerm("LongComment")), 77 | "LineComment": p.Cap(BlockPatt("//", "\n", nil), capLineComment), 78 | "LongComment": BlockPatt("/*", "*/", nil), 79 | 80 | "FuncQual": p.Cap(WordMatch("public", "protected", "private"), capFuncQual), 81 | 82 | "FuncName": p.Concat( 83 | p.Cap(p.NonTerm("Identifier"), capFuncName), 84 | p.Literal("("), 85 | ), 86 | "Identifier": word, 87 | 88 | "String": p.Cap( 89 | BlockPatt("\"", "\"", p.NonTerm("Escape")), 90 | capString, 91 | ), 92 | "Escape": p.Concat( 93 | p.Literal("\\"), 94 | p.Set(charset.New([]byte{'\'', '"', 't', 'n', 'b', 'f', 'r', '\\'})), 95 | ), 96 | 97 | "Newline": p.Cap(p.Literal("\n"), capNewline), 98 | }) 99 | 100 | const ( 101 | capLineComment = iota 102 | capFuncName 103 | capFuncQual 104 | capString 105 | capNewline 106 | ) 107 | -------------------------------------------------------------------------------- /bench/main.go: -------------------------------------------------------------------------------- 1 | // +build ignore 2 | 3 | package main 4 | 5 | import ( 6 | "flag" 7 | "fmt" 8 | "io/ioutil" 9 | "log" 10 | "strconv" 11 | 12 | "github.com/zyedidia/gpeg/bench" 13 | ) 14 | 15 | func main() { 16 | flag.Parse() 17 | 18 | data, err := ioutil.ReadFile(flag.Args()[0]) 19 | if err != nil { 20 | log.Fatal(err) 21 | } 22 | 23 | edits := bench.GenerateEdits(data, 100) 24 | 25 | for _, e := range edits { 26 | fmt.Printf("(%d, %d): %s\n", e.Start, e.End, strconv.Quote(string(e.Text))) 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /capture_test.go: -------------------------------------------------------------------------------- 1 | package gpeg 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/charset" 8 | "github.com/zyedidia/gpeg/memo" 9 | . "github.com/zyedidia/gpeg/pattern" 10 | "github.com/zyedidia/gpeg/vm" 11 | ) 12 | 13 | func TestCaptures(t *testing.T) { 14 | const ( 15 | digit = iota 16 | num 17 | ) 18 | 19 | p := Star(Memo(Concat( 20 | Cap(Plus( 21 | Cap(Set(charset.Range('0', '9')), digit), 22 | ), num), 23 | Optional(Literal(" ")), 24 | ))) 25 | code := vm.Encode(MustCompile(p)) 26 | r := strings.NewReader("12 34 56 78 9") 27 | _, _, ast, _ := code.Exec(r, memo.NoneTable{}) 28 | 29 | expect := [][2]int{ 30 | {0, 2}, 31 | {3, 2}, 32 | {6, 2}, 33 | {9, 2}, 34 | {12, 1}, 35 | } 36 | 37 | it := ast.ChildIterator(0) 38 | i := 0 39 | for ch := it(); ch != nil; ch = it() { 40 | if expect[i][0] != ch.Start() || expect[i][1] != ch.Len() { 41 | t.Fatal(ch.Start(), ch.Len()) 42 | } 43 | i++ 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /charset/charset.go: -------------------------------------------------------------------------------- 1 | // Package charset provides data types and functions for managing sets of 2 | // characters. 3 | package charset 4 | 5 | import ( 6 | "math/bits" 7 | "strconv" 8 | ) 9 | 10 | const log2WordSize = 6 11 | const wordSize = 64 12 | 13 | // A Set represents a set of chars. 14 | type Set struct { 15 | // Bits is the bit array for indicating which chars are in the set. 16 | // We have 256 bits because a char can have 256 different values. 17 | Bits [4]uint64 18 | } 19 | 20 | // A SmallSet is the same as a Set but can only represent 128 possible chars. 
21 | // This is an optimization, since in the common case, only ASCII bytes are 22 | // used, which are <128. The full Set is only necessary when bytes outside 23 | // the ASCII range (>=128) must be matched. 24 | type SmallSet struct { 25 | Bits [2]uint64 26 | } 27 | 28 | // Size returns the number of chars matched by this SmallSet. 29 | func (c SmallSet) Size() int { 30 | return bits.OnesCount64(c.Bits[0]) + bits.OnesCount64(c.Bits[1]) 31 | } 32 | 33 | // Has checks if a charset accepts a character. 34 | // Pointer receiver is for performance. 35 | func (c *SmallSet) Has(r byte) bool { 36 | return c.Bits[r>>log2WordSize]&(uint64(1)<<(r&(wordSize-1))) != 0 37 | } 38 | 39 | // IsSmall returns true if this set can be converted to a small set. In other 40 | // words, if this set only matches bytes <128. 41 | func (c Set) IsSmall() bool { 42 | return c.Bits[2] == 0 && c.Bits[3] == 0 43 | } 44 | 45 | // SmallSet converts this Set to a SmallSet. 46 | func (c Set) SmallSet() SmallSet { 47 | return SmallSet{ 48 | Bits: [2]uint64{c.Bits[0], c.Bits[1]}, 49 | } 50 | } 51 | 52 | // New returns a charset which accepts all chars in 'chars'. Any byte 53 | // value (0-255) may appear in 'chars'. 54 | func New(chars []byte) Set { 55 | var set Set 56 | for _, r := range chars { 57 | switch { 58 | case r < 64: 59 | bit := uint64(1) << r 60 | set.Bits[0] |= bit 61 | case r < 128: 62 | bit := uint64(1) << (r - 64) 63 | set.Bits[1] |= bit 64 | case r < 192: 65 | bit := uint64(1) << (r - 128) 66 | set.Bits[2] |= bit 67 | default: 68 | bit := uint64(1) << (r - 192) 69 | set.Bits[3] |= bit 70 | } 71 | } 72 | 73 | return set 74 | } 75 | 76 | // Range returns a charset matching all characters between `low` and 77 | // `high` inclusive. 78 | func Range(low, high byte) Set { 79 | var set Set 80 | for c := int(low); c <= int(high); c++ { 81 | switch { 82 | case c < 64: 83 | bit := uint64(1) << c 84 | set.Bits[0] |= bit 85 | case c < 128: 86 | bit := uint64(1) << (c - 64) 87 | set.Bits[1] |= bit 88 | case c < 192: 89 | bit := uint64(1) << (c - 128) 90 | set.Bits[2] |= bit 91 | default: 92 | bit := uint64(1) << (c - 192) 93 | set.Bits[3] |= bit 94 | } 95 | } 96 | 97 | return set 98 | } 99 | 100 | // Complement returns a charset that matches all characters except for those 101 | // matched by `c`. 102 | func (c Set) Complement() Set { 103 | return Set{ 104 | Bits: [4]uint64{^c.Bits[0], ^c.Bits[1], ^c.Bits[2], ^c.Bits[3]}, 105 | } 106 | } 107 | 108 | // Add returns the union of the characters matched by the two charsets. 109 | func (c Set) Add(c1 Set) Set { 110 | return Set{ 111 | Bits: [4]uint64{c1.Bits[0] | c.Bits[0], c1.Bits[1] | c.Bits[1], c1.Bits[2] | c.Bits[2], c1.Bits[3] | c.Bits[3]}, 112 | } 113 | } 114 | 115 | // Sub removes from 'c' any characters in 'c1'. 116 | func (c Set) Sub(c1 Set) Set { 117 | return Set{ 118 | Bits: [4]uint64{^c1.Bits[0] & c.Bits[0], ^c1.Bits[1] & c.Bits[1], ^c1.Bits[2] & c.Bits[2], ^c1.Bits[3] & c.Bits[3]}, 119 | } 120 | } 121 | 122 | // Size returns the number of chars matched by this Set. 123 | func (c Set) Size() int { 124 | return bits.OnesCount64(c.Bits[0]) + bits.OnesCount64(c.Bits[1]) + bits.OnesCount64(c.Bits[2]) + bits.OnesCount64(c.Bits[3]) 125 | } 126 | 127 | // Has checks if a charset accepts a character. 128 | // Pointer receiver is for performance. 129 | func (c *Set) Has(r byte) bool { 130 | return c.Bits[r>>log2WordSize]&(uint64(1)<<(r&(wordSize-1))) != 0 131 | } 132 | 133 | // String returns the string representation of the charset.
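// Runs of consecutive characters are collapsed into ranges, so a set containing the digits and '_' prints as {'0'..'9','_'}.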
134 | func (c Set) String() string { 135 | s := "" 136 | inRange := false 137 | for b := int(0); b <= 255; b++ { 138 | if c.Has(byte(b)) && b == 255 { 139 | s += strconv.QuoteRuneToASCII(rune(b)) 140 | } else if c.Has(byte(b)) && !inRange { 141 | inRange = true 142 | if c.Has(byte(b + 1)) { 143 | s += strconv.QuoteRuneToASCII(rune(b)) + ".." 144 | } 145 | } else if !c.Has(byte(b)) && inRange { 146 | inRange = false 147 | s += strconv.QuoteRuneToASCII(rune(b-1)) + "," 148 | } 149 | } 150 | if s != "" && s[len(s)-1] == ',' { 151 | s = s[:len(s)-1] 152 | } 153 | s = "{" + s + "}" 154 | return s 155 | } 156 | -------------------------------------------------------------------------------- /charset/charset_test.go: -------------------------------------------------------------------------------- 1 | package charset_test 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/zyedidia/gpeg/charset" 7 | ) 8 | 9 | func inSet(set charset.Set, in, notin []byte, t *testing.T) { 10 | for _, r := range in { 11 | if !set.Has(r) { 12 | t.Errorf("Error: %c returned 'not in set'", r) 13 | } 14 | } 15 | 16 | for _, r := range notin { 17 | if set.Has(r) { 18 | t.Errorf("Error: %c returned 'in set'", r) 19 | } 20 | } 21 | } 22 | 23 | func TestSet(t *testing.T) { 24 | in := []byte{'a', 'b', 'c', 'd', '{', '}'} 25 | notin := []byte{'x', 'y', 'z', '[', ']'} 26 | 27 | set := charset.New(in) 28 | 29 | inSet(set, in, notin, t) 30 | } 31 | 32 | func TestRangeUnion(t *testing.T) { 33 | set := charset.Range('a', 'z').Add(charset.Range('A', 'Z')) 34 | 35 | in := []byte{'a', 'b', 'c', 'd', 'z', 'y', 'A', 'Z', 'B'} 36 | notin := []byte{'0', '1', '2', 0} 37 | 38 | inSet(set, in, notin, t) 39 | } 40 | 41 | func TestComplement(t *testing.T) { 42 | in := []byte{'a', 'b', 'c', 'd', '{', '}'} 43 | notin := []byte{'x', 'y', 'z', '[', ']'} 44 | 45 | set := charset.New(in).Complement() 46 | 47 | inSet(set, notin, in, t) 48 | } 49 | 50 | func TestBigSet(t *testing.T) { 51 | in := []byte{200, 201, 203} 52 | notin := []byte{0, 1, 2} 53 | 54 | set := charset.Range(128, '\xff') 55 | 56 | inSet(set, in, notin, t) 57 | } 58 | -------------------------------------------------------------------------------- /cmd/gpeg/main.go: -------------------------------------------------------------------------------- 1 | package main 2 | 3 | import ( 4 | "flag" 5 | "fmt" 6 | "io" 7 | "log" 8 | "os" 9 | "regexp/syntax" 10 | 11 | "github.com/zyedidia/gpeg/pattern" 12 | "github.com/zyedidia/gpeg/re" 13 | "github.com/zyedidia/gpeg/rxconv" 14 | ) 15 | 16 | var regex = flag.Bool("regex", false, "compile regex instead of PEG") 17 | 18 | func main() { 19 | flag.Parse() 20 | 21 | args := flag.Args() 22 | 23 | var in io.Reader 24 | if len(args) <= 0 { 25 | in = os.Stdin 26 | } else { 27 | f, err := os.Open(args[0]) 28 | if err != nil { 29 | log.Fatal(err) 30 | } 31 | defer f.Close() 32 | in = f 33 | } 34 | 35 | bytes, err := io.ReadAll(in) 36 | if err != nil { 37 | log.Fatal(err) 38 | } 39 | var patt pattern.Pattern 40 | 41 | if *regex { 42 | patt, err = rxconv.FromRegexp(string(bytes), syntax.Perl) 43 | } else { 44 | patt, err = re.Compile(string(bytes)) 45 | } 46 | if err != nil { 47 | log.Fatal(err) 48 | } 49 | prog, err := pattern.Compile(patt) 50 | if err != nil { 51 | log.Fatal(err) 52 | } 53 | fmt.Println(prog) 54 | } 55 | -------------------------------------------------------------------------------- /go.mod: -------------------------------------------------------------------------------- 1 | module github.com/zyedidia/gpeg 2 | 3 | go 1.16 4 | 
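A minimal usage sketch, not a file in this repository, tying the pieces above together. It relies only on APIs that appear elsewhere in this archive: re.Compile (cmd/gpeg/main.go), pattern.MustCompile, vm.Encode and Exec (the tests below), and memo.NewTreeTable, memo.Edit and the linerope package (incremental_test.go). The grammar text is grammars/arith.peg inlined; the expected offset 10 matches TestArithmeticGrammar, and 11 after the edit is simply the length of the edited input.

package main

import (
	"fmt"
	"log"

	"github.com/zyedidia/gpeg/input/linerope"
	"github.com/zyedidia/gpeg/memo"
	"github.com/zyedidia/gpeg/pattern"
	"github.com/zyedidia/gpeg/re"
	"github.com/zyedidia/gpeg/vm"
)

func main() {
	// The arithmetic grammar from grammars/arith.peg, inlined.
	patt, err := re.Compile(`
Expr <- Factor ([+\-] Factor)*
Factor <- Term ([*/] Term)*
Term <- Number / '(' Expr ')'
Number <- [0-9]+
`)
	if err != nil {
		log.Fatal(err)
	}
	// Compile the pattern and encode it for the parsing virtual machine.
	code := vm.Encode(pattern.MustCompile(patt))

	// Parse once, memoizing results in a tree table.
	// 512 is the same value the repo's tests pass to NewTreeTable.
	doc := linerope.New([]byte("13+(22-15)"))
	tbl := memo.NewTreeTable(512)
	match, off, _, _ := code.Exec(doc, tbl)
	fmt.Println(match, off) // true 10

	// Replace "22" with "100", report the edit to the memo table, and reparse.
	// On large inputs with memoized rules (e.g. grammars/java_memo.peg), only
	// the damaged region is re-examined.
	doc.Remove(4, 6)
	doc.Insert(4, []byte("100"))
	tbl.ApplyEdit(memo.Edit{Start: 4, End: 6, Len: 3})
	match, off, _, _ = code.Exec(doc, tbl)
	fmt.Println(match, off) // true 11
}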
-------------------------------------------------------------------------------- /gpeg_test.go: -------------------------------------------------------------------------------- 1 | package gpeg 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | "io/ioutil" 7 | "os" 8 | "strconv" 9 | "strings" 10 | "testing" 11 | 12 | "github.com/zyedidia/gpeg/charset" 13 | "github.com/zyedidia/gpeg/input" 14 | "github.com/zyedidia/gpeg/isa" 15 | "github.com/zyedidia/gpeg/memo" 16 | . "github.com/zyedidia/gpeg/pattern" 17 | "github.com/zyedidia/gpeg/vm" 18 | ) 19 | 20 | type PatternTest struct { 21 | in string 22 | match int 23 | } 24 | 25 | func check(p Pattern, tests []PatternTest, t *testing.T) { 26 | code := vm.Encode(MustCompile(p)) 27 | for _, tt := range tests { 28 | name := tt.in[:min(10, len(tt.in))] 29 | t.Run(name, func(t *testing.T) { 30 | match, off, _, _ := code.Exec(strings.NewReader(tt.in), memo.NoneTable{}) 31 | if tt.match == -1 && match || tt.match != -1 && !match || tt.match != -1 && tt.match != off { 32 | t.Errorf("%s: got: (%t, %d), but expected (%d)\n", tt.in, match, off, tt.match) 33 | } 34 | }) 35 | } 36 | } 37 | 38 | func TestConcat(t *testing.T) { 39 | p := Concat( 40 | Literal("ana"), 41 | Literal("hi"), 42 | ) 43 | 44 | tests := []PatternTest{ 45 | {"ana", -1}, 46 | {"hi", -1}, 47 | {"anahi", 5}, 48 | {"anah", -1}, 49 | } 50 | 51 | check(p, tests, t) 52 | } 53 | 54 | type uint8Checker struct{} 55 | 56 | // only allows integers between 0 and 256 57 | func (uint8Checker) Check(b []byte, src *input.Input, id, flag int) int { 58 | i, err := strconv.Atoi(string(b)) 59 | if err != nil { 60 | return -1 61 | } 62 | if i >= 0 && i < 256 { 63 | return 0 64 | } 65 | return -1 66 | } 67 | 68 | func TestChecker(t *testing.T) { 69 | p := Check(Plus(Set(charset.Range('0', '9'))), uint8Checker{}) 70 | 71 | tests := []PatternTest{ 72 | {"123", 3}, 73 | {"256", -1}, 74 | {"foo", -1}, 75 | {"0", 1}, 76 | } 77 | 78 | check(p, tests, t) 79 | } 80 | 81 | func TestOr(t *testing.T) { 82 | p := Or(Literal("ana"), Literal("hi")) 83 | 84 | tests := []PatternTest{ 85 | {"ana", 3}, 86 | {"hi", 2}, 87 | {"an", -1}, 88 | {"anahi", 3}, 89 | } 90 | 91 | check(p, tests, t) 92 | } 93 | 94 | func TestRepeat(t *testing.T) { 95 | p := Star(Literal("ana")) 96 | tests := []PatternTest{ 97 | {"", 0}, 98 | {"ana", 3}, 99 | {"anaanaana", 9}, 100 | {"hiana", 0}, 101 | {"anaanaan", 6}, 102 | {"an", 0}, 103 | } 104 | check(p, tests, t) 105 | 106 | p = Plus(Literal("hi")) 107 | tests = []PatternTest{ 108 | {"", -1}, 109 | {"hi", 2}, 110 | {"hihihi", 6}, 111 | {"hihiana", 4}, 112 | {"h", -1}, 113 | } 114 | check(p, tests, t) 115 | 116 | p = Concat(Plus(Set(charset.New([]byte{'0', '1'}))), Star(Set(charset.New([]byte{'a', 'b', 'c'})))) 117 | tests = []PatternTest{ 118 | {"01", 2}, 119 | {"01abaabbc", 9}, 120 | {"abc", -1}, 121 | {"5a", -1}, 122 | {"1z", 1}, 123 | } 124 | check(p, tests, t) 125 | } 126 | 127 | func TestPredicate(t *testing.T) { 128 | p := Not(Literal("ana")) 129 | tests := []PatternTest{ 130 | {"ana", -1}, 131 | {"hi", 0}, 132 | {"an", 0}, 133 | } 134 | check(p, tests, t) 135 | 136 | p1 := Not(Not(Literal("ana"))) 137 | p2 := And(Literal("ana")) 138 | tests = []PatternTest{ 139 | {"ana", 0}, 140 | {"hi", -1}, 141 | {"an", -1}, 142 | } 143 | check(p1, tests, t) 144 | check(p2, tests, t) 145 | } 146 | 147 | func TestAny(t *testing.T) { 148 | p := Concat(Any(5), Literal("ana")) 149 | tests := []PatternTest{ 150 | {"helloana", 8}, 151 | {"hiana", -1}, 152 | {"anaanana", 8}, 153 | } 154 | check(p, tests, t) 155 | } 156 | 
157 | func TestOptional(t *testing.T) { 158 | p := Concat(Literal("ana"), Optional(Literal("hello"))) 159 | tests := []PatternTest{ 160 | {"ana", 3}, 161 | {"anahe", 3}, 162 | {"hello", -1}, 163 | {"anahello", 8}, 164 | } 165 | check(p, tests, t) 166 | } 167 | 168 | func TestSet(t *testing.T) { 169 | p := Plus(Set(charset.Range('0', '9'))) 170 | tests := []PatternTest{ 171 | {"hi", -1}, 172 | {"1002", 4}, 173 | {"10.02", 2}, 174 | {"9", 1}, 175 | } 176 | check(p, tests, t) 177 | } 178 | 179 | func TestGrammar(t *testing.T) { 180 | // grammar: 181 | // S <- B / (![()] .)+ 182 | // B <- '(' S ')' 183 | S := Or(NonTerm("B"), Plus(Concat(Not(Set(charset.New([]byte{'(', ')'}))), Any(1)))) 184 | B := Concat(Concat(Literal("("), NonTerm("S")), Literal(")")) 185 | 186 | p := Grammar("S", map[string]Pattern{ 187 | "S": S, 188 | "B": B, 189 | }) 190 | tests := []PatternTest{ 191 | {"(hello)", 7}, 192 | {"(hello", -1}, 193 | {"((inside))", 10}, 194 | {"((inside)", -1}, 195 | } 196 | check(p, tests, t) 197 | } 198 | 199 | func TestTailCall(t *testing.T) { 200 | p := Grammar("X", map[string]Pattern{ 201 | "X": Or(Literal("ana"), Concat(Any(1), NonTerm("X"))), 202 | }) 203 | tests := []PatternTest{ 204 | {"asdf", -1}, 205 | {"ana hello", 3}, 206 | {"hello ana", 9}, 207 | {"anaana", 3}, 208 | } 209 | check(p, tests, t) 210 | } 211 | 212 | func TestUnionSet(t *testing.T) { 213 | p := Plus(Or(Set(charset.Range('a', 'z')), Set(charset.Range('A', 'Z')))) 214 | tests := []PatternTest{ 215 | {"Hello", 5}, 216 | {"123", -1}, 217 | {"Hello1", 5}, 218 | } 219 | check(p, tests, t) 220 | } 221 | 222 | func TestSearch(t *testing.T) { 223 | p := Search( 224 | Concat( 225 | Literal("ana"), 226 | ), 227 | ) 228 | tests := []PatternTest{ 229 | {"hello ana hello", 9}, 230 | {"hello", -1}, 231 | {"hello ana ana ana", 9}, 232 | } 233 | check(p, tests, t) 234 | 235 | // search for last occurrence 236 | p = Plus(Search(Literal("ana"))) 237 | tests = []PatternTest{ 238 | {"hello ana hello", 9}, 239 | {"hello", -1}, 240 | {"hello ana ana ana hello", 17}, 241 | } 242 | check(p, tests, t) 243 | } 244 | 245 | func TestArithmeticGrammar(t *testing.T) { 246 | // grammar: 247 | // Expr <- Factor ([+-] Factor)* 248 | // Factor <- Term ([*/] Term)* 249 | // Term <- Number / '(' Expr ')' 250 | // Number <- [0-9]+ 251 | p := Grammar("Expr", map[string]Pattern{ 252 | "Expr": Concat(NonTerm("Factor"), Star(Concat(Set(charset.New([]byte{'+', '-'})), NonTerm("Factor")))), 253 | "Factor": Concat(NonTerm("Term"), Star(Concat(Set(charset.New([]byte{'*', '/'})), NonTerm("Term")))), 254 | "Term": Or(NonTerm("Number"), Concat(Concat(Literal("("), NonTerm("Expr")), Literal(")"))), 255 | "Number": Plus(Set(charset.Range('0', '9'))), 256 | }) 257 | tests := []PatternTest{ 258 | {"13+(22-15)", 10}, 259 | {"24*5+3", 6}, 260 | {"word 5*3", -1}, 261 | {"10*(43", 2}, 262 | } 263 | check(p, tests, t) 264 | } 265 | 266 | func TestBackReference(t *testing.T) { 267 | word := Plus(Literal("/")) 268 | br := isa.NewBackRef() 269 | p := Concat( 270 | CheckFlags(word, br, 0, int(isa.RefDef)), 271 | Star(Concat( 272 | Not(CheckFlags(&EmptyNode{}, br, 0, int(isa.RefUse))), 273 | Any(1), 274 | )), 275 | CheckFlags(&EmptyNode{}, br, 0, int(isa.RefUse)), 276 | ) 277 | tests := []PatternTest{ 278 | {"/// hello world ///", 19}, 279 | {"// hello world //", 17}, 280 | {"/// hello world //", -1}, 281 | } 282 | check(p, tests, t) 283 | } 284 | 285 | // ************** 286 | // * Benchmarks * 287 | // ************** 288 | // These require `bible.txt` in the testdata directory.
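// The match result is stored in a package-level variable so the compiler cannot optimize the benchmarked Exec calls away.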
289 | 290 | var match bool 291 | var bible *bytes.Reader 292 | 293 | func TestMain(m *testing.M) { 294 | data, err := ioutil.ReadFile("testdata/bible.txt") 295 | if err != nil { 296 | fmt.Println("Warning:", err) 297 | } 298 | bible = bytes.NewReader(data) 299 | os.Exit(m.Run()) 300 | } 301 | 302 | func BenchmarkBibleSearchFirstEartt(b *testing.B) { 303 | code := vm.Encode(MustCompile(Search(Literal("eartt")))) 304 | 305 | b.ResetTimer() 306 | for i := 0; i < b.N; i++ { 307 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 308 | } 309 | } 310 | 311 | func BenchmarkBibleSearchFirstAbram(b *testing.B) { 312 | abram := Concat(Plus(Set(charset.Range('a', 'z').Add(charset.Range('A', 'Z')))), Literal(" Abram")) 313 | code := vm.Encode(MustCompile(Search(abram))) 314 | 315 | b.ResetTimer() 316 | for i := 0; i < b.N; i++ { 317 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 318 | } 319 | } 320 | 321 | func BenchmarkBibleSearchLastAbram(b *testing.B) { 322 | abram := Concat(Plus(Set(charset.Range('a', 'z').Add(charset.Range('A', 'Z')))), Literal(" Abram")) 323 | code := vm.Encode(MustCompile(Star(Search(abram)))) 324 | 325 | b.ResetTimer() 326 | for i := 0; i < b.N; i++ { 327 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 328 | } 329 | } 330 | 331 | func BenchmarkBibleSearchLastTubalcain(b *testing.B) { 332 | code := vm.Encode(MustCompile(Star(Search(Literal("Tubalcain"))))) 333 | 334 | b.ResetTimer() 335 | for i := 0; i < b.N; i++ { 336 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 337 | } 338 | } 339 | 340 | func BenchmarkBibleOmegaPattern(b *testing.B) { 341 | omega := Concat(Star(Concat(Not(Literal("Omega")), Any(1))), Literal("Omega")) 342 | code := vm.Encode(MustCompile(omega)) 343 | 344 | b.ResetTimer() 345 | for i := 0; i < b.N; i++ { 346 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 347 | } 348 | } 349 | 350 | func BenchmarkBibleOmegaGrammar(b *testing.B) { 351 | omega := Grammar("S", map[string]Pattern{ 352 | "S": Concat(Star(Concat(Not(NonTerm("P")), Any(1))), NonTerm("P")), 353 | "P": Literal("Omega"), 354 | }) 355 | code := vm.Encode(MustCompile(omega)) 356 | 357 | b.ResetTimer() 358 | for i := 0; i < b.N; i++ { 359 | match, _, _, _ = code.Exec(bible, memo.NoneTable{}) 360 | } 361 | } 362 | 363 | func min(a, b int) int { 364 | if a < b { 365 | return a 366 | } 367 | return b 368 | } 369 | -------------------------------------------------------------------------------- /grammars/arith.peg: -------------------------------------------------------------------------------- 1 | Expr <- Factor ([+\-] Factor)* 2 | Factor <- Term ([*/] Term)* 3 | Term <- Number / '(' Expr ')' 4 | Number <- [0-9]+ 5 | -------------------------------------------------------------------------------- /grammars/json.peg: -------------------------------------------------------------------------------- 1 | doc <- JSON !. 2 | JSON <- S_ (Number / Object / Array / String / True / False / Null) S_ 3 | Object <- '{' (String ':' JSON (',' String ':' JSON)* / S_) '}' 4 | Array <- '[' (JSON (',' JSON)* / S_) ']' 5 | StringBody <- Escape? ((!["\\\00-\37] .)+ Escape*)* 6 | String <- S_ '"' StringBody '"' S_ 7 | Escape <- '\\' (["{|\\bfnrt] / UnicodeEscape) 8 | UnicodeEscape <- 'u' [0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f] 9 | Number <- Minus? IntPart FractPart? ExpPart? 10 | Minus <- '-' 11 | IntPart <- '0' / [1-9][0-9]* 12 | FractPart <- '.' [0-9]+ 13 | ExpPart <- [eE] [+\-]? 
[0-9]+ 14 | True <- 'true' 15 | False <- 'false' 16 | Null <- 'null' 17 | S_ <- [\11-\15\40]* 18 | -------------------------------------------------------------------------------- /grammars/json_memo.peg: -------------------------------------------------------------------------------- 1 | doc <- JSON !. 2 | JSON <- S_ (Number / Object / Array / String / True / False / Null) S_ 3 | Object <- '{' (String ':' JSON (',' String ':' JSON)* / S_) '}' 4 | Array <- '[' (JSON ({{',' JSON}})* / S_) ']' 5 | StringBody <- Escape? ((!["\\\00-\37] .)+ Escape*)* 6 | String <- S_ '"' StringBody '"' S_ 7 | Escape <- '\\' (["{|\\bfnrt] / UnicodeEscape) 8 | UnicodeEscape <- 'u' [0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f] 9 | Number <- Minus? IntPart FractPart? ExpPart? 10 | Minus <- '-' 11 | IntPart <- '0' / [1-9][0-9]* 12 | FractPart <- '.' [0-9]+ 13 | ExpPart <- [eE] [+\-]? [0-9]+ 14 | True <- 'true' 15 | False <- 'false' 16 | Null <- 'null' 17 | S_ <- [\11-\15\40]* 18 | -------------------------------------------------------------------------------- /grammars/lpeg.peg: -------------------------------------------------------------------------------- 1 | pattern <- exp !. 2 | exp <- S (alternative / grammar) 3 | 4 | alternative <- seq ('/' S seq)* 5 | seq <- prefix* 6 | prefix <- '&' S prefix / '!' S prefix / suffix 7 | suffix <- primary S (([+*?] 8 | / '^' [+\-]? num 9 | / '->' S (string / '{}' / name) 10 | / '=>' S name) S)* 11 | 12 | primary <- '(' exp ')' / string / class / defined 13 | / '{:' (name ':')? exp ':}' 14 | / '=' name 15 | / '{*' exp '*}' # bare capture 16 | / '{~' exp '~}' # substitution capture 17 | / '{|' exp '|}' # table capture 18 | / '{+' exp '+}' # memoization expression 19 | / '{' exp '}' # string capture 20 | / '.' 21 | / name S !arrow 22 | 23 | grammar <- definition+ 24 | definition <- name S arrow exp 25 | 26 | class <- '[' '^'? item (!']' item)* ']' 27 | item <- defined / range / . 28 | range <- . '-' (!']' .) 29 | 30 | S <- (space / comment)* # spaces and comments 31 | name <- [A-Za-z][A-Za-z0-9_]* 32 | arrow <- '<-' 33 | num <- [0-9]+ 34 | string <- '"' (!'"' .)* '"' / "'" (!"'" .)* "'" 35 | defined <- '%' name 36 | 37 | comment <- '#' (!eol .)* eol 38 | space <- ' ' / '\t' / eol 39 | eol <- '\r\n' / '\n' / '\r' 40 | -------------------------------------------------------------------------------- /grammars/peg.peg: -------------------------------------------------------------------------------- 1 | # Hierarchical syntax 2 | Grammar <- Spacing_ Definition+ EndOfFile_ 3 | Definition <- Identifier LEFTARROW_ Expression 4 | 5 | Expression <- Sequence (SLASH_ Sequence)* 6 | Sequence <- Prefix* 7 | Prefix <- (AND / NOT)? Suffix 8 | Suffix <- Primary (QUESTION / STAR / PLUS)? 9 | Primary <- Identifier !LEFTARROW_ 10 | / OPEN_ Expression CLOSE_ 11 | / Literal / Class / DOT 12 | 13 | # Lexical syntax 14 | Identifier <- IdentStart IdentCont* Spacing_ 15 | IdentStart <- [a-zA-Z_] 16 | IdentCont <- IdentStart / [0-9] 17 | 18 | Literal <- ['] (!['] Char)* ['] Spacing_ 19 | / ["] (!["] Char)* ["] Spacing_ 20 | Class <- '[' (!']' Range)* ']' Spacing_ 21 | Range <- Char '-' Char / Char 22 | Char <- '\\' [nrt'"\[\]\\] 23 | / '\\' [0-2][0-7][0-7] 24 | / '\\' [0-7][0-7]? 25 | / !'\\' . 26 | 27 | LEFTARROW_ <- '<-' Spacing_ 28 | SLASH_ <- '/' Spacing_ 29 | AND <- '&' Spacing_ 30 | NOT <- '!' Spacing_ 31 | QUESTION <- '?' Spacing_ 32 | STAR <- '*' Spacing_ 33 | PLUS <- '+' Spacing_ 34 | OPEN_ <- '(' Spacing_ 35 | CLOSE_ <- ')' Spacing_ 36 | DOT <- '.' 
Spacing_ 37 | 38 | Spacing_ <- (Space_ / Comment_)* 39 | Comment_ <- '#' (!EndOfLine_ .)* EndOfLine_ 40 | Space_ <- ' ' / '\t' / EndOfLine_ 41 | EndOfLine_ <- '\r\n' / '\n' / '\r' 42 | EndOfFile_ <- !. 43 | -------------------------------------------------------------------------------- /grammars/re.peg: -------------------------------------------------------------------------------- 1 | Pattern <- Spacing_ (Expression / Grammar) EndOfFile_ 2 | Grammar <- Definition+ 3 | Definition <- Identifier '<-' Expression 4 | 5 | Expression <- Sequence ('/' Sequence)* 6 | Sequence <- Prefix* 7 | Prefix <- (AND / NOT)? Suffix 8 | Suffix <- Primary (QUESTION / STAR / PLUS)? 9 | Primary <- Identifier !'<-' 10 | / '(' Expression ')' 11 | / Literal / Class 12 | / '{' Expression '}' 13 | / '{+' Expression '+}' 14 | / DOT 15 | 16 | # Lexical syntax 17 | Identifier <- IdentStart IdentCont* Spacing_ 18 | IdentStart <- [a-zA-Z_] 19 | IdentCont <- IdentStart / [0-9] 20 | 21 | Literal <- ['] (!['] Char)* ['] Spacing_ 22 | / ["] (!["] Char)* ["] Spacing_ 23 | Class <- '[' CARAT? (!']' Range)* ']' Spacing_ 24 | Range <- Char '-' Char / Char 25 | Char <- '\\' [nrt'"\[\]\\] 26 | / '\\' [0-2][0-7][0-7] 27 | / '\\' [0-7][0-7]? 28 | / !'\\' . 29 | 30 | AND <- '&' Spacing_ 31 | NOT <- '!' Spacing_ 32 | QUESTION <- '?' Spacing_ 33 | STAR <- '*' Spacing_ 34 | PLUS <- '+' Spacing_ 35 | DOT <- '.' Spacing_ 36 | CARAT <- '^' Spacing_ 37 | 38 | Spacing_ <- (Space_ / Comment_)* 39 | Comment_ <- '#' (!EndOfLine_ .)* EndOfLine_ 40 | Space_ <- ' ' / '\t' / EndOfLine_ 41 | EndOfLine_ <- '\r\n' / '\n' / '\r' 42 | EndOfFile_ <- !. 43 | -------------------------------------------------------------------------------- /incremental_test.go: -------------------------------------------------------------------------------- 1 | package gpeg 2 | 3 | import ( 4 | "io/ioutil" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/zyedidia/gpeg/bench" 9 | "github.com/zyedidia/gpeg/input/linerope" 10 | "github.com/zyedidia/gpeg/memo" 11 | "github.com/zyedidia/gpeg/pattern" 12 | "github.com/zyedidia/gpeg/re" 13 | "github.com/zyedidia/gpeg/vm" 14 | ) 15 | 16 | // Open a 250k java file and apply some edits and verify that after each edit 17 | // the incremental result is the same as doing a full parse. 
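// Each generated edit is expanded into single-byte insertions and deletions (ToSingleEdits), so the memo table's ApplyEdit path is exercised once per changed byte.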
18 | func TestIncrementalJava(t *testing.T) { 19 | rand.Seed(42) 20 | 21 | peg, err := ioutil.ReadFile("grammars/java_memo.peg") 22 | if err != nil { 23 | t.Error(err) 24 | } 25 | p := re.MustCompile(string(peg)) 26 | 27 | java, err := ioutil.ReadFile("testdata/ScriptRuntime.java") 28 | if err != nil { 29 | t.Error(err) 30 | } 31 | 32 | edits := bench.GenerateEdits(java, 100) 33 | edits = bench.ToSingleEdits(edits) 34 | 35 | tbl := memo.NewTreeTable(512) 36 | prog := pattern.MustCompile(p) 37 | code := vm.Encode(prog) 38 | 39 | r := linerope.New(java) 40 | 41 | for _, e := range edits { 42 | start := e.Start 43 | end := e.End 44 | 45 | r.Remove(start, end) 46 | r.Insert(start, []byte(e.Text)) 47 | 48 | // st := time.Now() 49 | tbl.ApplyEdit(memo.Edit{ 50 | Start: start, 51 | End: end, 52 | Len: len(e.Text), 53 | }) 54 | 55 | code.Exec(r, tbl) 56 | // fmt.Println("reparse", time.Since(st), match, off) 57 | // st = time.Now() 58 | // nmatch, noff, _, _ := code.Exec(r, memo.NoneTable{}) 59 | // fmt.Println("full parse", time.Since(st)) 60 | 61 | // if match != nmatch || off != noff { 62 | // t.Fatal(i, match, nmatch, off, noff) 63 | // } 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /input/input.go: -------------------------------------------------------------------------------- 1 | // Package input defines data types and functions for managing input data. 2 | package input 3 | 4 | import ( 5 | "io" 6 | ) 7 | 8 | const bufsz = 4096 9 | 10 | // Input represents the input data and is an efficient wrapper of io.ReaderAt 11 | // which provides a nicer API, avoids repeated interface function calls, and 12 | // uses a cache for buffered reading. 13 | // An Input also tracks the index of the furthest byte that has been read. 14 | type Input struct { 15 | r io.ReaderAt 16 | 17 | // cached data. 18 | chunk [bufsz]byte 19 | b [1]byte 20 | // size of the cache. 21 | nchunk int 22 | 23 | // the position within the reader that the chunk starts at. 24 | base int 25 | // the offset within the chunk we are reading at. 26 | coff int 27 | // the furthest position we have read. 28 | furthest int 29 | } 30 | 31 | // NewInput creates a new Input wrapper for the io.ReaderAt. 32 | func NewInput(r io.ReaderAt) *Input { 33 | i := &Input{ 34 | r: r, 35 | } 36 | i.refill(i.base) 37 | return i 38 | } 39 | 40 | func (i *Input) refill(pos int) { 41 | i.base = pos 42 | i.coff = 0 43 | i.nchunk, _ = i.r.ReadAt(i.chunk[:], int64(i.base)) 44 | } 45 | 46 | // Peek returns the next byte in the stream or 'false' if there are no more 47 | // bytes. Successive calls to Peek will return the same value unless there is a 48 | // call to SeekTo or Advance in between. 49 | func (i *Input) Peek() (byte, bool) { 50 | pos := i.base + i.coff 51 | if pos > i.furthest { 52 | i.furthest = pos 53 | } 54 | 55 | return i.chunk[i.coff], i.nchunk != 0 56 | } 57 | 58 | func (i *Input) PeekBefore() (byte, bool) { 59 | if i.base+i.coff-1 < 0 { 60 | return 0, false 61 | } 62 | if i.coff >= 1 { 63 | return i.chunk[i.coff-1], i.nchunk != 0 64 | } 65 | n, _ := i.r.ReadAt(i.b[:], int64(i.base+i.coff-1)) 66 | return i.b[0], n == 1 67 | } 68 | 69 | // SeekTo moves the current read position to the desired read position. Returns 70 | // true if the seek went to a valid location within the reader, and false 71 | // otherwise. In other words, if seek returns true the next call to Peek will 72 | // return a valid byte. 
73 | func (i *Input) SeekTo(pos int) bool { 74 | // check if the seek position in within the current chunk and if so just 75 | // update the internal offset. 76 | chunkEnd := i.base + i.nchunk 77 | if pos < chunkEnd && pos >= i.base { 78 | i.coff = pos - i.base 79 | return true 80 | } 81 | 82 | // refill the cache (moves the base) 83 | i.refill(pos) 84 | return i.nchunk != 0 85 | } 86 | 87 | // Advance moves the offset forward by 'n' bytes. Returns true if the advance 88 | // was successful (n chars were successfully skipped) and false otherwise. Note 89 | // that even if Advance returns true the next call to Peek may return false if 90 | // the advance went to the exact end of the data. 91 | func (i *Input) Advance(n int) bool { 92 | if i.nchunk == 0 { 93 | return false 94 | } 95 | 96 | i.coff += n 97 | if i.coff > i.nchunk { 98 | i.refill(i.base + i.coff) 99 | return false 100 | } else if i.coff == i.nchunk { 101 | i.refill(i.base + i.coff) 102 | } 103 | return true 104 | } 105 | 106 | func (i *Input) ReadAt(b []byte, pos int64) (n int, err error) { 107 | return i.r.ReadAt(b, pos) 108 | } 109 | 110 | // Slice returns a slice of the reader corresponding to the range [low:high). 111 | func (i *Input) Slice(low, high int) []byte { 112 | return Slice(i.r, low, high) 113 | } 114 | 115 | // Pos returns the current read position. 116 | func (i *Input) Pos() int { 117 | return i.base + i.coff 118 | } 119 | 120 | // Furthest returns the furthest read position. 121 | func (i *Input) Furthest() int { 122 | return i.furthest 123 | } 124 | 125 | // ResetFurthest resets the furthest read tracker to zero. 126 | func (i *Input) ResetFurthest() { 127 | i.furthest = 0 128 | } 129 | -------------------------------------------------------------------------------- /input/input_test.go: -------------------------------------------------------------------------------- 1 | package input_test 2 | 3 | import ( 4 | "bytes" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/input" 8 | ) 9 | 10 | func TestInput(t *testing.T) { 11 | b := bytes.NewReader([]byte("foo bar baz")) 12 | i := input.NewInput(b) 13 | 14 | if b, _ := i.Peek(); b != 'f' { 15 | t.Error("incorrect peek, got", string(b)) 16 | } 17 | i.Advance(1) 18 | if b, _ := i.Peek(); b != 'o' { 19 | t.Error("incorrect peek, got", string(b)) 20 | } 21 | i.Advance(1) 22 | if b, _ := i.Peek(); b != 'o' { 23 | t.Error("incorrect peek, got", string(b)) 24 | } 25 | 26 | slice := i.Slice(4, 7) 27 | if string(slice) != "bar" { 28 | t.Error("incorrect slice, got", string(slice)) 29 | } 30 | 31 | success := i.Advance(9) 32 | if !success { 33 | t.Error("incorrect: couldn't advance by 9") 34 | } 35 | 36 | if b, ok := i.Peek(); ok { 37 | t.Errorf("peek past end of buffer should return false, got %c", b) 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /input/linerope/.gitignore: -------------------------------------------------------------------------------- 1 | /main 2 | -------------------------------------------------------------------------------- /input/linerope/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021: Zachary Yedidia. 
4 | 5 | Permission is hereby granted, free of charge, to any person obtaining 6 | a copy of this software and associated documentation files (the 7 | "Software"), to deal in the Software without restriction, including 8 | without limitation the rights to use, copy, modify, merge, publish, 9 | distribute, sublicense, and/or sell copies of the Software, and to 10 | permit persons to whom the Software is furnished to do so, subject to 11 | the following conditions: 12 | 13 | The above copyright notice and this permission notice shall be 14 | included in all copies or substantial portions of the Software. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 20 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 21 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 22 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 23 | -------------------------------------------------------------------------------- /input/linerope/line.go: -------------------------------------------------------------------------------- 1 | package linerope 2 | 3 | import ( 4 | "bytes" 5 | ) 6 | 7 | type loc struct { 8 | line int 9 | col int 10 | } 11 | 12 | var lzero = loc{0, 0} 13 | 14 | func llen(b, sep []byte) loc { 15 | lines := bytes.Count(b, sep) 16 | 17 | if lines != 0 { 18 | last := bytes.LastIndex(b, sep) + len(sep) 19 | return loc{ 20 | line: lines, 21 | col: len(b) - last, 22 | } 23 | } 24 | return loc{ 25 | line: 0, 26 | col: len(b), 27 | } 28 | } 29 | 30 | func addlocs(a, b loc) loc { 31 | if a.line != 0 && b.line != 0 { 32 | return loc{ 33 | line: a.line + b.line, 34 | col: b.col, 35 | } 36 | } else if a.line != 0 { 37 | return loc{ 38 | line: a.line, 39 | col: b.col + a.col, 40 | } 41 | } else if b.line != 0 { 42 | return loc{ 43 | line: b.line, 44 | col: b.col, 45 | } 46 | } 47 | return loc{ 48 | line: 0, 49 | col: a.col + b.col, 50 | } 51 | } 52 | 53 | func sublocs(a, b loc) loc { 54 | if a.line == b.line { 55 | return loc{ 56 | line: a.line - b.line, 57 | col: a.col - b.col, 58 | } 59 | } 60 | 61 | return loc{ 62 | line: a.line - b.line, 63 | col: a.col, 64 | } 65 | } 66 | 67 | func (l loc) cmp(other loc) int { 68 | if l.line == other.line { 69 | if l.col < other.col { 70 | return -1 71 | } else if l.col > other.col { 72 | return 1 73 | } 74 | return 0 75 | } else if l.line < other.line { 76 | return -1 77 | } 78 | return 1 79 | } 80 | 81 | func minloc(a, b loc) loc { 82 | if a.cmp(b) < 0 { 83 | return a 84 | } 85 | return b 86 | } 87 | 88 | func maxloc(a, b loc) loc { 89 | if a.cmp(b) > 0 { 90 | return a 91 | } 92 | return b 93 | } 94 | 95 | func sliceloc(b, sep []byte, start, end loc) []byte { 96 | soff := indexN(b, sep, start.line) + len(sep) + start.col 97 | eoff := indexN(b, sep, end.line) + len(sep) + end.col 98 | return b[soff:eoff] 99 | } 100 | -------------------------------------------------------------------------------- /input/linerope/rope.go: -------------------------------------------------------------------------------- 1 | package linerope 2 | 3 | import ( 4 | "io" 5 | "runtime" 6 | "sync" 7 | ) 8 | 9 | var DefaultOptions = Options{ 10 | SplitLen: 4096, 11 | JoinLen: 2048, 12 | RebalanceRatio: 1.2, 13 | LineSep: []byte{'\n'}, 14 | } 15 | 16 | type Options struct { 17 | // SplitLen is the threshold above which 
slices will be split into separate 18 | // nodes. 19 | SplitLen int 20 | // JoinLen is the threshold below which nodes will be merged into slices. 21 | JoinLen int 22 | // RebalanceRatio is the threshold used to trigger a rebuild during a 23 | // rebalance operation. 24 | RebalanceRatio float64 25 | // LineSep is the newline byte sequence (usually '\n' or '\r\n'). 26 | LineSep []byte 27 | } 28 | 29 | type nodeType byte 30 | 31 | const ( 32 | tLeaf nodeType = iota 33 | tNode 34 | ) 35 | 36 | // A Node in the rope structure. If the kind is tLeaf, only the value and 37 | // length are valid, and if the kind is tNode, only length, left, right are 38 | // valid. 39 | type Node struct { 40 | kind nodeType 41 | value []byte 42 | length int 43 | llength loc 44 | left, right *Node 45 | opts Options 46 | } 47 | 48 | // New returns a new rope node from the given byte slice. The underlying 49 | // data is not copied so the user should ensure that it is okay to insert and 50 | // delete from the input slice. 51 | func New(b []byte) *Node { 52 | return NewWithOpts(b, DefaultOptions) 53 | } 54 | 55 | // NewWithOpts constructs a rope with the given options. 56 | func NewWithOpts(b []byte, opts Options) *Node { 57 | // We build the tree from the bottom up for extra efficiency. This avoids 58 | // counting duplicate newlines a logarithmic number of times (for each 59 | // level of the tree). 60 | // 61 | // We make the chunk size equal to SplitLength which means a node will be 62 | // split when the first edit is made. Since most nodes will never be 63 | // edited, it makes sense to fill them all up to avoid wasting space, even 64 | // if it means inserting will require a split the first time a node is 65 | // edited. 66 | chunksz := opts.SplitLen 67 | nchunks := len(b) / chunksz 68 | nodes := make([]*Node, nchunks, nchunks+1) 69 | 70 | // For even better performance, we load the chunks in parallel. Chunk 71 | // loading is distributed among the cores available on the machine. 72 | var nthreads = runtime.NumCPU() 73 | var wg sync.WaitGroup 74 | wg.Add(nthreads) 75 | for t := 0; t < nthreads; t++ { 76 | go func(t int) { 77 | start := t * (nchunks / nthreads) 78 | end := t*(nchunks/nthreads) + (nchunks / nthreads) 79 | if t == nthreads-1 { 80 | end = nchunks 81 | } 82 | for i := start; i < end; i++ { 83 | j := i * chunksz 84 | // triple index slice notation allows a sort of copy-on-write behavior 85 | // which is extremely beneficial to us because it's likely that this 86 | // slice is backed by a memory-mapped file. 
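// Capping the capacity at j+chunksz means a later append on this leaf allocates a new array instead of writing into the shared backing slice b.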
87 | slc := b[j : j+chunksz : j+chunksz] 88 | nodes[i] = &Node{ 89 | kind: tLeaf, 90 | value: slc, 91 | length: len(slc), 92 | llength: llen(slc, opts.LineSep), 93 | opts: opts, 94 | } 95 | } 96 | wg.Done() 97 | }(t) 98 | } 99 | wg.Wait() 100 | // load any extra bytes 101 | slc := b[nchunks*chunksz : len(b) : len(b)] 102 | nodes = append(nodes, &Node{ 103 | kind: tLeaf, 104 | value: slc, 105 | length: len(slc), 106 | llength: llen(slc, opts.LineSep), 107 | opts: opts, 108 | }) 109 | return buildTree(nodes) 110 | } 111 | 112 | // recursively creates parent nodes 113 | func buildTree(nodes []*Node) *Node { 114 | if len(nodes) == 1 { 115 | return nodes[0] 116 | } 117 | if len(nodes)%2 != 0 { 118 | l := len(nodes) 119 | nodes[l-2] = join(nodes[l-2], nodes[l-1]) 120 | nodes = nodes[:l-1] 121 | } 122 | 123 | newnodes := make([]*Node, 0, len(nodes)/2+1) 124 | for i := 0; i < len(nodes); i += 2 { 125 | newnodes = append(newnodes, join(nodes[i], nodes[i+1])) 126 | } 127 | return buildTree(newnodes) 128 | } 129 | 130 | // Len returns the number of elements stored in the rope. 131 | func (n *Node) Len() int { 132 | return n.length 133 | } 134 | 135 | // LLen returns the line/col location one byte beyond the last position in the 136 | // file. 137 | func (n *Node) LLen() (lines, cols int) { 138 | return n.llength.line, n.llength.col 139 | } 140 | 141 | func (n *Node) NumLines() int { 142 | return n.llength.line 143 | } 144 | 145 | func (n *Node) adjust() { 146 | switch n.kind { 147 | case tLeaf: 148 | if n.length > n.opts.SplitLen { 149 | divide := n.length / 2 150 | n.left = NewWithOpts(n.value[:divide], n.opts) 151 | n.right = NewWithOpts(n.value[divide:], n.opts) 152 | n.value = nil 153 | n.kind = tNode 154 | n.length = n.left.length + n.right.length 155 | n.llength = addlocs(n.left.llength, n.right.llength) 156 | } 157 | default: // case tNode 158 | if n.length < n.opts.JoinLen { 159 | n.value = n.Value() 160 | n.left = nil 161 | n.right = nil 162 | n.kind = tLeaf 163 | n.length = len(n.value) 164 | n.llength = llen(n.value, n.opts.LineSep) 165 | } 166 | } 167 | } 168 | 169 | // Value returns the elements of this node concatenated into a slice. May 170 | // return the underlying slice without copying, so do not modify the returned 171 | // slice. 172 | func (n *Node) Value() []byte { 173 | switch n.kind { 174 | case tLeaf: 175 | return n.value 176 | default: // case tNode 177 | return concat(n.left.Value(), n.right.Value()) 178 | } 179 | } 180 | 181 | // Remove deletes the range [start:end) (exclusive bound) from the rope. 182 | func (n *Node) Remove(start, end int) { 183 | switch n.kind { 184 | case tLeaf: 185 | // slice tricks delete 186 | n.value = remove(n.value, start, end) 187 | n.length = len(n.value) 188 | n.llength = llen(n.value, n.opts.LineSep) 189 | default: // case tNode 190 | leftLength := n.left.length 191 | leftStart := min(start, leftLength) 192 | leftEnd := min(end, leftLength) 193 | rightLength := n.right.length 194 | rightStart := max(0, min(start-leftLength, rightLength)) 195 | rightEnd := max(0, min(end-leftLength, rightLength)) 196 | if leftStart < leftLength { 197 | n.left.Remove(leftStart, leftEnd) 198 | } 199 | if rightEnd > 0 { 200 | n.right.Remove(rightStart, rightEnd) 201 | } 202 | n.length = n.left.length + n.right.length 203 | n.llength = addlocs(n.left.llength, n.right.llength) 204 | } 205 | n.adjust() 206 | } 207 | 208 | // Insert inserts the given value at pos. 
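// If the insertion grows a leaf beyond SplitLen, the adjust call at the end of the method splits it.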
209 | func (n *Node) Insert(pos int, value []byte) { 210 | switch n.kind { 211 | case tLeaf: 212 | // slice tricks insert 213 | n.value = insert(n.value, pos, value) 214 | n.length = len(n.value) 215 | n.llength = llen(n.value, n.opts.LineSep) 216 | default: // case tNode 217 | leftLength := n.left.length 218 | if pos < leftLength { 219 | n.left.Insert(pos, value) 220 | } else { 221 | n.right.Insert(pos-leftLength, value) 222 | } 223 | n.length = n.left.length + n.right.length 224 | n.llength = addlocs(n.left.llength, n.right.llength) 225 | } 226 | n.adjust() 227 | } 228 | 229 | // slice returns the range of the rope from [start:end). 230 | func (n *Node) slice(start, end int) []byte { 231 | if start >= end { 232 | return []byte{} 233 | } 234 | 235 | switch n.kind { 236 | case tLeaf: 237 | return n.value[start:end] 238 | default: // case tNode 239 | leftLength := n.left.length 240 | leftStart := min(start, leftLength) 241 | leftEnd := min(end, leftLength) 242 | rightLength := n.right.length 243 | rightStart := max(0, min(start-leftLength, rightLength)) 244 | rightEnd := max(0, min(end-leftLength, rightLength)) 245 | 246 | if leftStart != leftEnd { 247 | if rightStart != rightEnd { 248 | return concat(n.left.slice(leftStart, leftEnd), n.right.slice(rightStart, rightEnd)) 249 | } else { 250 | return n.left.slice(leftStart, leftEnd) 251 | } 252 | } else { 253 | if rightStart != rightEnd { 254 | return n.right.slice(rightStart, rightEnd) 255 | } else { 256 | return []byte{} 257 | } 258 | } 259 | } 260 | } 261 | 262 | // OffsetAt returns the absolute character offset of a line/col position. 263 | func (n *Node) OffsetAt(line, col int) int { 264 | pos := loc{line, col} 265 | switch n.kind { 266 | case tLeaf: 267 | return indexN(n.value, n.opts.LineSep, line) + len(n.opts.LineSep) + col 268 | default: // case tNode 269 | leftLength := n.left.llength 270 | if pos.cmp(leftLength) < 0 { 271 | return n.left.OffsetAt(line, col) 272 | } else { 273 | l := sublocs(pos, leftLength) 274 | return n.left.length + n.right.OffsetAt(l.line, l.col) 275 | } 276 | } 277 | } 278 | 279 | // LineColAt returns the line/col position of an absolute character offset. 280 | func (n *Node) LineColAt(pos int) (line, col int) { 281 | l := n.lineColAt(pos) 282 | return l.line, l.col 283 | } 284 | 285 | func (n *Node) lineColAt(pos int) loc { 286 | switch n.kind { 287 | case tLeaf: 288 | return lineCol(n.value, n.opts.LineSep, pos) 289 | default: // case tNode 290 | leftLength := n.left.length 291 | if pos < leftLength { 292 | return n.left.lineColAt(pos) 293 | } else { 294 | return addlocs(n.left.llength, n.right.lineColAt(pos-leftLength)) 295 | } 296 | } 297 | } 298 | 299 | // SliceLC is the same as Slice but uses line/col positions for start and end. 
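// For example, with a "\n" line separator, n.SliceLC(0, 0, 1, 0) returns the
// first line of the rope including its trailing newline (assuming the rope
// contains at least one newline).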
300 | func (n *Node) SliceLC(startl, startc, endl, endc int) []byte { 301 | return n.sliceLC(loc{startl, startc}, loc{endl, endc}) 302 | } 303 | 304 | func (n *Node) sliceLC(start, end loc) []byte { 305 | if start.cmp(end) >= 0 { 306 | return []byte{} 307 | } 308 | 309 | switch n.kind { 310 | case tLeaf: 311 | return sliceloc(n.value, n.opts.LineSep, start, end) 312 | default: // case tNode 313 | leftLength := n.left.llength 314 | leftStart := minloc(start, leftLength) 315 | leftEnd := minloc(end, leftLength) 316 | rightLength := n.right.llength 317 | rightStart := maxloc(lzero, minloc(sublocs(start, leftLength), rightLength)) 318 | rightEnd := maxloc(lzero, minloc(sublocs(end, leftLength), rightLength)) 319 | 320 | if leftStart != leftEnd { 321 | if rightStart != rightEnd { 322 | return concat(n.left.sliceLC(leftStart, leftEnd), n.right.sliceLC(rightStart, rightEnd)) 323 | } else { 324 | return n.left.sliceLC(leftStart, leftEnd) 325 | } 326 | } else { 327 | if rightStart != rightEnd { 328 | return n.right.sliceLC(rightStart, rightEnd) 329 | } else { 330 | return []byte{} 331 | } 332 | } 333 | } 334 | } 335 | 336 | // At returns the element at the given position. 337 | func (n *Node) At(pos int) byte { 338 | s := n.slice(pos, pos+1) 339 | return s[0] 340 | } 341 | 342 | // SplitAt splits the node at the given index and returns two new ropes 343 | // corresponding to the left and right portions of the split. 344 | func (n *Node) SplitAt(i int) (*Node, *Node) { 345 | switch n.kind { 346 | case tLeaf: 347 | return NewWithOpts(n.value[:i], n.opts), NewWithOpts(n.value[i:], n.opts) 348 | default: // case tNode 349 | m := n.left.length 350 | if i == m { 351 | return n.left, n.right 352 | } else if i < m { 353 | l, r := n.left.SplitAt(i) 354 | return l, join(r, n.right) 355 | } 356 | l, r := n.right.SplitAt(i - m) 357 | return join(n.left, l), r 358 | } 359 | } 360 | 361 | func join(l, r *Node) *Node { 362 | n := &Node{ 363 | left: l, 364 | right: r, 365 | length: l.length + r.length, 366 | llength: addlocs(l.llength, r.llength), 367 | kind: tNode, 368 | opts: l.opts, 369 | } 370 | n.adjust() 371 | return n 372 | } 373 | 374 | // Join merges all the given ropes together into one rope. 375 | func Join(a, b *Node, more ...*Node) *Node { 376 | s := join(a, b) 377 | for _, n := range more { 378 | s = join(s, n) 379 | } 380 | return s 381 | } 382 | 383 | // Rebuild rebuilds the entire rope structure, resulting in a balanced tree. 384 | func (n *Node) Rebuild() { 385 | switch n.kind { 386 | case tNode: 387 | n.value = concat(n.left.Value(), n.right.Value()) 388 | n.left = nil 389 | n.right = nil 390 | n.adjust() 391 | } 392 | } 393 | 394 | // Rebalance finds unbalanced nodes and rebuilds them. 395 | func (n *Node) Rebalance() { 396 | switch n.kind { 397 | case tNode: 398 | lratio := float64(n.left.length) / float64(n.right.length) 399 | rratio := float64(n.right.length) / float64(n.left.length) 400 | if lratio > n.opts.RebalanceRatio || rratio > n.opts.RebalanceRatio { 401 | n.Rebuild() 402 | } else { 403 | n.left.Rebalance() 404 | n.right.Rebalance() 405 | } 406 | } 407 | } 408 | 409 | // Each applies the given function to every node in the rope. 410 | func (n *Node) Each(fn func(n *Node)) { 411 | fn(n) 412 | if n.kind == tNode { 413 | n.left.Each(fn) 414 | n.right.Each(fn) 415 | } 416 | } 417 | 418 | // EachLeaf applies the given function to every leaf node in order. 
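// The callback may stop the traversal early by returning true; EachLeaf
// reports whether the traversal was stopped (WriteTo relies on this to halt
// on a write error).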
419 | func (n *Node) EachLeaf(fn func(n *Node) bool) bool { 420 | switch n.kind { 421 | case tLeaf: 422 | return fn(n) 423 | default: // case tNode 424 | if n.left.EachLeaf(fn) { 425 | return true 426 | } 427 | return n.right.EachLeaf(fn) 428 | } 429 | } 430 | 431 | // ReadAt implements the io.ReaderAt interface. 432 | func (n *Node) ReadAt(p []byte, off int64) (nread int, err error) { 433 | if off > int64(n.length) { 434 | return 0, io.EOF 435 | } 436 | 437 | end := off + int64(len(p)) 438 | if end >= int64(n.length) { 439 | end = int64(n.length) 440 | err = io.EOF 441 | } 442 | b := n.slice(int(off), int(end)) 443 | nread = copy(p, b) 444 | return nread, err 445 | } 446 | 447 | // WriteTo implements the io.WriterTo interface. 448 | func (n *Node) WriteTo(w io.Writer) (int64, error) { 449 | var err error 450 | var ntotal int64 451 | n.EachLeaf(func(it *Node) bool { 452 | var nwritten int 453 | nwritten, err = w.Write(it.Value()) 454 | ntotal += int64(nwritten) 455 | return err != nil 456 | }) 457 | return ntotal, err 458 | } 459 | 460 | func min(a, b int) int { 461 | if a < b { 462 | return a 463 | } 464 | return b 465 | } 466 | 467 | func max(a, b int) int { 468 | if a > b { 469 | return a 470 | } 471 | return b 472 | } 473 | 474 | // from slice tricks 475 | func insert(s []byte, k int, vs []byte) []byte { 476 | if n := len(s) + len(vs); n <= cap(s) { 477 | s2 := s[:n] 478 | copy(s2[k+len(vs):], s[k:]) 479 | copy(s2[k:], vs) 480 | return s2 481 | } 482 | s2 := make([]byte, len(s)+len(vs)) 483 | copy(s2, s[:k]) 484 | copy(s2[k:], vs) 485 | copy(s2[k+len(vs):], s[k:]) 486 | return s2 487 | } 488 | 489 | func concat(a, b []byte) []byte { 490 | c := make([]byte, 0, len(a)+len(b)) 491 | c = append(c, a...) 492 | c = append(c, b...) 493 | return c 494 | } 495 | 496 | func remove(s []byte, start, end int) []byte { 497 | if len(s) == cap(s) { 498 | // "copy-on-write" for slices where len == cap. 499 | ns := make([]byte, len(s)-(end-start), cap(s)) 500 | copy(ns, s[:start]) 501 | copy(ns[start:], s[end:]) 502 | return ns 503 | } 504 | return append(s[:start], s[end:]...) 
505 | } 506 | -------------------------------------------------------------------------------- /input/linerope/rope_test.go: -------------------------------------------------------------------------------- 1 | package linerope_test 2 | 3 | import ( 4 | "bytes" 5 | "math/rand" 6 | "testing" 7 | 8 | "github.com/zyedidia/gpeg/input/linerope" 9 | ) 10 | 11 | func check(r *linerope.Node, b *basicText, t *testing.T) { 12 | if !bytes.Equal(r.Value(), b.value()) { 13 | t.Errorf("incorrect bytes: %s %s", string(r.Value()), string(b.value())) 14 | } 15 | if r.Len() != b.length() { 16 | t.Errorf("incorrect length: %d %d", r.Len(), b.length()) 17 | } 18 | if r.NumLines() != b.NumLines() { 19 | t.Errorf("incorrect line count: %d %d", r.NumLines(), b.NumLines()) 20 | } 21 | 22 | const ncheck = 100 23 | for i := 0; i < ncheck; i++ { 24 | pos := rand.Intn(r.Len()) 25 | rline, rcol := r.LineColAt(pos) 26 | bline, bcol := b.lineColAt(pos) 27 | if rline != bline || rcol != bcol { 28 | t.Errorf("incorrect offset conversion: %d, want (%d, %d), got (%d, %d)", pos, bline, bcol, rline, rcol) 29 | } 30 | 31 | off := r.OffsetAt(rline, rcol) 32 | if off != pos { 33 | t.Errorf("incorrect line/col conversion: (%d, %d), want %d, got %d", rline, rcol, pos, off) 34 | } 35 | } 36 | } 37 | 38 | const datasz = 5000 39 | 40 | func data() (*linerope.Node, *basicText) { 41 | data := randbytes(datasz) 42 | r := linerope.New(data) 43 | b := newBasicText(data) 44 | return r, b 45 | } 46 | 47 | func randrange(high int) (int, int) { 48 | i1 := rand.Intn(high) 49 | i2 := rand.Intn(high) 50 | return min(i1, i2), max(i1, i2) 51 | } 52 | 53 | var letters = []byte("\nabcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ") 54 | 55 | func randbytes(n int) []byte { 56 | b := make([]byte, n) 57 | for i := range b { 58 | b[i] = letters[rand.Intn(len(letters))] 59 | } 60 | return b 61 | } 62 | 63 | func TestConstruction(t *testing.T) { 64 | r, b := data() 65 | check(r, b, t) 66 | } 67 | 68 | func TestInsertRemove(t *testing.T) { 69 | r, b := data() 70 | 71 | const nedit = 100 72 | const strlen = 20 73 | for i := 0; i < nedit; i++ { 74 | low, high := randrange(r.Len()) 75 | r.Remove(low, high) 76 | b.remove(low, high) 77 | check(r, b, t) 78 | bstr := randbytes(strlen) 79 | r.Insert(low, bstr) 80 | b.insert(low, bstr) 81 | check(r, b, t) 82 | } 83 | check(r, b, t) 84 | } 85 | 86 | func TestReadAt(t *testing.T) { 87 | r, b := data() 88 | 89 | const nslice = 100 90 | length := r.Len() 91 | for i := 0; i < nslice; i++ { 92 | low, high := randrange(length) 93 | 94 | rb := make([]byte, high-low) 95 | r.ReadAt(rb, int64(low)) 96 | bb := b.slice(low, high) 97 | if !bytes.Equal(rb, bb) { 98 | t.Errorf("slice not equal: %s %s", string(rb), string(bb)) 99 | } 100 | } 101 | } 102 | 103 | func TestSplit(t *testing.T) { 104 | r, b := data() 105 | 106 | const nsplit = 10 107 | for i := 0; i < nsplit; i++ { 108 | splitidx := rand.Intn(r.Len()) 109 | left, right := r.SplitAt(splitidx) 110 | 111 | lb := b.slice(0, splitidx) 112 | rb := b.slice(splitidx, b.length()) 113 | if !bytes.Equal(left.Value(), lb) { 114 | t.Errorf("%d: left slice not equal: %s %s", splitidx, string(left.Value()), string(lb)) 115 | } 116 | if !bytes.Equal(right.Value(), rb) { 117 | t.Errorf("%d: right slice not equal: %s %s", splitidx, string(right.Value()), string(rb)) 118 | } 119 | r = linerope.Join(left, right) 120 | check(r, b, t) 121 | } 122 | } 123 | 124 | type basicText struct { 125 | data []byte 126 | } 127 | 128 | func newBasicText(b []byte) *basicText { 129 | data := make([]byte, 
len(b)) 130 | copy(data, b) 131 | return &basicText{ 132 | data: data, 133 | } 134 | } 135 | 136 | func (b *basicText) length() int { 137 | return len(b.data) 138 | } 139 | 140 | func (b *basicText) value() []byte { 141 | return b.data 142 | } 143 | 144 | func (b *basicText) remove(start, end int) { 145 | b.data = append(b.data[:start], b.data[end:]...) 146 | } 147 | 148 | func (b *basicText) insert(pos int, val []byte) { 149 | b.data = insert(b.data, pos, val) 150 | } 151 | 152 | func (b *basicText) slice(start, end int) []byte { 153 | return b.data[start:end] 154 | } 155 | 156 | func (b *basicText) lineColAt(pos int) (line, col int) { 157 | var last int 158 | for i, c := range b.data { 159 | if c == '\n' { 160 | if i >= pos { 161 | return line, pos - last 162 | } 163 | last = i + 1 164 | line++ 165 | } 166 | } 167 | return line, pos - last 168 | } 169 | 170 | func (b *basicText) NumLines() int { 171 | return bytes.Count(b.data, []byte{'\n'}) 172 | } 173 | 174 | func min(a, b int) int { 175 | if a < b { 176 | return a 177 | } 178 | return b 179 | } 180 | 181 | func max(a, b int) int { 182 | if a > b { 183 | return a 184 | } 185 | return b 186 | } 187 | 188 | // from slice tricks 189 | func insert(s []byte, k int, vs []byte) []byte { 190 | if n := len(s) + len(vs); n <= cap(s) { 191 | s2 := s[:n] 192 | copy(s2[k+len(vs):], s[k:]) 193 | copy(s2[k:], vs) 194 | return s2 195 | } 196 | s2 := make([]byte, len(s)+len(vs)) 197 | copy(s2, s[:k]) 198 | copy(s2[k:], vs) 199 | copy(s2[k+len(vs):], s[k:]) 200 | return s2 201 | } 202 | -------------------------------------------------------------------------------- /input/linerope/util.go: -------------------------------------------------------------------------------- 1 | package linerope 2 | 3 | import ( 4 | "bytes" 5 | ) 6 | 7 | // indexN finds the index of n-th sep in b. 8 | func indexN(b, sep []byte, n int) (index int) { 9 | index, idx, sepLen := 0, -1, len(sep) 10 | for i := 0; i < n; i++ { 11 | if idx = bytes.Index(b, sep); idx == -1 { 12 | break 13 | } 14 | b = b[idx+sepLen:] 15 | index += idx 16 | } 17 | 18 | if idx == -1 { 19 | index = -1 20 | } else { 21 | index += (n - 1) * sepLen 22 | } 23 | 24 | return 25 | } 26 | 27 | // lineCol converts an absolute position to a line/col pair by scanning b. 28 | func lineCol(b, sep []byte, pos int) loc { 29 | var line, last int 30 | for { 31 | idx := bytes.Index(b[last:], sep) 32 | if idx < 0 { 33 | break 34 | } else if last+idx >= pos { 35 | return loc{line, pos - last} 36 | } 37 | last += idx + len(sep) 38 | line++ 39 | } 40 | return loc{line, pos - last} 41 | } 42 | -------------------------------------------------------------------------------- /input/reader.go: -------------------------------------------------------------------------------- 1 | package input 2 | 3 | import ( 4 | "io" 5 | "sync" 6 | ) 7 | 8 | // readerWrapper implements a io.ReaderAt from an io.Reader. The readerWrapper 9 | // works by storing every byte read from the reader, and using that to read 10 | // data that has been read before. 11 | type readerWrapper struct { 12 | reader io.Reader 13 | buf []byte 14 | lock sync.Mutex 15 | } 16 | 17 | // FromReader converts an io.Reader to an io.ReaderAt. 18 | func FromReader(r io.Reader) io.ReaderAt { 19 | return &readerWrapper{ 20 | reader: r, 21 | } 22 | } 23 | 24 | // ReadAt implements the io.ReaderAt interface to wrap an io.Reader. Note that 25 | // calls to ReadAt may change the offset within the wrapped io.Reader (since 26 | // Read is called on the wrapped io.Reader to fetch data). 
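// For example (adapted from reader_test.go):
//
//	rat := FromReader(bytes.NewBufferString("foo bar baz"))
//	b := make([]byte, 3)
//	rat.ReadAt(b, 4) // b now holds "bar"
//	rat.ReadAt(b, 0) // previously read data is buffered: b now holds "foo"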
27 | func (r *readerWrapper) ReadAt(b []byte, off int64) (n int, err error) { 28 | r.lock.Lock() 29 | defer r.lock.Unlock() 30 | 31 | blen := int64(len(b)) 32 | // if there is enough space to fill up b already in the buffer, just copy 33 | // the data and return it. 34 | if int64(len(r.buf))-off >= blen { 35 | return copy(b, r.buf[off:]), nil 36 | } 37 | 38 | // otherwise read data until there is enough or there is an error. 39 | tmp := make([]byte, bufsz) 40 | for int64(len(r.buf))-off < blen { 41 | n, err = r.reader.Read(tmp) 42 | r.buf = append(r.buf, tmp[:n]...) 43 | if err != nil { 44 | break 45 | } 46 | } 47 | if off >= int64(len(r.buf)) { 48 | return 0, err 49 | } 50 | 51 | return copy(b, r.buf[off:]), err 52 | } 53 | 54 | // Slice returns the slice [low:high) in the given ReaderAt. 55 | func Slice(r io.ReaderAt, low, high int) []byte { 56 | buf := make([]byte, high-low) 57 | n, _ := r.ReadAt(buf, int64(low)) 58 | return buf[:n] 59 | } 60 | -------------------------------------------------------------------------------- /input/reader_test.go: -------------------------------------------------------------------------------- 1 | package input_test 2 | 3 | import ( 4 | "bytes" 5 | "io" 6 | "testing" 7 | 8 | "github.com/zyedidia/gpeg/input" 9 | ) 10 | 11 | func TestReaderWrapper(t *testing.T) { 12 | r := bytes.NewBufferString("foo bar baz") 13 | rat := input.FromReader(r) 14 | b := make([]byte, 3) 15 | 16 | rat.ReadAt(b, 4) 17 | if string(b) != "bar" { 18 | t.Errorf("want %s, got %s", "bar", string(b)) 19 | } 20 | 21 | rat.ReadAt(b, 0) 22 | if string(b) != "foo" { 23 | t.Errorf("want %s, got %s", "foo", string(b)) 24 | } 25 | 26 | n, err := rat.ReadAt(b, 9) 27 | if string(b[:n]) != "az" { 28 | t.Errorf("want %s, got %s", "az", string(b)) 29 | } 30 | if n != 2 || err != io.EOF { 31 | t.Errorf("incorrect, n: %v, err: %v", n, err) 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /isa/checker.go: -------------------------------------------------------------------------------- 1 | package isa 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/input" 5 | ) 6 | 7 | // A Checker is used so the user can perform additional custom validation of 8 | // parse results. For example, you might want to parse only 8-bit integers by 9 | // matching [0-9]+ and then using a checker to ensure the matched integer is in 10 | // the range 0-256. 
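// (Judging from the implementations in this file, Check returns a negative
// value to fail the match, and otherwise the number of additional bytes to
// consume: MapChecker returns 0 or -1, and BackReference returns the length
// of the matched back-reference.)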
11 | type Checker interface { 12 | Check(b []byte, src *input.Input, id, flag int) int 13 | } 14 | 15 | type MapChecker map[string]struct{} 16 | 17 | func NewMapChecker(strs []string) MapChecker { 18 | m := make(map[string]struct{}) 19 | for _, s := range strs { 20 | m[s] = struct{}{} 21 | } 22 | return m 23 | } 24 | 25 | func (m MapChecker) Check(b []byte, src *input.Input, id, flag int) int { 26 | if _, ok := m[string(b)]; ok { 27 | return 0 28 | } 29 | return -1 30 | } 31 | 32 | type RefKind uint8 33 | 34 | const ( 35 | RefDef RefKind = iota 36 | RefUse 37 | RefBlock 38 | ) 39 | 40 | type BackReference struct { 41 | Symbols map[int]string 42 | } 43 | 44 | func NewBackRef() *BackReference { 45 | return &BackReference{ 46 | Symbols: make(map[int]string), 47 | } 48 | } 49 | 50 | func (r *BackReference) Check(b []byte, src *input.Input, id, flag int) int { 51 | switch RefKind(flag) { 52 | case RefDef: 53 | r.Symbols[id] = string(b) 54 | return 0 55 | case RefUse: 56 | back := r.Symbols[id] 57 | buf := make([]byte, len(back)) 58 | n, _ := src.ReadAt(buf, int64(src.Pos())) 59 | if n == len(buf) && string(buf) == back { 60 | return n 61 | } 62 | return -1 63 | case RefBlock: 64 | } 65 | return 0 66 | } 67 | -------------------------------------------------------------------------------- /isa/isa.go: -------------------------------------------------------------------------------- 1 | // Package isa provides types for all instructions in the GPeg virtual machine. 2 | package isa 3 | 4 | import ( 5 | "fmt" 6 | "regexp/syntax" 7 | "strconv" 8 | 9 | "github.com/zyedidia/gpeg/charset" 10 | ) 11 | 12 | // Insn represents the interface for an instruction in the ISA 13 | type Insn interface { 14 | insn() 15 | } 16 | 17 | // A Program is a sequence of instructions 18 | type Program []Insn 19 | 20 | // Size returns the number of instructions in a program ignoring labels and 21 | // nops. 22 | func (p Program) Size() int { 23 | var sz int 24 | for _, i := range p { 25 | switch i.(type) { 26 | case Label, Nop: 27 | continue 28 | default: 29 | sz++ 30 | } 31 | } 32 | return sz 33 | } 34 | 35 | // A JumpType instruction is any instruction that refers to a Label. 36 | type JumpType interface { 37 | jumpt() 38 | } 39 | 40 | var uniqId int 41 | 42 | // Label is used for marking a location in the instruction code with 43 | // a unique ID 44 | type Label struct { 45 | Id int 46 | basic 47 | } 48 | 49 | // NewLabel returns a new label with a unique ID 50 | func NewLabel() Label { 51 | uniqId++ 52 | return Label{ 53 | Id: uniqId, 54 | } 55 | } 56 | 57 | // Char consumes the next byte of the subject if it matches Byte and 58 | // fails otherwise. 59 | type Char struct { 60 | Byte byte 61 | basic 62 | } 63 | 64 | // Jump jumps to Lbl. 65 | type Jump struct { 66 | Lbl Label 67 | jump 68 | } 69 | 70 | // Choice pushes Lbl to the stack and if there is a failure the label will 71 | // be popped from the stack and jumped to. 72 | type Choice struct { 73 | Lbl Label 74 | jump 75 | } 76 | 77 | // Call pushes the next instruction to the stack as a return address and jumps 78 | // to Lbl. 79 | type Call struct { 80 | Lbl Label 81 | jump 82 | } 83 | 84 | // Commit jumps to Lbl and removes the top entry from the stack 85 | type Commit struct { 86 | Lbl Label 87 | jump 88 | } 89 | 90 | // Return pops a return address off the stack and jumps to it. 91 | type Return struct { 92 | basic 93 | } 94 | 95 | // Fail causes the instruction pointer to go to the fail state. 
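// (Illustrative note, following the LPeg-style compilation scheme GPeg is
// based on: an ordered choice p1 / p2 is typically compiled as
//
//	    Choice L1
//	    <code for p1>
//	    Commit L2
//	L1: <code for p2>
//	L2: ...
//
// so a failure while matching p1 pops the Choice entry and resumes at L1.)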
96 | type Fail struct { 97 | basic 98 | } 99 | 100 | // Set consumes the next byte of input if it is in the set of chars defined 101 | // by Chars. 102 | type Set struct { 103 | Chars charset.Set 104 | basic 105 | } 106 | 107 | // Any consumes the next N bytes and fails if that is not possible. 108 | type Any struct { 109 | N byte 110 | basic 111 | } 112 | 113 | // PartialCommit modifies the backtrack entry on the top of the stack to 114 | // point to the current subject offset, and jumps to Lbl. 115 | type PartialCommit struct { 116 | Lbl Label 117 | jump 118 | } 119 | 120 | // Span consumes zero or more bytes in the set Chars. This instruction 121 | // never fails. 122 | type Span struct { 123 | Chars charset.Set 124 | basic 125 | } 126 | 127 | // BackCommit pops a backtrack entry off the stack, goes to the subject 128 | // position in the entry, and jumps to Lbl. 129 | type BackCommit struct { 130 | Lbl Label 131 | jump 132 | } 133 | 134 | // FailTwice pops an entry off the stack and sets the instruction pointer to 135 | // the fail state. 136 | type FailTwice struct { 137 | basic 138 | } 139 | 140 | // Empty makes a zero-width assertion according to the Op option. We use the 141 | // same zero-width assertions that are supported by Go's regexp package. 142 | type Empty struct { 143 | Op syntax.EmptyOp 144 | basic 145 | } 146 | 147 | // TestChar consumes the next byte if it matches Byte and jumps to Lbl 148 | // otherwise. If the consumption is possible, a backtrack entry referring 149 | // to Lbl and the subject position from before consumption is pushed to the 150 | // stack. 151 | type TestChar struct { 152 | Byte byte 153 | Lbl Label 154 | jump 155 | } 156 | 157 | // TestCharNoChoice consumes the next byte if it matches Byte and jumps to Lbl 158 | // otherwise. No backtrack entry is pushed to the stack. 159 | type TestCharNoChoice struct { 160 | Byte byte 161 | Lbl Label 162 | jump 163 | } 164 | 165 | // TestSet consumes the next byte if it is in the set Chars and jumps to 166 | // Lbl otherwise. If the consumption is possible, a backtrack entry referring 167 | // to Lbl and the subject position from before consumption is pushed to the 168 | // stack. 169 | type TestSet struct { 170 | Chars charset.Set 171 | Lbl Label 172 | jump 173 | } 174 | 175 | // TestSetNoChoice is the same as TestSet but no backtrack entry is pushed to 176 | // the stack. 177 | type TestSetNoChoice struct { 178 | Chars charset.Set 179 | Lbl Label 180 | jump 181 | } 182 | 183 | // TestAny consumes the next N bytes and jumps to Lbl if that is not possible. 184 | // If the consumption is possible, a backtrack entry referring to Lbl and 185 | // the subject position from before consumption is pushed to the stack. 186 | type TestAny struct { 187 | N byte 188 | Lbl Label 189 | jump 190 | } 191 | 192 | // End immediately completes the pattern as a match. 193 | type End struct { 194 | basic 195 | Fail bool 196 | } 197 | 198 | // Nop does nothing. 199 | type Nop struct { 200 | basic 201 | } 202 | 203 | // MemoOpen begins a memo entry at this position. It marks the pattern that is 204 | // being memoized with a unique ID for that pattern, and stores a label to 205 | // jump to if the pattern is found in the memoization table. 206 | type MemoOpen struct { 207 | Lbl Label 208 | Id int 209 | jump 210 | } 211 | 212 | // MemoClose completes a memoization entry and adds the entry into the memo 213 | // table if it meets certain conditions (size, or other heuristics). 
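// On a later parse, when the VM reaches the corresponding MemoOpen and finds
// an entry for that position and pattern ID in the table, it can jump to the
// stored label and skip re-parsing the memoized pattern (see MemoOpen above).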
214 | type MemoClose struct { 215 | basic 216 | } 217 | 218 | // MemoTreeOpen starts a memoization tree repetition routine. 219 | type MemoTreeOpen struct { 220 | Lbl Label 221 | Id int 222 | jump 223 | } 224 | 225 | // MemoTreeInsert performs insertion into the memoization table for the tree 226 | // memoization strategy. 227 | type MemoTreeInsert struct { 228 | basic 229 | } 230 | 231 | // MemoTree "tree-ifies" the current memoization entries on the stack. 232 | type MemoTree struct { 233 | basic 234 | } 235 | 236 | // MemoTreeClose completes the tree memoization routine. 237 | type MemoTreeClose struct { 238 | Id int 239 | basic 240 | } 241 | 242 | // CaptureBegin begins capturing the given ID. 243 | type CaptureBegin struct { 244 | Id int 245 | basic 246 | } 247 | 248 | // CaptureLate begins capturing the given ID at the current subject position 249 | // minus Back. 250 | type CaptureLate struct { 251 | Back byte 252 | Id int 253 | basic 254 | } 255 | 256 | // CaptureEnd completes an active capture. 257 | type CaptureEnd struct { 258 | Id int 259 | basic 260 | } 261 | 262 | // CaptureFull begins a capture for the given ID at the current subject 263 | // position minus Back, and immediately completes the capture. This is 264 | // equivalent to CaptureLate Back ID; CaptureEnd. 265 | type CaptureFull struct { 266 | Back byte 267 | Id int 268 | basic 269 | } 270 | 271 | // CheckBegin marks the beginning position for a checker. 272 | type CheckBegin struct { 273 | Id int 274 | Flag int 275 | basic 276 | } 277 | 278 | // CheckEnd records the end position of a checker and applies the checker to 279 | // determine if the match should fail. 280 | type CheckEnd struct { 281 | Checker Checker 282 | basic 283 | } 284 | 285 | // Error logs an error message at the current position. 286 | type Error struct { 287 | basic 288 | Message string 289 | } 290 | 291 | type basic struct{} 292 | 293 | func (b basic) insn() {} 294 | 295 | type jump struct { 296 | basic 297 | } 298 | 299 | func (j jump) jumpt() {} 300 | 301 | // String returns the string representation of this instruction. 302 | func (i Label) String() string { 303 | return fmt.Sprintf("L%v", i.Id) 304 | } 305 | 306 | // String returns the string representation of this instruction. 307 | func (i Char) String() string { 308 | return fmt.Sprintf("Char %v", strconv.QuoteRune(rune(i.Byte))) 309 | } 310 | 311 | // String returns the string representation of this instruction. 312 | func (i Jump) String() string { 313 | return fmt.Sprintf("Jump %v", i.Lbl) 314 | } 315 | 316 | // String returns the string representation of this instruction. 317 | func (i Choice) String() string { 318 | return fmt.Sprintf("Choice %v", i.Lbl) 319 | } 320 | 321 | // String returns the string representation of this instruction. 322 | func (i Call) String() string { 323 | return fmt.Sprintf("Call %v", i.Lbl) 324 | } 325 | 326 | // String returns the string representation of this instruction. 327 | func (i Commit) String() string { 328 | return fmt.Sprintf("Commit %v", i.Lbl) 329 | } 330 | 331 | // String returns the string representation of this instruction. 332 | func (i Return) String() string { 333 | return "Return" 334 | } 335 | 336 | // String returns the string representation of this instruction. 337 | func (i Fail) String() string { 338 | return "Fail" 339 | } 340 | 341 | // String returns the string representation of this instruction. 
342 | func (i Set) String() string { 343 | return fmt.Sprintf("Set %v", i.Chars) 344 | } 345 | 346 | // String returns the string representation of this instruction. 347 | func (i Any) String() string { 348 | return fmt.Sprintf("Any %v", i.N) 349 | } 350 | 351 | // String returns the string representation of this instruction. 352 | func (i PartialCommit) String() string { 353 | return fmt.Sprintf("PartialCommit %v", i.Lbl) 354 | } 355 | 356 | // String returns the string representation of this instruction. 357 | func (i Span) String() string { 358 | return fmt.Sprintf("Span %v", i.Chars) 359 | } 360 | 361 | // String returns the string representation of this instruction. 362 | func (i BackCommit) String() string { 363 | return fmt.Sprintf("BackCommit %v", i.Lbl) 364 | } 365 | 366 | // String returns the string representation of this instruction. 367 | func (i FailTwice) String() string { 368 | return "FailTwice" 369 | } 370 | 371 | // String returns the string representation of this instruction. 372 | func (i TestChar) String() string { 373 | return fmt.Sprintf("TestChar %v %v", strconv.QuoteRune(rune(i.Byte)), i.Lbl) 374 | } 375 | 376 | // String returns the string representation of this instruction. 377 | func (i TestCharNoChoice) String() string { 378 | return fmt.Sprintf("TestCharNoChoice %v %v", strconv.QuoteRune(rune(i.Byte)), i.Lbl) 379 | } 380 | 381 | // String returns the string representation of this instruction. 382 | func (i TestSet) String() string { 383 | return fmt.Sprintf("TestSet %v %v", i.Chars, i.Lbl) 384 | } 385 | 386 | // String returns the string representation of this instruction. 387 | func (i TestSetNoChoice) String() string { 388 | return fmt.Sprintf("TestSetNoChoice %v %v", i.Chars, i.Lbl) 389 | } 390 | 391 | // String returns the string representation of this instruction. 392 | func (i TestAny) String() string { 393 | return fmt.Sprintf("TestAny %v %v", i.N, i.Lbl) 394 | } 395 | 396 | // String returns the string representation of this instruction. 397 | func (i End) String() string { 398 | var result string 399 | if i.Fail { 400 | result = "Fail" 401 | } else { 402 | result = "Success" 403 | } 404 | return fmt.Sprintf("End %s", result) 405 | } 406 | 407 | // String returns the string representation of this instruction. 408 | func (i Nop) String() string { 409 | return "Nop" 410 | } 411 | 412 | // String returns the string representation of this instruction. 413 | func (i CheckBegin) String() string { 414 | return "CheckBegin" 415 | } 416 | 417 | // String returns the string representation of this instruction. 418 | func (i CheckEnd) String() string { 419 | return fmt.Sprintf("CheckEnd %v", i.Checker) 420 | } 421 | 422 | // String returns the string representation of this instruction. 423 | func (i MemoOpen) String() string { 424 | return fmt.Sprintf("MemoOpen %v %v", i.Lbl, i.Id) 425 | } 426 | 427 | // String returns the string representation of this instruction. 428 | func (i MemoClose) String() string { 429 | return "MemoClose" 430 | } 431 | 432 | // String returns the string representation of this instruction. 433 | func (i MemoTreeOpen) String() string { 434 | return fmt.Sprintf("MemoTreeOpen %v %v", i.Lbl, i.Id) 435 | } 436 | 437 | // String returns the string representation of this instruction. 438 | func (i MemoTreeInsert) String() string { 439 | return "MemoTreeInsert" 440 | } 441 | 442 | // String returns the string representation of this instruction. 
443 | func (i MemoTree) String() string { 444 | return "MemoTree" 445 | } 446 | 447 | // String returns the string representation of this instruction. 448 | func (i MemoTreeClose) String() string { 449 | return fmt.Sprintf("MemoTreeClose %v", i.Id) 450 | } 451 | 452 | // String returns the string representation of this instruction. 453 | func (i CaptureBegin) String() string { 454 | return fmt.Sprintf("Capture begin %v", i.Id) 455 | } 456 | 457 | // String returns the string representation of this instruction. 458 | func (i CaptureLate) String() string { 459 | return fmt.Sprintf("Capture late %v %v", i.Back, i.Id) 460 | } 461 | 462 | // String returns the string representation of this instruction. 463 | func (i CaptureEnd) String() string { 464 | return "Capture end" 465 | } 466 | 467 | // String returns the string representation of this instruction. 468 | func (i CaptureFull) String() string { 469 | return fmt.Sprintf("Capture full %v %v", i.Back, i.Id) 470 | } 471 | 472 | // String returns the string representation of this instruction. 473 | func (i Error) String() string { 474 | return fmt.Sprintf("Error %s", strconv.QuoteToASCII(i.Message)) 475 | } 476 | 477 | // String returns the string representation of this instruction. 478 | func (i Empty) String() string { 479 | return fmt.Sprintf("Empty %s", emptyToString(i.Op)) 480 | } 481 | 482 | // String returns the string representation of the program. 483 | func (p Program) String() string { 484 | s := "" 485 | var last Insn 486 | for _, insn := range p { 487 | switch insn.(type) { 488 | case Nop: 489 | continue 490 | case Label: 491 | if _, ok := last.(Label); ok { 492 | s += fmt.Sprintf("\n%v:", insn) 493 | } else { 494 | s += fmt.Sprintf("%v:", insn) 495 | } 496 | default: 497 | s += fmt.Sprintf("\t%v\n", insn) 498 | } 499 | last = insn 500 | } 501 | s += "\n" 502 | return s 503 | } 504 | 505 | func emptyToString(op syntax.EmptyOp) string { 506 | switch op { 507 | case syntax.EmptyBeginLine: 508 | return "BeginLine" 509 | case syntax.EmptyEndLine: 510 | return "EndLine" 511 | case syntax.EmptyBeginText: 512 | return "BeginText" 513 | case syntax.EmptyEndText: 514 | return "EndText" 515 | case syntax.EmptyWordBoundary: 516 | return "WordBoundary" 517 | case syntax.EmptyNoWordBoundary: 518 | return "NoWordBoundary" 519 | } 520 | return "Unknown" 521 | } 522 | -------------------------------------------------------------------------------- /memo/capture.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | import ( 4 | "bytes" 5 | "fmt" 6 | ) 7 | 8 | const ( 9 | tNode = iota 10 | tDummy 11 | ) 12 | 13 | type Capture struct { 14 | id int32 15 | typ int32 16 | 17 | off int 18 | length int 19 | ment *Entry 20 | children []*Capture 21 | } 22 | 23 | func NewCaptureNode(id int, start, length int, children []*Capture) *Capture { 24 | c := &Capture{ 25 | id: int32(id), 26 | typ: tNode, 27 | off: start, 28 | length: length, 29 | children: children, 30 | } 31 | return c 32 | } 33 | 34 | func NewCaptureDummy(start, length int, children []*Capture) *Capture { 35 | c := &Capture{ 36 | id: 0, 37 | typ: tDummy, 38 | off: start, 39 | length: length, 40 | children: children, 41 | } 42 | return c 43 | } 44 | 45 | func (c *Capture) ChildIterator(start int) func() *Capture { 46 | i := 0 47 | var subit, ret func() *Capture 48 | ret = func() *Capture { 49 | if i >= len(c.children) { 50 | return nil 51 | } 52 | ch := c.children[i] 53 | if ch.Dummy() && subit == nil { 54 | subit = ch.ChildIterator(ch.off) 55 
| } 56 | if subit != nil { 57 | ch = subit() 58 | } else { 59 | i++ 60 | } 61 | if ch == nil { 62 | subit = nil 63 | i++ 64 | return ret() 65 | } 66 | return ch 67 | } 68 | return ret 69 | } 70 | 71 | func (c *Capture) Child(n int) *Capture { 72 | it := c.ChildIterator(0) 73 | i := 0 74 | for ch := it(); ch != nil; ch = it() { 75 | if i == n { 76 | return ch 77 | } 78 | i++ 79 | } 80 | return nil 81 | } 82 | 83 | func (c *Capture) NumChildren() int { 84 | nchild := 0 85 | for _, ch := range c.children { 86 | if ch.Dummy() { 87 | nchild += ch.NumChildren() 88 | } else { 89 | nchild++ 90 | } 91 | } 92 | return nchild 93 | } 94 | 95 | func (c *Capture) Start() int { 96 | if c.ment != nil { 97 | return c.ment.pos.Pos() + c.off 98 | } 99 | return c.off 100 | } 101 | 102 | func (c *Capture) Len() int { 103 | return c.length 104 | } 105 | 106 | func (c *Capture) End() int { 107 | return c.Start() + c.length 108 | } 109 | 110 | func (c *Capture) Dummy() bool { 111 | return c.typ == tDummy 112 | } 113 | 114 | func (c *Capture) Id() int { 115 | return int(c.id) 116 | } 117 | 118 | func (c *Capture) setMEnt(e *Entry) { 119 | if c.ment != nil { 120 | return 121 | } 122 | 123 | c.ment = e 124 | c.off = c.off - e.pos.Pos() 125 | 126 | for _, c := range c.children { 127 | c.setMEnt(e) 128 | } 129 | } 130 | 131 | // String returns a readable string representation of this node, showing the ID 132 | // of this node and its children. 133 | func (c *Capture) String() string { 134 | buf := &bytes.Buffer{} 135 | for i, c := range c.children { 136 | buf.WriteString(c.String()) 137 | if i != len(c.children)-1 { 138 | buf.WriteString(", ") 139 | } 140 | } 141 | return fmt.Sprintf("{%d, [%s]}", c.id, buf.String()) 142 | } 143 | -------------------------------------------------------------------------------- /memo/edit.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | // An Edit represents a modification to the subject string where the interval 4 | // [Start, End) is modified to be Len bytes. If Len = 0, this is equivalent 5 | // to deleting the interval, and if Start = End this is an insertion. 6 | type Edit struct { 7 | Start, End int 8 | Len int 9 | } 10 | -------------------------------------------------------------------------------- /memo/entry.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/memo/interval" 5 | ) 6 | 7 | // An Entry represents a memoized parse result. It stores the non-terminal 8 | // memoized, the start position of the parse result, the length, and the number 9 | // of characters examined to make the parse determination. If the length is -1, 10 | // the non-terminal failed to match at this location (but still may have 11 | // examined a non-zero number of characters). 12 | type Entry struct { 13 | length int 14 | examined int 15 | count int 16 | captures []*Capture 17 | pos interval.Pos 18 | } 19 | 20 | func (e *Entry) setPos(pos interval.Pos) { 21 | e.pos = pos 22 | for i := range e.captures { 23 | e.captures[i].setMEnt(e) 24 | } 25 | } 26 | 27 | // Pos returns this entry's starting position. 28 | func (e *Entry) Pos() int { 29 | return e.pos.Pos() 30 | } 31 | 32 | // Length returns the number of characters memoized by this entry. 33 | func (e *Entry) Length() int { 34 | return e.length 35 | } 36 | 37 | // Captures returns the captures that occurred within this memoized parse 38 | // result. 
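// The Start and End of each returned capture are absolute subject offsets:
// captures are stored relative to the entry and shifted by the entry's
// current position on access, so they remain correct after the entry moves.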
39 | func (e *Entry) Captures() []*Capture { 40 | return e.captures 41 | } 42 | 43 | func (e *Entry) Count() int { 44 | return e.count 45 | } 46 | 47 | func (e *Entry) Examined() int { 48 | return e.examined 49 | } 50 | -------------------------------------------------------------------------------- /memo/interval/interval_test.go: -------------------------------------------------------------------------------- 1 | package interval_test 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/memo/interval" 8 | "github.com/zyedidia/gpeg/memo/interval/lazy" 9 | "github.com/zyedidia/gpeg/memo/interval/lazylog" 10 | ) 11 | 12 | func randrange(max int) (int, int) { 13 | low := rand.Intn(max) 14 | high := low + rand.Intn(1000) 15 | if low == high { 16 | high = low + 1 17 | } 18 | return low, high 19 | } 20 | 21 | func randint(min, max int) int { 22 | return rand.Intn(max-min) + min 23 | } 24 | 25 | func TestTree(t *testing.T) { 26 | it := &lazy.Array{} 27 | ia := &lazylog.Tree{} 28 | 29 | const ( 30 | opAdd = iota 31 | opFind 32 | opRemoveAndShift 33 | opPos 34 | 35 | nops = 300000 36 | maxidx = 10 37 | maxid = 10 38 | maxshamt = 50 39 | ) 40 | 41 | var pt, pa interval.Pos 42 | var length int 43 | var haspt bool 44 | 45 | for i := 0; i < nops; i++ { 46 | op := rand.Intn(4) 47 | switch op { 48 | case opAdd: 49 | id := rand.Intn(maxid) 50 | low, high := randrange(maxidx) 51 | pt = it.Add(id, low, high, i) 52 | pa = ia.Add(id, low, high, i) 53 | length = high - low 54 | haspt = true 55 | case opFind: 56 | id := rand.Intn(maxid) 57 | pos := rand.Intn(maxidx) 58 | 59 | vt := it.FindLargest(id, pos) 60 | va := ia.FindLargest(id, pos) 61 | 62 | if vt == nil && va == nil { 63 | continue 64 | } 65 | 66 | if vt == nil && va != nil || va == nil && vt != nil { 67 | t.Fatalf("Find (%d, %d): %v != %v", id, pos, vt, va) 68 | } 69 | 70 | if vt.(int) != va.(int) { 71 | t.Fatalf("Find (%d, %d): %d != %d", id, pos, vt.(int), va.(int)) 72 | } 73 | case opRemoveAndShift: 74 | low, high := randrange(maxidx) 75 | amt := randint(-maxshamt, maxshamt) 76 | 77 | if haspt { 78 | ptpos := pt.Pos() 79 | if lazy.Overlaps(lazy.Interval{ 80 | Low: low, 81 | High: high, 82 | }, lazy.Interval{ 83 | Low: ptpos, 84 | High: ptpos + length, 85 | }) { 86 | haspt = false 87 | } 88 | } 89 | 90 | it.RemoveAndShift(low, high, amt) 91 | ia.RemoveAndShift(low, high, amt) 92 | case opPos: 93 | if haspt && pt.Pos() != pa.Pos() { 94 | t.Fatalf("%d != %d", pt.Pos(), pa.Pos()) 95 | } 96 | } 97 | } 98 | } 99 | -------------------------------------------------------------------------------- /memo/interval/lazy/LICENSE-AVL: -------------------------------------------------------------------------------- 1 | This license applies to the file: tree.go 2 | 3 | MIT License 4 | 5 | Copyright (c) 2017 Kostas Karasavvas, 2021 Zachary Yedidia 6 | 7 | Permission is hereby granted, free of charge, to any person obtaining a copy 8 | of this software and associated documentation files (the "Software"), to deal 9 | in the Software without restriction, including without limitation the rights 10 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 11 | copies of the Software, and to permit persons to whom the Software is 12 | furnished to do so, subject to the following conditions: 13 | 14 | The above copyright notice and this permission notice shall be included in all 15 | copies or substantial portions of the Software. 
16 | 17 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 18 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 19 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 20 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 21 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 22 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 23 | SOFTWARE. 24 | -------------------------------------------------------------------------------- /memo/interval/lazy/array.go: -------------------------------------------------------------------------------- 1 | package lazy 2 | 3 | import "github.com/zyedidia/gpeg/memo/interval" 4 | 5 | // An Array is another implementation of the interval.Set backed by an array 6 | // rather than an AVL tree. This implementation is naive and ineffecient, but 7 | // provides a good point of comparison for benchmarking and testing. 8 | type Array struct { 9 | slots []slot 10 | } 11 | 12 | type slot struct { 13 | *ivalue 14 | id int 15 | } 16 | 17 | func (iv *ivalue) Pos() int { 18 | return iv.interval.Low 19 | } 20 | 21 | func (a *Array) FindLargest(id, pos int) interval.Value { 22 | var max int 23 | maxi := -1 24 | for i, in := range a.slots { 25 | if in.interval.Low == pos && in.id == id && in.interval.High > max { 26 | maxi = i 27 | max = in.interval.High 28 | } 29 | } 30 | if maxi == -1 || maxi >= len(a.slots) { 31 | return nil 32 | } 33 | 34 | return a.slots[maxi].value 35 | } 36 | 37 | func (a *Array) Add(id, low, high int, val interval.Value) interval.Pos { 38 | iv := &ivalue{ 39 | interval: Interval{low, high}, 40 | value: val, 41 | } 42 | a.slots = append(a.slots, slot{ 43 | id: id, 44 | ivalue: iv, 45 | }) 46 | return iv 47 | } 48 | 49 | func (a *Array) RemoveAndShift(low, high, amt int) { 50 | for i := 0; i < len(a.slots); { 51 | if Overlaps(a.slots[i].interval, Interval{low, high}) { 52 | a.slots[i] = a.slots[len(a.slots)-1] 53 | a.slots = a.slots[:len(a.slots)-1] 54 | } else { 55 | i++ 56 | } 57 | } 58 | 59 | if amt == 0 { 60 | return 61 | } 62 | 63 | for i := range a.slots { 64 | if a.slots[i].interval.Low >= low { 65 | a.slots[i].interval = a.slots[i].interval.Shift(amt) 66 | } 67 | } 68 | } 69 | 70 | func (a *Array) Size() int { 71 | return len(a.slots) 72 | } 73 | -------------------------------------------------------------------------------- /memo/interval/lazy/interval.go: -------------------------------------------------------------------------------- 1 | package lazy 2 | 3 | import "fmt" 4 | 5 | type Interval struct { 6 | Low, High int 7 | } 8 | 9 | func (i Interval) String() string { 10 | return fmt.Sprintf("[%d, %d)", i.Low, i.High) 11 | } 12 | 13 | func (i Interval) Shift(amt int) Interval { 14 | return Interval{ 15 | Low: i.Low + amt, 16 | High: i.High + amt, 17 | } 18 | } 19 | 20 | func (i Interval) Len() int { 21 | return i.High - i.Low 22 | } 23 | 24 | func Overlaps(i1, i2 Interval) bool { 25 | return i1.Low < i2.High && i1.High > i2.Low 26 | } 27 | -------------------------------------------------------------------------------- /memo/interval/lazy/interval_test.go: -------------------------------------------------------------------------------- 1 | package lazy 2 | 3 | import ( 4 | "math/rand" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/memo/interval" 8 | ) 9 | 10 | func randrange(max int) (int, int) { 11 | low := rand.Intn(max) 12 | high := low + rand.Intn(1000) 13 | if low == high { 14 | 
high = low + 1 15 | } 16 | return low, high 17 | } 18 | 19 | func randint(min, max int) int { 20 | return rand.Intn(max-min) + min 21 | } 22 | 23 | func checkParents(n *node, t *testing.T) { 24 | if n == nil { 25 | return 26 | } 27 | if n.left != nil && n.left.parent != n { 28 | t.Fatalf("Incorrect parent n: %p, n.left.parent: %p", n, n.left.parent) 29 | } 30 | if n.right != nil && n.right.parent != n { 31 | t.Fatalf("Incorrect parent n: %p, n.right.parent: %p", n, n.right.parent) 32 | } 33 | checkParents(n.left, t) 34 | checkParents(n.right, t) 35 | } 36 | 37 | func TestTree(t *testing.T) { 38 | it := &Tree{} 39 | ia := &Array{} 40 | 41 | const ( 42 | opAdd = iota 43 | opFind 44 | opRemoveAndShift 45 | opPos 46 | 47 | nops = 300000 48 | maxidx = 10 49 | maxid = 10 50 | maxshamt = 50 51 | ) 52 | 53 | var pt, pa interval.Pos 54 | var length int 55 | var haspt bool 56 | 57 | for i := 0; i < nops; i++ { 58 | op := rand.Intn(4) 59 | switch op { 60 | case opAdd: 61 | id := rand.Intn(maxid) 62 | low, high := randrange(maxidx) 63 | pt = it.Add(id, low, high, i) 64 | pa = ia.Add(id, low, high, i) 65 | length = high - low 66 | haspt = true 67 | case opFind: 68 | id := rand.Intn(maxid) 69 | pos := rand.Intn(maxidx) 70 | 71 | vt := it.FindLargest(id, pos) 72 | va := ia.FindLargest(id, pos) 73 | 74 | if vt == nil && va == nil { 75 | continue 76 | } 77 | 78 | if vt == nil && va != nil || va == nil && vt != nil { 79 | t.Fatalf("Find (%d, %d): %v != %v", id, pos, vt, va) 80 | } 81 | 82 | if vt.(int) != va.(int) { 83 | t.Fatalf("Find (%d, %d): %d != %d", id, pos, vt.(int), va.(int)) 84 | } 85 | case opRemoveAndShift: 86 | low, high := randrange(maxidx) 87 | amt := randint(-maxshamt, maxshamt) 88 | 89 | if haspt { 90 | ptpos := pt.Pos() 91 | if Overlaps(Interval{ 92 | Low: low, 93 | High: high, 94 | }, Interval{ 95 | Low: ptpos, 96 | High: ptpos + length, 97 | }) { 98 | haspt = false 99 | } 100 | } 101 | 102 | it.RemoveAndShift(low, high, amt) 103 | ia.RemoveAndShift(low, high, amt) 104 | case opPos: 105 | if haspt && pt.Pos() != pa.Pos() { 106 | t.Fatalf("%d != %d", pt.Pos(), pa.Pos()) 107 | } 108 | } 109 | checkParents(it.root, t) 110 | } 111 | } 112 | -------------------------------------------------------------------------------- /memo/interval/lazy/tree.go: -------------------------------------------------------------------------------- 1 | // Package lazy provides an interval tree backed by an AVL tree. In addition, 2 | // the interval tree supports a lazy shifting algorithm. 3 | package lazy 4 | 5 | import "github.com/zyedidia/gpeg/memo/interval" 6 | 7 | type key struct { 8 | id int 9 | pos int 10 | } 11 | 12 | // compare orders keys by pos and then id. 13 | func (k key) compare(other key) int { 14 | if k.pos < other.pos { 15 | return -1 16 | } else if k.pos > other.pos { 17 | return 1 18 | } else if k.id < other.id { 19 | return -1 20 | } else if k.id > other.id { 21 | return 1 22 | } 23 | return 0 24 | } 25 | 26 | type Tree struct { 27 | root *node 28 | } 29 | 30 | // Adds the given interval to the tree. An id can also be given to the interval 31 | // to separate different types of intervals. 32 | func (t *Tree) Add(id, low, high int, val interval.Value) (pos interval.Pos) { 33 | t.root, pos = t.root.add(key{id, low}, high, val, nil) 34 | return pos 35 | } 36 | 37 | // FindLargest returns the largest interval associated with (id, pos). 
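// Here "largest" means the interval with the greatest High endpoint among
// those added with this id at exactly this start position; nil is returned if
// no such interval exists.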
38 | func (t *Tree) FindLargest(id, pos int) interval.Value { 39 | n := t.root.search(key{id, pos}) 40 | if n == nil || len(n.iv.ivs) == 0 { 41 | return nil 42 | } 43 | 44 | var max, maxi int 45 | for i := range n.iv.ivs { 46 | if n.iv.ivs[i].interval.High > max { 47 | max = n.iv.ivs[i].interval.High 48 | maxi = i 49 | } 50 | } 51 | return n.iv.ivs[maxi].value 52 | } 53 | 54 | // RemoveAndShift removes all entries that overlap with [low, high) and then shifts 55 | // all entries greater than low by amt. 56 | func (t *Tree) RemoveAndShift(low, high, amt int) { 57 | t.root = t.root.removeOverlaps(low, high, nil) 58 | if amt != 0 { 59 | t.root.addShift(shift{low, amt}) 60 | } 61 | } 62 | 63 | // Size returns the number of intervals in the tree. 64 | func (t *Tree) Size() int { 65 | return t.root.getSize() 66 | } 67 | 68 | type ivalues struct { 69 | ivs []ivalue 70 | node *node 71 | } 72 | 73 | func (iv *ivalues) Pos() int { 74 | iv.node.applyAllShifts() 75 | return iv.node.key.pos 76 | } 77 | 78 | type ivalue struct { 79 | interval Interval 80 | value interval.Value 81 | } 82 | 83 | // A shift of intervals in the tree. The shift starts at idx and moves 84 | // intervals after idx by amt. Shifts are lazily applied in the tree to avoid 85 | // frequent linear time costs. 86 | type shift struct { 87 | idx int 88 | amt int 89 | } 90 | 91 | type node struct { 92 | key key 93 | max int 94 | iv *ivalues 95 | shifts []shift 96 | 97 | // height counts nodes (not edges) 98 | height int 99 | left *node 100 | right *node 101 | parent *node 102 | } 103 | 104 | func (n *node) addShift(sh shift) { 105 | if n == nil { 106 | return 107 | } 108 | 109 | n.shifts = append(n.shifts, sh) 110 | } 111 | 112 | func (n *node) applyShifts() { 113 | if n == nil { 114 | return 115 | } 116 | for _, sh := range n.shifts { 117 | if n.max >= sh.idx { 118 | if n.key.pos >= sh.idx { 119 | n.key.pos += sh.amt 120 | for i, iv := range n.iv.ivs { 121 | n.iv.ivs[i].interval = iv.interval.Shift(sh.amt) 122 | } 123 | } 124 | n.max += sh.amt 125 | // n.updateMax() 126 | } 127 | 128 | n.left.addShift(sh) 129 | n.right.addShift(sh) 130 | } 131 | n.shifts = nil 132 | } 133 | 134 | func (n *node) add(key key, high int, value interval.Value, parent *node) (*node, *ivalues) { 135 | if n == nil { 136 | n = new(node) 137 | *n = node{ 138 | key: key, 139 | max: high, 140 | iv: &ivalues{ 141 | ivs: []ivalue{{ 142 | interval: Interval{key.pos, high}, 143 | value: value, 144 | }}, 145 | node: n, 146 | }, 147 | height: 1, 148 | left: nil, 149 | right: nil, 150 | parent: parent, 151 | } 152 | return n, n.iv 153 | } 154 | n.applyShifts() 155 | 156 | var iv *ivalues 157 | if key.compare(n.key) < 0 { 158 | n.left, iv = n.left.add(key, high, value, n) 159 | } else if key.compare(n.key) > 0 { 160 | n.right, iv = n.right.add(key, high, value, n) 161 | } else { 162 | // if same key exists update value 163 | n.iv.ivs = append(n.iv.ivs, ivalue{ 164 | interval: Interval{key.pos, high}, 165 | value: value, 166 | }) 167 | iv = n.iv 168 | } 169 | return n.rebalanceTree(parent), iv 170 | } 171 | 172 | func (n *node) calcMax() int { 173 | max := 0 174 | for _, iv := range n.iv.ivs { 175 | if iv.interval.High > max { 176 | max = iv.interval.High 177 | } 178 | } 179 | return max 180 | } 181 | 182 | func (n *node) updateMax() { 183 | if n != nil { 184 | if n.right != nil { 185 | n.max = max(n.max, n.right.max) 186 | } 187 | if n.left != nil { 188 | n.max = max(n.max, n.left.max) 189 | } 190 | n.max = max(n.max, n.calcMax()) 191 | } 192 | } 193 | 194 | func (n 
*node) remove(key key, parent *node) *node { 195 | if n == nil { 196 | return nil 197 | } 198 | n.applyShifts() 199 | if key.compare(n.key) < 0 { 200 | n.left = n.left.remove(key, n) 201 | } else if key.compare(n.key) > 0 { 202 | n.right = n.right.remove(key, n) 203 | } else { 204 | if n.left != nil && n.right != nil { 205 | n.left.applyShifts() 206 | n.right.applyShifts() 207 | // node to delete found with both children; 208 | // replace values with smallest node of the right sub-tree 209 | rightMinNode := n.right.findSmallest() 210 | n.key = rightMinNode.key 211 | n.iv = rightMinNode.iv 212 | n.iv.node = n 213 | n.shifts = rightMinNode.shifts 214 | // delete smallest node that we replaced 215 | n.right = n.right.remove(rightMinNode.key, n) 216 | } else if n.left != nil { 217 | n.left.applyShifts() 218 | // node only has left child 219 | n = n.left 220 | } else if n.right != nil { 221 | n.right.applyShifts() 222 | // node only has right child 223 | n = n.right 224 | } else { 225 | // node has no children 226 | n = nil 227 | return n 228 | } 229 | 230 | } 231 | n.parent = parent 232 | return n.rebalanceTree(parent) 233 | } 234 | 235 | func (n *node) search(key key) *node { 236 | if n == nil { 237 | return nil 238 | } 239 | n.applyShifts() 240 | if key.compare(n.key) < 0 { 241 | return n.left.search(key) 242 | } else if key.compare(n.key) > 0 { 243 | return n.right.search(key) 244 | } else { 245 | return n 246 | } 247 | } 248 | 249 | func (n *node) overlaps(low, high int, result []interval.Value) []interval.Value { 250 | if n == nil { 251 | return result 252 | } 253 | 254 | n.applyShifts() 255 | 256 | if low >= n.max { 257 | return result 258 | } 259 | 260 | result = n.left.overlaps(low, high, result) 261 | 262 | for _, iv := range n.iv.ivs { 263 | if Overlaps(iv.interval, Interval{low, high}) { 264 | result = append(result, iv.value) 265 | } 266 | } 267 | 268 | if high <= n.key.pos { 269 | return result 270 | } 271 | 272 | result = n.right.overlaps(low, high, result) 273 | return result 274 | } 275 | 276 | func (n *node) removeOverlaps(low, high int, parent *node) *node { 277 | if n == nil { 278 | return n 279 | } 280 | 281 | n.applyShifts() 282 | 283 | if low >= n.max { 284 | return n 285 | } 286 | 287 | n.left = n.left.removeOverlaps(low, high, n) 288 | 289 | for i := 0; i < len(n.iv.ivs); { 290 | if Overlaps(n.iv.ivs[i].interval, Interval{low, high}) { 291 | n.iv.ivs[i] = n.iv.ivs[len(n.iv.ivs)-1] 292 | n.iv.ivs[len(n.iv.ivs)-1] = ivalue{} 293 | n.iv.ivs = n.iv.ivs[:len(n.iv.ivs)-1] 294 | } else { 295 | i++ 296 | } 297 | } 298 | 299 | if len(n.iv.ivs) == 0 { 300 | doright := high > n.key.pos 301 | n = n.remove(n.key, parent) 302 | if doright { 303 | return n.removeOverlaps(low, high, parent) 304 | } 305 | return n 306 | } 307 | 308 | if high <= n.key.pos { 309 | return n 310 | } 311 | 312 | n.right = n.right.removeOverlaps(low, high, n) 313 | return n 314 | } 315 | 316 | func (n *node) getHeight() int { 317 | if n == nil { 318 | return 0 319 | } 320 | return n.height 321 | } 322 | 323 | func (n *node) getSize() int { 324 | if n == nil { 325 | return 0 326 | } 327 | return n.left.getSize() + n.right.getSize() + 1 328 | } 329 | 330 | func (n *node) updateHeightAndMax() { 331 | n.height = 1 + max(n.left.getHeight(), n.right.getHeight()) 332 | n.updateMax() 333 | } 334 | 335 | // Checks if node is balanced and rebalance 336 | func (n *node) rebalanceTree(parent *node) *node { 337 | if n == nil { 338 | return n 339 | } 340 | n.updateHeightAndMax() 341 | 342 | // check balance factor and 
rotateLeft if right-heavy and rotateRight if left-heavy 343 | balanceFactor := n.left.getHeight() - n.right.getHeight() 344 | if balanceFactor == -2 { 345 | // check if child is left-heavy and rotateRight first 346 | if n.right.left.getHeight() > n.right.right.getHeight() { 347 | n.right = n.right.rotateRight(n) 348 | } 349 | return n.rotateLeft(parent) 350 | } else if balanceFactor == 2 { 351 | // check if child is right-heavy and rotateLeft first 352 | if n.left.right.getHeight() > n.left.left.getHeight() { 353 | n.left = n.left.rotateLeft(n) 354 | } 355 | return n.rotateRight(parent) 356 | } 357 | return n 358 | } 359 | 360 | // Rotate nodes left to balance node 361 | func (n *node) rotateLeft(newParent *node) *node { 362 | n.applyShifts() 363 | if n.right != nil { 364 | n.right.applyShifts() 365 | } 366 | 367 | newRoot := n.right 368 | n.right = newRoot.left 369 | if newRoot.left != nil { 370 | newRoot.left.parent = n 371 | } 372 | newRoot.left = n 373 | n.parent = newRoot 374 | newRoot.parent = newParent 375 | 376 | n.updateHeightAndMax() 377 | newRoot.updateHeightAndMax() 378 | return newRoot 379 | } 380 | 381 | // Rotate nodes right to balance node 382 | func (n *node) rotateRight(newParent *node) *node { 383 | n.applyShifts() 384 | if n.left != nil { 385 | n.left.applyShifts() 386 | } 387 | 388 | newRoot := n.left 389 | n.left = newRoot.right 390 | if newRoot.right != nil { 391 | newRoot.right.parent = n 392 | } 393 | newRoot.right = n 394 | n.parent = newRoot 395 | newRoot.parent = newParent 396 | 397 | n.updateHeightAndMax() 398 | newRoot.updateHeightAndMax() 399 | return newRoot 400 | } 401 | 402 | // Finds the smallest child (based on the key) for the current node 403 | func (n *node) findSmallest() *node { 404 | if n.left != nil { 405 | n.left.applyShifts() 406 | return n.left.findSmallest() 407 | } else { 408 | return n 409 | } 410 | } 411 | 412 | func (n *node) applyAllShifts() { 413 | if n.parent != nil && n.parent != n { 414 | n.parent.applyAllShifts() 415 | } 416 | n.applyShifts() 417 | } 418 | 419 | func max(a int, b int) int { 420 | if a > b { 421 | return a 422 | } 423 | return b 424 | } 425 | -------------------------------------------------------------------------------- /memo/interval/lazylog/interval.go: -------------------------------------------------------------------------------- 1 | package lazylog 2 | 3 | import "fmt" 4 | 5 | type interval struct { 6 | low, high int 7 | value interface{} 8 | } 9 | 10 | func (i *interval) Low() int { 11 | return i.low 12 | } 13 | 14 | func (i *interval) High() int { 15 | return i.high 16 | } 17 | 18 | func (i *interval) length() int { 19 | return i.High() - i.Low() 20 | } 21 | 22 | func (i *interval) String() string { 23 | return fmt.Sprintf("[%d, %d)", i.low, i.high) 24 | } 25 | 26 | // returns true if i1 overlaps with the interval [low:high) 27 | func overlaps(i1 interval, low, high int) bool { 28 | return i1.Low() <= high && i1.High() >= low 29 | } 30 | -------------------------------------------------------------------------------- /memo/interval/lazylog/tree.go: -------------------------------------------------------------------------------- 1 | // Package lazylog provides an interval tree backed by an AVL tree. In addition, 2 | // the interval tree supports shifting intervals in amortized constant time 3 | // using lazy shifts. 4 | package lazylog 5 | 6 | import ( 7 | intval "github.com/zyedidia/gpeg/memo/interval" 8 | ) 9 | 10 | // ShiftThreshold is the number of shifts to accumulate before applying all 11 | // shifts. 
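// A value of -1 disables the threshold, so shifts are only applied lazily as
// individual nodes are visited.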
12 | const ShiftThreshold = -1 13 | 14 | // A key stores the start position of an interval, and a unique ID if you would 15 | // like to store multiple intervals starting from the same position. The key is 16 | // used for uniquely identifying a particular interval when searching or 17 | // removing from the tree. 18 | type key struct { 19 | pos int 20 | id int 21 | } 22 | 23 | // compare orders keys by pos and then id. 24 | func (k key) compare(other key) int { 25 | if k.pos < other.pos { 26 | return -1 27 | } else if k.pos > other.pos { 28 | return 1 29 | } else if k.id < other.id { 30 | return -1 31 | } else if k.id > other.id { 32 | return 1 33 | } 34 | return 0 35 | } 36 | 37 | // A shift of intervals in the tree. The shift starts at idx and moves 38 | // intervals after idx by amt. Shifts are lazily applied in the tree to avoid 39 | // linear time costs. 40 | type shift struct { 41 | idx int 42 | amt int 43 | tstamp uint64 44 | } 45 | 46 | type Tree struct { 47 | root *node 48 | shifts []shift // list of non-applied shifts 49 | tstamp uint64 // most recent timestamp 50 | } 51 | 52 | // Adds the given interval to the tree. An id should also be given to the 53 | // interval to uniquely identify it if any other intervals begin at the same 54 | // location. 55 | func (t *Tree) Add(id, low, high int, value intval.Value) intval.Pos { 56 | var loc intval.Pos 57 | t.root, loc = t.root.add(t, key{ 58 | pos: low, 59 | id: id, 60 | }, interval{ 61 | low: low, 62 | high: high, 63 | value: value, 64 | }) 65 | return loc 66 | } 67 | 68 | // Search for the interval starting at pos with the given id. Returns nil if no 69 | // such interval exists. 70 | func (t *Tree) FindLargest(id, pos int) intval.Value { 71 | n := t.root.search(key{ 72 | pos: pos, 73 | id: id, 74 | }) 75 | if n != nil { 76 | if len(n.interval.ins) == 0 { 77 | return nil 78 | } 79 | 80 | max := 0 81 | for i, in := range n.interval.ins[1:] { 82 | if in.length() > n.interval.ins[max].length() { 83 | max = i + 1 84 | } 85 | } 86 | 87 | return n.interval.ins[max].value 88 | } 89 | return nil 90 | } 91 | 92 | func (t *Tree) RemoveAndShift(low, high, amt int) { 93 | t.root = t.root.removeOverlaps(low, high) 94 | if amt != 0 { 95 | t.shift(low, amt) 96 | } 97 | } 98 | 99 | func (t *Tree) AllValues() []intval.Value { 100 | var vals []intval.Value 101 | return t.root.allvals(vals) 102 | } 103 | 104 | // Shift all intervals in the tree after idx by amt. The shift idx should not 105 | // lie inside an interval. This could conceivably be implemented, but is not 106 | // currently. If a negative shift is performed, ensure that there is space for 107 | // all intervals to be shifted left without overlapping with another interval. 108 | func (t *Tree) shift(idx, amt int) { 109 | if amt == 0 { 110 | return 111 | } 112 | 113 | t.tstamp++ 114 | t.shifts = append(t.shifts, shift{ 115 | idx: idx, 116 | amt: amt, 117 | tstamp: t.tstamp, 118 | }) 119 | if ShiftThreshold != -1 && len(t.shifts) >= ShiftThreshold { 120 | t.applyAllShifts() 121 | } 122 | } 123 | 124 | func (t *Tree) applyAllShifts() { 125 | t.root.applyAllShifts() 126 | t.shifts = nil 127 | } 128 | 129 | // Size returns the total number of intervals stored in the tree. 
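// Note that intervals which share the same key (position and id) are stored in a single node and therefore contribute one to this count.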
130 | func (t *Tree) Size() int { 131 | return t.root.size() 132 | } 133 | 134 | type node struct { 135 | key key 136 | max int 137 | interval *lazyInterval 138 | tstamp uint64 // timestamp to determine which shifts to apply 139 | tree *Tree 140 | 141 | // height counts nodes (not edges) 142 | height int 143 | left *node 144 | right *node 145 | } 146 | 147 | // Adds a new node 148 | func (n *node) add(tree *Tree, key key, value interval) (*node, *lazyInterval) { 149 | if n == nil { 150 | nn := &node{ 151 | tree: tree, 152 | key: key, 153 | max: value.High(), 154 | height: 1, 155 | left: nil, 156 | right: nil, 157 | tstamp: tree.tstamp, 158 | } 159 | nn.interval = &lazyInterval{ 160 | ins: []interval{value}, 161 | n: nn, 162 | } 163 | return nn, nn.interval 164 | } 165 | n.applyShifts() 166 | 167 | var loc *lazyInterval 168 | if key.compare(n.key) < 0 { 169 | n.left, loc = n.left.add(tree, key, value) 170 | } else if key.compare(n.key) > 0 { 171 | n.right, loc = n.right.add(tree, key, value) 172 | } else { 173 | // if same key exists update value 174 | n.interval.ins = append(n.interval.ins, value) 175 | n.tstamp = tree.tstamp 176 | loc = n.interval 177 | } 178 | return n.rebalanceTree(), loc 179 | } 180 | 181 | func (n *node) updateMax() { 182 | if n != nil { 183 | if n.right != nil { 184 | n.max = max(n.max, n.right.max) 185 | } 186 | if n.left != nil { 187 | n.max = max(n.max, n.left.max) 188 | } 189 | n.max = max(n.max, n.interval.High()) 190 | } 191 | } 192 | 193 | // Removes a node 194 | func (n *node) remove(key key) *node { 195 | if n == nil { 196 | return nil 197 | } 198 | n.applyShifts() 199 | if key.compare(n.key) < 0 { 200 | n.left = n.left.remove(key) 201 | } else if key.compare(n.key) > 0 { 202 | n.right = n.right.remove(key) 203 | } else { 204 | if n.left != nil && n.right != nil { 205 | n.left.applyShifts() 206 | n.right.applyShifts() 207 | // node to delete found with both children; 208 | // replace values with smallest node of the right sub-tree 209 | rightMinNode := n.right.findSmallest() 210 | 211 | n.key = rightMinNode.key 212 | copy(n.interval.ins, rightMinNode.interval.ins) 213 | n.interval.n = n 214 | n.tstamp = rightMinNode.tstamp 215 | // delete smallest node that we replaced 216 | n.right = n.right.remove(rightMinNode.key) 217 | } else if n.left != nil { 218 | n.left.applyShifts() 219 | // node only has left child 220 | n = n.left 221 | } else if n.right != nil { 222 | n.right.applyShifts() 223 | // node only has right child 224 | n = n.right 225 | } else { 226 | // node has no children 227 | n = nil 228 | return n 229 | } 230 | 231 | } 232 | return n.rebalanceTree() 233 | } 234 | 235 | // Searches for a node 236 | func (n *node) search(key key) *node { 237 | if n == nil { 238 | return nil 239 | } 240 | n.applyShifts() 241 | if key.compare(n.key) < 0 { 242 | return n.left.search(key) 243 | } else if key.compare(n.key) > 0 { 244 | return n.right.search(key) 245 | } else { 246 | return n 247 | } 248 | } 249 | 250 | func (n *node) removeOverlaps(low, high int) *node { 251 | if n == nil { 252 | return n 253 | } 254 | 255 | n.applyShifts() 256 | 257 | if low > n.max { 258 | return n 259 | } 260 | 261 | n.left = n.left.removeOverlaps(low, high) 262 | 263 | for i := 0; i < len(n.interval.ins); { 264 | if overlaps(n.interval.ins[i], low, high) { 265 | n.interval.ins[i] = n.interval.ins[len(n.interval.ins)-1] 266 | n.interval.ins[len(n.interval.ins)-1] = interval{} 267 | n.interval.ins = n.interval.ins[:len(n.interval.ins)-1] 268 | } else { 269 | i++ 270 | } 271 | } 272 | 
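	// If every interval stored at this node was removed, delete the node itself; if the removal range may extend past this key, continue removing overlaps in the resulting subtree.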
273 | if len(n.interval.ins) == 0 { 274 | doright := high >= n.key.pos 275 | n = n.remove(n.key) 276 | if doright { 277 | return n.removeOverlaps(low, high) 278 | } 279 | return n 280 | } 281 | 282 | if high < n.key.pos { 283 | return n 284 | } 285 | n.right = n.right.removeOverlaps(low, high) 286 | return n 287 | } 288 | 289 | func (n *node) allvals(vals []intval.Value) []intval.Value { 290 | if n == nil { 291 | return vals 292 | } 293 | 294 | vals = n.left.allvals(vals) 295 | 296 | for _, in := range n.interval.ins { 297 | vals = append(vals, in.value) 298 | } 299 | 300 | vals = n.right.allvals(vals) 301 | 302 | return vals 303 | } 304 | 305 | func (n *node) getHeight() int { 306 | if n == nil { 307 | return 0 308 | } 309 | return n.height 310 | } 311 | 312 | func (n *node) size() int { 313 | if n == nil { 314 | return 0 315 | } 316 | return n.left.size() + n.right.size() + 1 317 | } 318 | 319 | func (n *node) recalculateHeight() { 320 | n.height = 1 + max(n.left.getHeight(), n.right.getHeight()) 321 | } 322 | 323 | // Checks if node is balanced and rebalance 324 | func (n *node) rebalanceTree() *node { 325 | if n == nil { 326 | return n 327 | } 328 | n.recalculateHeight() 329 | n.updateMax() 330 | 331 | // check balance factor and rotateLeft if right-heavy and rotateRight if left-heavy 332 | balanceFactor := n.left.getHeight() - n.right.getHeight() 333 | if balanceFactor <= -2 { 334 | // check if child is left-heavy and rotateRight first 335 | if n.right.left.getHeight() > n.right.right.getHeight() { 336 | n.right = n.right.rotateRight() 337 | } 338 | return n.rotateLeft() 339 | } else if balanceFactor >= 2 { 340 | // check if child is right-heavy and rotateLeft first 341 | if n.left.right.getHeight() > n.left.left.getHeight() { 342 | n.left = n.left.rotateLeft() 343 | } 344 | return n.rotateRight() 345 | } 346 | return n 347 | } 348 | 349 | // Rotate nodes left to balance node 350 | func (n *node) rotateLeft() *node { 351 | n.applyShifts() 352 | if n.right != nil { 353 | n.right.applyShifts() 354 | } 355 | 356 | newRoot := n.right 357 | n.right = newRoot.left 358 | newRoot.left = n 359 | 360 | n.recalculateHeight() 361 | n.updateMax() 362 | newRoot.recalculateHeight() 363 | newRoot.updateMax() 364 | return newRoot 365 | } 366 | 367 | // Rotate nodes right to balance node 368 | func (n *node) rotateRight() *node { 369 | n.applyShifts() 370 | if n.left != nil { 371 | n.left.applyShifts() 372 | } 373 | 374 | newRoot := n.left 375 | n.left = newRoot.right 376 | newRoot.right = n 377 | 378 | n.recalculateHeight() 379 | n.updateMax() 380 | newRoot.recalculateHeight() 381 | newRoot.updateMax() 382 | return newRoot 383 | } 384 | 385 | // Finds the smallest child (based on the key) for the current node 386 | func (n *node) findSmallest() *node { 387 | if n.left != nil { 388 | n.left.applyShifts() 389 | return n.left.findSmallest() 390 | } else { 391 | return n 392 | } 393 | } 394 | 395 | func (n *node) applyShift(s *shift) { 396 | if n.tstamp >= s.tstamp { 397 | // this shift is outdated and we have already applied it 398 | return 399 | } 400 | 401 | n.tstamp = s.tstamp 402 | if n.max < s.idx { 403 | return 404 | } 405 | n.max += s.amt 406 | if n.key.pos >= s.idx { 407 | n.key.pos += s.amt 408 | n.interval.Shift(s.amt) 409 | } 410 | n.updateMax() 411 | } 412 | 413 | func (n *node) applyShifts() { 414 | // optimization: first check if we are completely up-to-date and if so 415 | // there is nothing to do. 
416 | if len(n.tree.shifts) == 0 || n.tstamp >= n.tree.shifts[len(n.tree.shifts)-1].tstamp { 417 | return 418 | } 419 | // optimization: search backwards to find the starting point. Alternatively 420 | // we could binary search? not sure which is faster. 421 | var j int 422 | for j = len(n.tree.shifts) - 1; j > 0; j-- { 423 | if n.tstamp >= n.tree.shifts[j].tstamp { 424 | j = j + 1 425 | break 426 | } 427 | } 428 | for i := range n.tree.shifts[j:] { 429 | n.applyShift(&n.tree.shifts[j+i]) 430 | } 431 | } 432 | 433 | func (n *node) applyAllShifts() { 434 | if n == nil { 435 | return 436 | } 437 | 438 | n.left.applyAllShifts() 439 | n.right.applyAllShifts() 440 | n.applyShifts() 441 | } 442 | 443 | func (n *node) eachNode(fn func(*node)) { 444 | if n == nil { 445 | return 446 | } 447 | 448 | n.left.eachNode(fn) 449 | n.applyShifts() 450 | fn(n) 451 | n.right.eachNode(fn) 452 | } 453 | 454 | type lazyInterval struct { 455 | ins []interval 456 | n *node 457 | } 458 | 459 | func (i *lazyInterval) Pos() int { 460 | i.n.applyShifts() 461 | return i.n.key.pos 462 | } 463 | 464 | func (i *lazyInterval) High() int { 465 | high := 0 466 | for _, in := range i.ins { 467 | if in.High() > high { 468 | high = in.High() 469 | } 470 | } 471 | return high 472 | } 473 | 474 | func (i *lazyInterval) Shift(amt int) { 475 | for j := range i.ins { 476 | i.ins[j].low += amt 477 | i.ins[j].high += amt 478 | } 479 | } 480 | 481 | // Returns max number 482 | func max(a int, b int) int { 483 | if a > b { 484 | return a 485 | } 486 | return b 487 | } 488 | -------------------------------------------------------------------------------- /memo/interval/map.go: -------------------------------------------------------------------------------- 1 | package interval 2 | 3 | type Value interface{} 4 | 5 | type Pos interface { 6 | Pos() int 7 | } 8 | 9 | // An interval map is a key-value data structure that maps intervals to 10 | // values. Every value is associated with an interval [low, high) and an id. 11 | // Values may be looked up, added, removed, and queried for overlapping 12 | // intervals. The tree also supports efficient shifting of intervals via 13 | // a lazy shift propagation mechanism. 14 | type Map interface { 15 | // Returns the value associated with the largest interval at (id, pos). 16 | FindLargest(id, pos int) Value 17 | // Adds a new value with 'id' and interval [low, high). Returns a value 18 | // that can be used to locate the inserted value even after shifts have 19 | // occurred (you may want to associate the Pos with your value). 20 | Add(id, low, high int, val Value) Pos 21 | // Removes all values with intervals that overlap [low, high) and then 22 | // performs a shift of size amt at idx. 23 | RemoveAndShift(low, high, amt int) 24 | // AllValues returns all values in the tree. 25 | AllValues() []Value 26 | // Returns the number of values in the tree. 27 | Size() int 28 | } 29 | -------------------------------------------------------------------------------- /memo/none.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | // NoneTable implements a memoization table that does nothing. 
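// It satisfies the Table interface and can be supplied wherever a Table is expected in order to disable memoization entirely (the re package, for example, parses 're' patterns with a NoneTable).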
4 | type NoneTable struct{} 5 | 6 | // Get always returns 'not found' 7 | func (t NoneTable) Get(id, pos int) (*Entry, bool) { 8 | return nil, false 9 | } 10 | 11 | func (t NoneTable) Put(id, start, length, examined, count int, captures []*Capture) {} 12 | func (t NoneTable) ApplyEdit(e Edit) {} 13 | func (t NoneTable) Overlaps(low, high int) []*Entry { return nil } 14 | func (t NoneTable) Size() int { return 0 } 15 | func (t NoneTable) AllValues() []*Entry { return nil } 16 | -------------------------------------------------------------------------------- /memo/table.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | // A Table is an interface for a memoization table data structure. The 4 | // memoization table tracks memoized parse results corresponding to a 5 | // non-terminal parsed at a certain location. The table interface defines the 6 | // ApplyEdit function which is crucial for incremental parsing. 7 | type Table interface { 8 | // Get returns the entry associated with the given position and ID. If 9 | // there are multiple entries with the same ID at that position, the 10 | // largest entry is returned (determined by matched length). 11 | Get(id, pos int) (*Entry, bool) 12 | 13 | // Put adds a new entry to the table. 14 | Put(id, start, length, examined, count int, captures []*Capture) 15 | 16 | // ApplyEdit updates the table as necessary when an edit occurs. This 17 | // operation invalidates all entries within the range of the edit and 18 | // shifts entries that are to the right of the edit as necessary. 19 | ApplyEdit(Edit) 20 | 21 | AllValues() []*Entry 22 | 23 | // Size returns the number of entries in the table. 24 | Size() int 25 | } 26 | -------------------------------------------------------------------------------- /memo/tree.go: -------------------------------------------------------------------------------- 1 | package memo 2 | 3 | import ( 4 | "sync" 5 | 6 | "github.com/zyedidia/gpeg/memo/interval" 7 | "github.com/zyedidia/gpeg/memo/interval/lazylog" 8 | ) 9 | 10 | // TreeTable implements a memoization table using an interval tree (augmented 11 | // to support efficient shifting). 
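// Entries whose examined length is smaller than the configured threshold are not stored at all (see Put), so very small parse results are never memoized.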
12 | type TreeTable struct { 13 | interval.Map 14 | threshold int 15 | lock sync.Mutex 16 | } 17 | 18 | func NewTreeTable(threshold int) *TreeTable { 19 | return &TreeTable{ 20 | Map: &lazylog.Tree{}, 21 | threshold: threshold, 22 | } 23 | } 24 | 25 | func (t *TreeTable) Get(id, pos int) (*Entry, bool) { 26 | t.lock.Lock() 27 | entry := t.Map.FindLargest(id, pos) 28 | t.lock.Unlock() 29 | e, ok := entry.(*Entry) 30 | return e, ok 31 | } 32 | 33 | func (t *TreeTable) Put(id, start, length, examined, count int, captures []*Capture) { 34 | if examined < t.threshold || length == 0 { 35 | return 36 | } 37 | 38 | examined = max(examined, length) 39 | 40 | e := &Entry{ 41 | length: length, 42 | examined: examined, 43 | count: count, 44 | captures: captures, 45 | } 46 | t.lock.Lock() 47 | e.setPos(t.Map.Add(id, start, start+examined, e)) 48 | t.lock.Unlock() 49 | } 50 | 51 | func (t *TreeTable) ApplyEdit(e Edit) { 52 | low, high := e.Start, e.End 53 | if low == high { 54 | high = low + 1 55 | } 56 | amt := e.Len - (e.End - e.Start) 57 | 58 | t.lock.Lock() 59 | t.Map.RemoveAndShift(low, high, amt) 60 | t.lock.Unlock() 61 | } 62 | 63 | func (t *TreeTable) AllValues() []*Entry { 64 | vals := t.Map.AllValues() 65 | entries := make([]*Entry, len(vals)) 66 | for i, v := range vals { 67 | entries[i] = v.(*Entry) 68 | } 69 | return entries 70 | } 71 | 72 | func max(a, b int) int { 73 | if a > b { 74 | return a 75 | } 76 | return b 77 | } 78 | -------------------------------------------------------------------------------- /pattern/compile.go: -------------------------------------------------------------------------------- 1 | package pattern 2 | 3 | import ( 4 | "fmt" 5 | 6 | "github.com/zyedidia/gpeg/charset" 7 | "github.com/zyedidia/gpeg/isa" 8 | ) 9 | 10 | // A NotFoundError means a a non-terminal was not found during grammar 11 | // compilation. 12 | type NotFoundError struct { 13 | Name string 14 | } 15 | 16 | // Error returns the error message. 17 | func (e *NotFoundError) Error() string { return "non-terminal " + e.Name + ": not found" } 18 | 19 | // Compile takes an input pattern and returns the result of compiling it into a 20 | // parsing program, and optimizing the program. 21 | func Compile(p Pattern) (isa.Program, error) { 22 | c, err := p.Compile() 23 | if err != nil { 24 | return nil, err 25 | } 26 | 27 | Optimize(c) 28 | return c, nil 29 | } 30 | 31 | // MustCompile is the same as Compile but panics if there is an error during 32 | // compilation. 33 | func MustCompile(p Pattern) isa.Program { 34 | c, err := Compile(p) 35 | if err != nil { 36 | panic(err) 37 | } 38 | return c 39 | } 40 | 41 | // openCall is a dummy instruction for resolving recursive function calls in 42 | // grammars. 43 | type openCall struct { 44 | name string 45 | isa.Nop 46 | } 47 | 48 | func (i openCall) String() string { 49 | return fmt.Sprintf("OpenCall %v", i.name) 50 | } 51 | 52 | // Compile this node. 
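// In the general case an alternation `p1 / p2` compiles to
//
//	Choice L1
//	<p1>
//	Commit L2
//	L1:
//	<p2>
//	L2:
//
// with a single charset instruction emitted instead when both alternatives can be combined into one set, and with the TestCharNoChoice/TestSetNoChoice head-fail variants used when the two alternatives are provably disjoint.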
53 | func (p *AltNode) Compile() (isa.Program, error) { 54 | // optimization: if Left and Right are charsets/single chars, return the union 55 | set, ok := combine(Get(p.Left), Get(p.Right)) 56 | if ok { 57 | return isa.Program{ 58 | isa.Set{Chars: set}, 59 | }, nil 60 | } 61 | 62 | l, err1 := Get(p.Left).Compile() 63 | r, err2 := Get(p.Right).Compile() 64 | if err1 != nil { 65 | return nil, err1 66 | } 67 | if err2 != nil { 68 | return nil, err2 69 | } 70 | 71 | L1 := isa.NewLabel() 72 | 73 | // optimization: if the right and left nodes are disjoint, we can use 74 | // NoChoice variants of the head-fail optimization instructions. 75 | var disjoint bool 76 | var testinsn isa.Insn 77 | linsn, okl := nextInsn(l) 78 | rinsn, okr := nextInsn(r) 79 | if okl && okr { 80 | switch lt := linsn.(type) { 81 | case isa.Set: 82 | switch rt := rinsn.(type) { 83 | case isa.Char: 84 | disjoint = !lt.Chars.Has(rt.Byte) 85 | } 86 | testinsn = isa.TestSetNoChoice{Chars: lt.Chars, Lbl: L1} 87 | case isa.Char: 88 | switch rt := rinsn.(type) { 89 | case isa.Char: 90 | disjoint = lt.Byte != rt.Byte 91 | case isa.Set: 92 | disjoint = !rt.Chars.Has(lt.Byte) 93 | } 94 | testinsn = isa.TestCharNoChoice{Byte: lt.Byte, Lbl: L1} 95 | } 96 | } 97 | 98 | L2 := isa.NewLabel() 99 | code := make(isa.Program, 0, len(l)+len(r)+5) 100 | if disjoint { 101 | code = append(code, testinsn) 102 | code = append(code, l[1:]...) 103 | code = append(code, isa.Jump{Lbl: L2}) 104 | } else { 105 | code = append(code, isa.Choice{Lbl: L1}) 106 | code = append(code, l...) 107 | code = append(code, isa.Commit{Lbl: L2}) 108 | } 109 | code = append(code, L1) 110 | code = append(code, r...) 111 | code = append(code, L2) 112 | return code, nil 113 | } 114 | 115 | // Compile this node. 116 | func (p *SeqNode) Compile() (isa.Program, error) { 117 | l, err1 := Get(p.Left).Compile() 118 | r, err2 := Get(p.Right).Compile() 119 | if err1 != nil { 120 | return nil, err1 121 | } 122 | if err2 != nil { 123 | return nil, err2 124 | } 125 | 126 | return append(l, r...), nil 127 | } 128 | 129 | // Compile this node. 130 | func (p *StarNode) Compile() (isa.Program, error) { 131 | switch t := Get(p.Patt).(type) { 132 | case *ClassNode: 133 | // optimization: repeating a charset uses the dedicated instruction 'span' 134 | return isa.Program{ 135 | isa.Span{Chars: t.Chars}, 136 | }, nil 137 | case *MemoNode: 138 | // optimization: if the pattern we are repeating is a memoization 139 | // entry, we should use special instructions to memoize it as a tree to 140 | // get logarithmic saving when reparsing. 141 | sub, err := Get(t.Patt).Compile() 142 | code := make(isa.Program, 0, len(sub)+7) 143 | L1 := isa.NewLabel() 144 | L2 := isa.NewLabel() 145 | L3 := isa.NewLabel() 146 | NoJump := isa.NewLabel() 147 | memoId++ 148 | 149 | code = append(code, L1) 150 | code = append(code, isa.MemoTreeOpen{Id: memoId, Lbl: L3}) 151 | code = append(code, isa.Choice{Lbl: L2}) 152 | code = append(code, sub...) 
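		// On success: pop the choice frame (Commit to the next instruction), record the match with MemoTreeInsert, and jump back to L1 for the next repetition; a failure inside the subpattern lands at L2, where the open memo tree entries are closed.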
153 | code = append(code, isa.Commit{Lbl: NoJump}) 154 | code = append(code, NoJump) 155 | code = append(code, isa.MemoTreeInsert{}) 156 | code = append(code, L3) 157 | code = append(code, isa.MemoTree{}) 158 | code = append(code, isa.Jump{Lbl: L1}) 159 | code = append(code, L2) 160 | code = append(code, isa.MemoTreeClose{Id: memoId}) 161 | return code, err 162 | } 163 | 164 | sub, err := Get(p.Patt).Compile() 165 | code := make(isa.Program, 0, len(sub)+4) 166 | 167 | L1 := isa.NewLabel() 168 | L2 := isa.NewLabel() 169 | code = append(code, isa.Choice{Lbl: L2}) 170 | code = append(code, L1) 171 | code = append(code, sub...) 172 | code = append(code, isa.PartialCommit{Lbl: L1}) 173 | code = append(code, L2) 174 | return code, err 175 | } 176 | 177 | // Compile this node. 178 | func (p *PlusNode) Compile() (isa.Program, error) { 179 | starp := Star(Get(p.Patt)) 180 | star, err1 := starp.Compile() 181 | sub, err2 := Get(p.Patt).Compile() 182 | if err1 != nil { 183 | return nil, err1 184 | } 185 | if err2 != nil { 186 | return nil, err2 187 | } 188 | 189 | code := make(isa.Program, 0, len(sub)+len(star)) 190 | code = append(code, sub...) 191 | code = append(code, star...) 192 | return code, nil 193 | } 194 | 195 | // Compile this node. 196 | func (p *OptionalNode) Compile() (isa.Program, error) { 197 | // optimization: if the pattern is a class node or single char literal, we 198 | // can use the Test*NoChoice instructions. 199 | switch t := Get(p.Patt).(type) { 200 | case *LiteralNode: 201 | if len(t.Str) == 1 { 202 | L1 := isa.NewLabel() 203 | return isa.Program{ 204 | isa.TestCharNoChoice{Byte: t.Str[0], Lbl: L1}, 205 | L1, 206 | }, nil 207 | } 208 | case *ClassNode: 209 | L1 := isa.NewLabel() 210 | prog := isa.Program{ 211 | isa.TestSetNoChoice{Chars: t.Chars, Lbl: L1}, 212 | L1, 213 | } 214 | return prog, nil 215 | } 216 | 217 | a := AltNode{ 218 | Left: Get(p.Patt), 219 | Right: &EmptyNode{}, 220 | } 221 | return a.Compile() 222 | } 223 | 224 | // Compile this node. 225 | func (p *NotNode) Compile() (isa.Program, error) { 226 | sub, err := Get(p.Patt).Compile() 227 | L1 := isa.NewLabel() 228 | code := make(isa.Program, 0, len(sub)+3) 229 | code = append(code, isa.Choice{Lbl: L1}) 230 | code = append(code, sub...) 231 | code = append(code, isa.FailTwice{}) 232 | code = append(code, L1) 233 | return code, err 234 | } 235 | 236 | // Compile this node. 237 | func (p *AndNode) Compile() (isa.Program, error) { 238 | sub, err := Get(p.Patt).Compile() 239 | code := make(isa.Program, 0, len(sub)+5) 240 | L1 := isa.NewLabel() 241 | L2 := isa.NewLabel() 242 | 243 | code = append(code, isa.Choice{Lbl: L1}) 244 | code = append(code, sub...) 245 | code = append(code, isa.BackCommit{Lbl: L2}) 246 | code = append(code, L1) 247 | code = append(code, isa.Fail{}) 248 | code = append(code, L2) 249 | return code, err 250 | } 251 | 252 | // Compile this node. 253 | func (p *CapNode) Compile() (isa.Program, error) { 254 | sub, err := Get(p.Patt).Compile() 255 | if err != nil { 256 | return nil, err 257 | } 258 | code := make(isa.Program, 0, len(sub)+2) 259 | 260 | i := 0 261 | back := 0 262 | loop: 263 | for _, insn := range sub { 264 | switch t := insn.(type) { 265 | case isa.Char, isa.Set: 266 | back++ 267 | case isa.Any: 268 | back += int(t.N) 269 | default: 270 | break loop 271 | } 272 | i++ 273 | } 274 | 275 | if i == 0 || back >= 256 { 276 | code = append(code, isa.CaptureBegin{Id: p.Id}) 277 | i = 0 278 | } else if i == len(sub) && back < 256 { 279 | code = append(code, sub...) 
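		// The whole subpattern is a fixed-size run of byte-consuming instructions shorter than 256 bytes, so a single CaptureFull emitted after it can record the capture by backing up 'back' bytes.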
280 | code = append(code, isa.CaptureFull{Back: byte(back), Id: p.Id}) 281 | return code, nil 282 | } else { 283 | code = append(code, sub[:i]...) 284 | code = append(code, isa.CaptureLate{Back: byte(back), Id: p.Id}) 285 | } 286 | code = append(code, sub[i:]...) 287 | code = append(code, isa.CaptureEnd{}) 288 | return code, nil 289 | } 290 | 291 | // Compile this node. 292 | func (p *MemoNode) Compile() (isa.Program, error) { 293 | L1 := isa.NewLabel() 294 | sub, err := Get(p.Patt).Compile() 295 | code := make(isa.Program, 0, len(sub)+3) 296 | code = append(code, isa.MemoOpen{Lbl: L1, Id: p.Id}) 297 | code = append(code, sub...) 298 | code = append(code, isa.MemoClose{}) 299 | code = append(code, L1) 300 | return code, err 301 | } 302 | 303 | // Compile this node. 304 | func (p *CheckNode) Compile() (isa.Program, error) { 305 | L1 := isa.NewLabel() 306 | sub, err := Get(p.Patt).Compile() 307 | code := make(isa.Program, 0, len(sub)+3) 308 | code = append(code, isa.CheckBegin{ 309 | Id: p.Id, 310 | Flag: p.Flag, 311 | }) 312 | code = append(code, sub...) 313 | code = append(code, isa.CheckEnd{Checker: p.Checker}) 314 | code = append(code, L1) 315 | return code, err 316 | } 317 | 318 | // Compile this node. 319 | func (p *SearchNode) Compile() (isa.Program, error) { 320 | var rsearch Pattern 321 | var set charset.Set 322 | opt := false 323 | 324 | sub, err := Get(p.Patt).Compile() 325 | if err != nil { 326 | return nil, err 327 | } 328 | 329 | next, ok := nextInsn(sub) 330 | if ok { 331 | switch t := next.(type) { 332 | case isa.Char: 333 | set = charset.New([]byte{t.Byte}).Complement() 334 | opt = true 335 | case isa.Set: 336 | // Heuristic: if the set is smaller than 10 chars, it 337 | // is unlikely enough to match that we should consume all 338 | // chars from the complement before continuing the search. 339 | // The number 10 was arbitrarily chosen. 340 | if t.Chars.Size() < 10 { 341 | set = t.Chars.Complement() 342 | opt = true 343 | } 344 | } 345 | } 346 | 347 | if opt { 348 | rsearch = Concat(Star(Set(set)), NonTerm("S")) 349 | } else { 350 | rsearch = NonTerm("S") 351 | } 352 | 353 | return Grammar("S", map[string]Pattern{ 354 | "S": Or(Get(p.Patt), Concat(Any(1), rsearch)), 355 | }).Compile() 356 | } 357 | 358 | // Compile this node. 359 | func (p *EmptyOpNode) Compile() (isa.Program, error) { 360 | return isa.Program{ 361 | isa.Empty{ 362 | Op: p.Op, 363 | }, 364 | }, nil 365 | } 366 | 367 | // Compile this node. 368 | func (p *GrammarNode) Compile() (isa.Program, error) { 369 | p.Inline() 370 | 371 | used := make(map[string]bool) 372 | for _, v := range p.Defs { 373 | WalkPattern(v, true, func(sub Pattern) { 374 | switch t := sub.(type) { 375 | case *NonTermNode: 376 | if t.Inlined == nil { 377 | used[t.Name] = true 378 | } 379 | } 380 | }) 381 | } 382 | 383 | if len(used) == 0 { 384 | return p.Defs[p.Start].Compile() 385 | } 386 | 387 | code := make(isa.Program, 0) 388 | LEnd := isa.NewLabel() 389 | code = append(code, openCall{name: p.Start}, isa.Jump{Lbl: LEnd}) 390 | 391 | labels := make(map[string]isa.Label) 392 | for k, v := range p.Defs { 393 | if k != p.Start && !used[k] { 394 | continue 395 | } 396 | label := isa.NewLabel() 397 | labels[k] = label 398 | fn, err := v.Compile() 399 | if err != nil { 400 | return nil, err 401 | } 402 | code = append(code, label) 403 | code = append(code, fn...) 
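		// Each non-terminal's code ends with a Return so it can be entered with Call; the resolution pass below rewrites Call+Return pairs into Jumps (tail call optimization).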
404 | code = append(code, isa.Return{}) 405 | } 406 | 407 | // resolve calls to openCall and do tail call optimization 408 | for i := 0; i < len(code); i++ { 409 | insn := code[i] 410 | if oc, ok := insn.(openCall); ok { 411 | lbl, ok := labels[oc.name] 412 | if !ok { 413 | return nil, &NotFoundError{ 414 | Name: oc.name, 415 | } 416 | } 417 | 418 | // replace this placeholder instruction with a normal call 419 | var replace isa.Insn = isa.Call{Lbl: lbl} 420 | // if a call is immediately followed by a return, optimize to 421 | // a jump for tail call optimization. 422 | next, ok := nextInsn(code[i+1:]) 423 | if ok { 424 | switch next.(type) { 425 | case isa.Return: 426 | replace = isa.Jump{Lbl: lbl} 427 | // remove the return instruction if there is no label referring to it 428 | retidx, hadlbl := nextInsnLabel(code[i+1:]) 429 | if !hadlbl { 430 | code[i+1+retidx] = isa.Nop{} 431 | } 432 | } 433 | } 434 | 435 | // perform the replacement of the opencall by either a call or jump 436 | code[i] = replace 437 | } 438 | } 439 | 440 | code = append(code, LEnd) 441 | 442 | return code, nil 443 | } 444 | 445 | // Compile this node. 446 | func (p *ClassNode) Compile() (isa.Program, error) { 447 | return isa.Program{ 448 | isa.Set{Chars: p.Chars}, 449 | }, nil 450 | } 451 | 452 | // Compile this node. 453 | func (p *LiteralNode) Compile() (isa.Program, error) { 454 | code := make(isa.Program, len(p.Str)) 455 | for i := 0; i < len(p.Str); i++ { 456 | code[i] = isa.Char{Byte: p.Str[i]} 457 | } 458 | return code, nil 459 | } 460 | 461 | // Compile this node. 462 | func (p *NonTermNode) Compile() (isa.Program, error) { 463 | if p.Inlined != nil { 464 | return p.Inlined.Compile() 465 | } 466 | return isa.Program{ 467 | openCall{name: p.Name}, 468 | }, nil 469 | } 470 | 471 | // Compile this node. 472 | func (p *DotNode) Compile() (isa.Program, error) { 473 | return isa.Program{ 474 | isa.Any{N: p.N}, 475 | }, nil 476 | } 477 | 478 | // Compile this node. 479 | func (p *ErrorNode) Compile() (isa.Program, error) { 480 | var recovery isa.Program 481 | var err error 482 | 483 | if p.Recover == nil { 484 | recovery = isa.Program{ 485 | isa.End{Fail: true}, 486 | } 487 | } else { 488 | recovery, err = Get(p.Recover).Compile() 489 | } 490 | 491 | code := make(isa.Program, 0, len(recovery)+1) 492 | code = append(code, isa.Error{Message: p.Message}) 493 | code = append(code, recovery...) 494 | return code, err 495 | } 496 | 497 | // Compile this node. 498 | func (p *EmptyNode) Compile() (isa.Program, error) { 499 | return isa.Program{}, nil 500 | } 501 | -------------------------------------------------------------------------------- /pattern/nodes.go: -------------------------------------------------------------------------------- 1 | package pattern 2 | 3 | import ( 4 | "regexp/syntax" 5 | 6 | "github.com/zyedidia/gpeg/charset" 7 | "github.com/zyedidia/gpeg/isa" 8 | ) 9 | 10 | // A Pattern is an object that can be compiled into a parsing program. 11 | type Pattern interface { 12 | Compile() (isa.Program, error) 13 | } 14 | 15 | // AltNode is the binary operator for alternation. 16 | type AltNode struct { 17 | Left, Right Pattern 18 | } 19 | 20 | // SeqNode is the binary operator for sequences. 21 | type SeqNode struct { 22 | Left, Right Pattern 23 | } 24 | 25 | // StarNode is the operator for the Kleene star. 26 | type StarNode struct { 27 | Patt Pattern 28 | } 29 | 30 | // PlusNode is the operator for the Kleene plus. 
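// A PlusNode compiles to its subpattern followed by the Kleene star of that subpattern (p p*).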
31 | type PlusNode struct { 32 | Patt Pattern 33 | } 34 | 35 | // OptionalNode is the operator for making a pattern optional. 36 | type OptionalNode struct { 37 | Patt Pattern 38 | } 39 | 40 | // NotNode is the not predicate. 41 | type NotNode struct { 42 | Patt Pattern 43 | } 44 | 45 | // AndNode is the and predicate. 46 | type AndNode struct { 47 | Patt Pattern 48 | } 49 | 50 | // CapNode marks a pattern to be captured with a certain ID. 51 | type CapNode struct { 52 | Patt Pattern 53 | Id int 54 | } 55 | 56 | // MemoNode marks a pattern to be memoized with a certain ID. 57 | type MemoNode struct { 58 | Patt Pattern 59 | Id int 60 | } 61 | 62 | // CheckNode marks a pattern to be checked by a certain checker. 63 | type CheckNode struct { 64 | Patt Pattern 65 | Checker isa.Checker 66 | Id, Flag int 67 | } 68 | 69 | // GrammarNode represents a grammar of non-terminals and their associated 70 | // patterns. The Grammar must also have an entry non-terminal. 71 | type GrammarNode struct { 72 | Defs map[string]Pattern 73 | Start string 74 | } 75 | 76 | // SearchNode represents a search for a certain pattern. 77 | type SearchNode struct { 78 | Patt Pattern 79 | } 80 | 81 | // RepeatNode represents the repetition of a pattern a constant number of 82 | // times. 83 | type RepeatNode struct { 84 | Patt Pattern 85 | N int 86 | } 87 | 88 | // ClassNode represents a character set. 89 | type ClassNode struct { 90 | Chars charset.Set 91 | } 92 | 93 | // LiteralNode represents a literal string. 94 | type LiteralNode struct { 95 | Str string 96 | } 97 | 98 | // NonTermNode represents the use of a non-terminal. If this non-terminal is 99 | // inlined during compilation, the `Inlined` field will point to the pattern 100 | // that is inlined. 101 | type NonTermNode struct { 102 | Name string 103 | Inlined Pattern 104 | } 105 | 106 | // DotNode represents the pattern to match any byte. 107 | type DotNode struct { 108 | N uint8 109 | } 110 | 111 | // ErrorNode represents a pattern that fails with a certain error message. 112 | type ErrorNode struct { 113 | Message string 114 | Recover Pattern 115 | } 116 | 117 | // EmptyOpNode is a node that performs a zero-width assertion. 118 | type EmptyOpNode struct { 119 | Op syntax.EmptyOp 120 | } 121 | 122 | // EmptyNode represents the empty pattern. 123 | type EmptyNode struct { 124 | } 125 | 126 | // WalkFunc is a function that takes a pattern. 127 | type WalkFunc func(sub Pattern) 128 | 129 | // CountSubPatterns returns the number of subpatterns that exist in the given 130 | // pattern. 131 | func CountSubPatterns(p Pattern) int { 132 | count := 0 133 | WalkPattern(p, true, func(sub Pattern) { 134 | count++ 135 | }) 136 | return count 137 | } 138 | 139 | // WalkPattern calls fn for every subpattern contained in p. If followInline 140 | // is true, WalkPattern will walk over inlined patterns as well.
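// The traversal is pre-order: fn is invoked on a pattern before any of its subpatterns.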
141 | func WalkPattern(p Pattern, followInline bool, fn WalkFunc) { 142 | fn(p) 143 | switch t := p.(type) { 144 | case *AltNode: 145 | WalkPattern(t.Left, followInline, fn) 146 | WalkPattern(t.Right, followInline, fn) 147 | case *SeqNode: 148 | WalkPattern(t.Left, followInline, fn) 149 | WalkPattern(t.Right, followInline, fn) 150 | case *StarNode: 151 | WalkPattern(t.Patt, followInline, fn) 152 | case *PlusNode: 153 | WalkPattern(t.Patt, followInline, fn) 154 | case *OptionalNode: 155 | WalkPattern(t.Patt, followInline, fn) 156 | case *NotNode: 157 | WalkPattern(t.Patt, followInline, fn) 158 | case *AndNode: 159 | WalkPattern(t.Patt, followInline, fn) 160 | case *CapNode: 161 | WalkPattern(t.Patt, followInline, fn) 162 | case *MemoNode: 163 | WalkPattern(t.Patt, followInline, fn) 164 | case *SearchNode: 165 | WalkPattern(t.Patt, followInline, fn) 166 | case *CheckNode: 167 | WalkPattern(t.Patt, followInline, fn) 168 | case *ErrorNode: 169 | WalkPattern(t.Recover, followInline, fn) 170 | case *GrammarNode: 171 | for _, p := range t.Defs { 172 | WalkPattern(p, followInline, fn) 173 | } 174 | case *NonTermNode: 175 | if t.Inlined != nil && followInline { 176 | WalkPattern(t.Inlined, followInline, fn) 177 | } 178 | } 179 | } 180 | -------------------------------------------------------------------------------- /pattern/optimize.go: -------------------------------------------------------------------------------- 1 | package pattern 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/charset" 5 | "github.com/zyedidia/gpeg/isa" 6 | ) 7 | 8 | // Nodes with trees larger than this size will not be inlined. 9 | var InlineThreshold = 100 10 | 11 | // Inline performs inlining passes until the inliner reaches a steady-state. 12 | func (p *GrammarNode) Inline() { 13 | for p.inline() { 14 | } 15 | } 16 | 17 | // Get returns a possibly optimized version of this pattern. Always use this 18 | // function to read a pattern, especially if you will be using the types of the 19 | // underlying nodes. This function performs optimizations like collapsing an 20 | // alternation of two class nodes into one class node. 21 | func Get(p Pattern) Pattern { 22 | switch t := p.(type) { 23 | case *NonTermNode: 24 | // Return the inlined pattern for a non-terminal that has been inlined. 25 | if t.Inlined != nil { 26 | return t.Inlined 27 | } 28 | case *AltNode: 29 | l, r := Get(t.Left), Get(t.Right) 30 | if n, emptyL := l.(*EmptyNode); emptyL { 31 | return n 32 | } 33 | if _, emptyR := r.(*EmptyNode); emptyR { 34 | return Get(Optional(l)) 35 | } 36 | 37 | // Combine the left and right sides of an alternation into a class node 38 | // if possible. 39 | set, ok := combine(l, r) 40 | if ok { 41 | return &ClassNode{Chars: set} 42 | } 43 | case *OptionalNode: 44 | // Optional of a Kleene star is unnecessary and we can remove the 45 | // optional. 46 | star, ok := Get(t.Patt).(*StarNode) 47 | if ok { 48 | return star 49 | } 50 | case *SeqNode: 51 | // optimize use of empty: `a ""` and `"" a` are just `a`. 52 | l, r := Get(t.Left), Get(t.Right) 53 | if _, emptyR := r.(*EmptyNode); emptyR { 54 | return l 55 | } 56 | if _, emptyL := l.(*EmptyNode); emptyL { 57 | return r 58 | } 59 | 60 | // This optimizes patterns like `![a-z] .`. Instead of using a not 61 | // predicate in this case, we can just complement the set and use a 62 | // class node. 
63 | nn, okl := l.(*NotNode) 64 | if !okl { 65 | break 66 | } 67 | 68 | var set charset.Set 69 | switch lt := Get(nn.Patt).(type) { 70 | case *LiteralNode: 71 | if len(lt.Str) != 1 { 72 | return p 73 | } 74 | set = charset.New([]byte{lt.Str[0]}) 75 | case *ClassNode: 76 | set = lt.Chars 77 | default: 78 | return p 79 | } 80 | 81 | switch rt := r.(type) { 82 | case *DotNode: 83 | if rt.N == 1 { 84 | return &ClassNode{ 85 | Chars: set.Complement(), 86 | } 87 | } 88 | case *ClassNode: 89 | return &ClassNode{ 90 | Chars: rt.Chars.Sub(set), 91 | } 92 | case *LiteralNode: 93 | if len(rt.Str) == 1 { 94 | return &ClassNode{ 95 | Chars: charset.New([]byte{rt.Str[0]}).Sub(set), 96 | } 97 | } 98 | } 99 | } 100 | return p 101 | } 102 | 103 | // Performs inlining on a grammar node. 104 | func (p *GrammarNode) inline() bool { 105 | sizes := make(map[string]int) 106 | leaves := make(map[string]bool) 107 | for n, sub := range p.Defs { 108 | size := 0 109 | leaf := true 110 | WalkPattern(sub, true, func(s Pattern) { 111 | switch t := s.(type) { 112 | case *NonTermNode: 113 | if t.Inlined == nil { 114 | leaf = false 115 | } 116 | } 117 | size++ 118 | }) 119 | sizes[n] = size 120 | leaves[n] = leaf 121 | } 122 | 123 | didInline := false 124 | WalkPattern(p, true, func(sub Pattern) { 125 | switch t := sub.(type) { 126 | case *NonTermNode: 127 | if sz, ok := sizes[t.Name]; ok && t.Inlined == nil { 128 | // We only inline nodes if they are small enough and don't use 129 | // any non-terminals themselves. 130 | if sz < InlineThreshold && leaves[t.Name] { 131 | didInline = true 132 | t.Inlined = p.Defs[t.Name] 133 | } 134 | } 135 | } 136 | }) 137 | return didInline 138 | } 139 | 140 | // If the bytes matched by p1 and p2 can be matched by a single charset, then 141 | // that single combined charset is returned. 142 | func combine(p1 Pattern, p2 Pattern) (charset.Set, bool) { 143 | var set charset.Set 144 | switch t1 := p1.(type) { 145 | case *LiteralNode: 146 | if len(t1.Str) != 1 { 147 | return set, false 148 | } 149 | switch t2 := p2.(type) { 150 | case *ClassNode: 151 | return t2.Chars.Add(charset.New([]byte{t1.Str[0]})), true 152 | case *LiteralNode: 153 | if len(t2.Str) != 1 { 154 | return set, false 155 | } 156 | return charset.New([]byte{t1.Str[0], t2.Str[0]}), true 157 | } 158 | case *ClassNode: 159 | switch t2 := p2.(type) { 160 | case *ClassNode: 161 | return t2.Chars.Add(t1.Chars), true 162 | case *LiteralNode: 163 | if len(t2.Str) != 1 { 164 | return set, false 165 | } 166 | return t1.Chars.Add(charset.New([]byte{t2.Str[0]})), true 167 | } 168 | } 169 | return set, false 170 | } 171 | 172 | // Returns the next instruction in p, skipping labels and nops. 173 | // If false is returned, there is no next instruction. 174 | func nextInsn(p isa.Program) (isa.Insn, bool) { 175 | for i := 0; i < len(p); i++ { 176 | switch p[i].(type) { 177 | case isa.Label, isa.Nop: 178 | continue 179 | default: 180 | return p[i], true 181 | } 182 | } 183 | 184 | return isa.Nop{}, false 185 | } 186 | 187 | // Returns the index of the next instruction and if there was a label before 188 | // it. 189 | func nextInsnLabel(p isa.Program) (int, bool) { 190 | hadLabel := false 191 | for i := 0; i < len(p); i++ { 192 | switch p[i].(type) { 193 | case isa.Nop: 194 | continue 195 | case isa.Label: 196 | hadLabel = true 197 | default: 198 | return i, hadLabel 199 | } 200 | } 201 | 202 | return -1, hadLabel 203 | } 204 | 205 | // Optimize performs some optimization passes on the code in p. 
In particular 206 | // it performs head-fail optimization and jump replacement. 207 | func Optimize(p isa.Program) { 208 | // map from label to index in code 209 | labels := make(map[isa.Label]int) 210 | for i, insn := range p { 211 | switch l := insn.(type) { 212 | case isa.Label: 213 | labels[l] = i 214 | } 215 | } 216 | 217 | for i, insn := range p { 218 | // head-fail optimization: if we find a choice instruction immediately 219 | // followed (no label) by Char/Set/Any, we can replace with the 220 | // dedicated instruction TestChar/TestSet/TestAny. 221 | if ch, ok := insn.(isa.Choice); ok && i < len(p)-1 { 222 | next := p[i+1] 223 | switch t := next.(type) { 224 | case isa.Char: 225 | p[i] = isa.TestChar{ 226 | Byte: t.Byte, 227 | Lbl: ch.Lbl, 228 | } 229 | p[i+1] = isa.Nop{} 230 | case isa.Set: 231 | p[i] = isa.TestSet{ 232 | Chars: t.Chars, 233 | Lbl: ch.Lbl, 234 | } 235 | p[i+1] = isa.Nop{} 236 | case isa.Any: 237 | p[i] = isa.TestAny{ 238 | N: t.N, 239 | Lbl: ch.Lbl, 240 | } 241 | p[i+1] = isa.Nop{} 242 | } 243 | } 244 | 245 | // jump optimization: if we find a jump to another control flow 246 | // instruction, we can replace the current jump directly with the 247 | // target instruction. 248 | if j, ok := insn.(isa.Jump); ok { 249 | next, ok := nextInsn(p[labels[j.Lbl]:]) 250 | if ok { 251 | switch next.(type) { 252 | case isa.PartialCommit, isa.BackCommit, isa.Commit, 253 | isa.Jump, isa.Return, isa.Fail, isa.FailTwice, isa.End: 254 | p[i] = next 255 | } 256 | } 257 | } 258 | } 259 | } 260 | -------------------------------------------------------------------------------- /pattern/pattern.go: -------------------------------------------------------------------------------- 1 | // Package pattern provides data types and functions for compiling patterns 2 | // into GPeg VM programs. 3 | package pattern 4 | 5 | import ( 6 | "regexp/syntax" 7 | 8 | "github.com/zyedidia/gpeg/charset" 9 | "github.com/zyedidia/gpeg/isa" 10 | ) 11 | 12 | // Cap marks a pattern to be captured. 13 | func Cap(p Pattern, id int) Pattern { 14 | return &CapNode{ 15 | Patt: p, 16 | Id: id, 17 | } 18 | } 19 | 20 | // Check marks a pattern to be checked with the given checker. 21 | func Check(p Pattern, c isa.Checker) Pattern { 22 | return &CheckNode{ 23 | Patt: p, 24 | Checker: c, 25 | } 26 | } 27 | 28 | func CheckFlags(p Pattern, c isa.Checker, id, flag int) Pattern { 29 | return &CheckNode{ 30 | Patt: p, 31 | Checker: c, 32 | Id: id, 33 | Flag: flag, 34 | } 35 | } 36 | 37 | var memoId = 0 38 | 39 | // MemoId marks a pattern as memoizable with a particular ID. 40 | func MemoId(p Pattern, id int) Pattern { 41 | m := &MemoNode{ 42 | Patt: p, 43 | Id: id, 44 | } 45 | memoId = max(memoId, id) + 1 46 | return m 47 | } 48 | 49 | // Memo marks a pattern as memoizable. 50 | func Memo(p Pattern) Pattern { 51 | m := &MemoNode{ 52 | Patt: p, 53 | Id: memoId, 54 | } 55 | memoId++ 56 | return m 57 | } 58 | 59 | // Literal matches a given string literal. 60 | func Literal(s string) Pattern { 61 | return &LiteralNode{ 62 | Str: s, 63 | } 64 | } 65 | 66 | // Set matches any character in the given set. 67 | func Set(chars charset.Set) Pattern { 68 | return &ClassNode{ 69 | Chars: chars, 70 | } 71 | } 72 | 73 | // Any consumes n characters, and only fails if there 74 | // aren't enough input characters left. 
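// Any(1) is the PEG '.' expression (the re package compiles DOT to Any(1)).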
75 | func Any(n uint8) Pattern { 76 | return &DotNode{ 77 | N: n, 78 | } 79 | } 80 | 81 | // Repeat matches p exactly n times 82 | func Repeat(p Pattern, n int) Pattern { 83 | if n <= 0 { 84 | return &EmptyNode{} 85 | } 86 | 87 | acc := p 88 | for i := 1; i < n; i++ { 89 | acc = &SeqNode{ 90 | Left: acc, 91 | Right: p, 92 | } 93 | } 94 | return acc 95 | } 96 | 97 | // Concat concatenates n patterns: `p1 p2 p3...`. 98 | func Concat(patts ...Pattern) Pattern { 99 | if len(patts) <= 0 { 100 | return &EmptyNode{} 101 | } 102 | 103 | acc := patts[0] 104 | for _, p := range patts[1:] { 105 | acc = &SeqNode{ 106 | Left: acc, 107 | Right: p, 108 | } 109 | } 110 | 111 | return acc 112 | } 113 | 114 | // Or returns the ordered choice between n patterns: `p1 / p2 / p3...`. 115 | func Or(patts ...Pattern) Pattern { 116 | if len(patts) <= 0 { 117 | return &EmptyNode{} 118 | } 119 | 120 | // optimization: make or right associative 121 | acc := patts[len(patts)-1] 122 | for i := len(patts) - 2; i >= 0; i-- { 123 | acc = &AltNode{ 124 | Left: patts[i], 125 | Right: acc, 126 | } 127 | } 128 | 129 | return acc 130 | } 131 | 132 | // Star returns the Kleene star repetition of a pattern: `p*`. 133 | // This matches zero or more occurrences of p. 134 | func Star(p Pattern) Pattern { 135 | return &StarNode{ 136 | Patt: p, 137 | } 138 | } 139 | 140 | // Plus returns the Kleene plus repetition of a pattern: `p+`. 141 | // This matches one or more occurrences of p. 142 | func Plus(p Pattern) Pattern { 143 | return &PlusNode{ 144 | Patt: p, 145 | } 146 | } 147 | 148 | // Optional matches at most 1 occurrence of p: `p?`. 149 | func Optional(p Pattern) Pattern { 150 | return &OptionalNode{ 151 | Patt: p, 152 | } 153 | } 154 | 155 | // Not returns the not predicate applied to a pattern: `!p`. 156 | // The not predicate succeeds if matching `p` at the current position 157 | // fails, and does not consume any input. 158 | func Not(p Pattern) Pattern { 159 | return &NotNode{ 160 | Patt: p, 161 | } 162 | } 163 | 164 | // And returns the and predicate applied to a pattern: `&p`. 165 | // The and predicate succeeds if matching `p` at the current position 166 | // succeeds and does not consume any input. 167 | // This is equivalent to `!!p`. 168 | func And(p Pattern) Pattern { 169 | return &AndNode{ 170 | Patt: p, 171 | } 172 | } 173 | 174 | // Search is a dedicated operator for creating searches. It will match 175 | // the first occurrence of the given pattern. Use Star(Search(p)) to match 176 | // the last occurrence (for a non-overlapping pattern). 177 | func Search(p Pattern) Pattern { 178 | return &SearchNode{ 179 | Patt: p, 180 | } 181 | } 182 | 183 | func EmptyOp(op syntax.EmptyOp) Pattern { 184 | return &EmptyOpNode{ 185 | Op: op, 186 | } 187 | } 188 | 189 | // NonTerm builds an unresolved non-terminal with a given name. 190 | // NonTerms should be used together with `Grammar` to build a recursive 191 | // grammar. 192 | func NonTerm(name string) Pattern { 193 | return &NonTermNode{ 194 | Name: name, 195 | } 196 | } 197 | 198 | // Grammar builds a grammar from a map of non-terminal patterns. 199 | // Any unresolved non-terminals are resolved with their definitions 200 | // in the map. 201 | func Grammar(start string, nonterms map[string]Pattern) Pattern { 202 | return &GrammarNode{ 203 | Defs: nonterms, 204 | Start: start, 205 | } 206 | } 207 | 208 | // CapGrammar builds a grammar, but all values are automatically captured. The 209 | // capture IDs are returned in the 'ids' map. 
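// The caller provides the ids map; one automatically assigned capture ID is recorded in it per non-terminal name.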
210 | func CapGrammar(start string, nonterms map[string]Pattern, ids map[string]int) Pattern { 211 | m := make(map[string]Pattern) 212 | id := 0 213 | for k, v := range nonterms { 214 | m[k] = Cap(v, id) 215 | ids[k] = id 216 | id++ 217 | } 218 | return Grammar(start, m) 219 | } 220 | 221 | // Error is a pattern that throws an error with the given message. 222 | func Error(msg string, recovery Pattern) Pattern { 223 | return &ErrorNode{ 224 | Message: msg, 225 | Recover: recovery, 226 | } 227 | } 228 | 229 | func max(a, b int) int { 230 | if a > b { 231 | return a 232 | } 233 | return b 234 | } 235 | -------------------------------------------------------------------------------- /pattern/string.go: -------------------------------------------------------------------------------- 1 | package pattern 2 | 3 | import ( 4 | "fmt" 5 | "strconv" 6 | ) 7 | 8 | func Prettify(p Pattern) string { 9 | switch t := Get(p).(type) { 10 | case *LiteralNode: 11 | return strconv.Quote(t.Str) 12 | case *ClassNode: 13 | return fmt.Sprintf("[%s]", t.Chars.String()) 14 | case *DotNode: 15 | return "." 16 | case *EmptyNode: 17 | return "\"\"" 18 | case *AltNode: 19 | return fmt.Sprintf("(%s / %s)", Prettify(Get(t.Left)), Prettify(Get(t.Right))) 20 | case *SeqNode: 21 | return fmt.Sprintf("(%s %s)", Prettify(Get(t.Left)), Prettify(Get(t.Right))) 22 | case *StarNode: 23 | return fmt.Sprintf("%s*", Prettify(Get(t.Patt))) 24 | case *PlusNode: 25 | return fmt.Sprintf("%s+", Prettify(Get(t.Patt))) 26 | case *OptionalNode: 27 | return fmt.Sprintf("%s?", Prettify(Get(t.Patt))) 28 | case *NotNode: 29 | return fmt.Sprintf("!%s", Prettify(Get(t.Patt))) 30 | case *AndNode: 31 | return fmt.Sprintf("&%s", Prettify(Get(t.Patt))) 32 | case *CapNode: 33 | return fmt.Sprintf("{ %s }", Prettify(Get(t.Patt))) 34 | case *MemoNode: 35 | return fmt.Sprintf("{{ %s }}", Prettify(Get(t.Patt))) 36 | case *SearchNode: 37 | return fmt.Sprintf("search(%s)", Prettify(Get(t.Patt))) 38 | case *CheckNode: 39 | return fmt.Sprintf("check(%s)", Prettify(Get(t.Patt))) 40 | case *ErrorNode: 41 | return fmt.Sprintf("err(%s, %s)", t.Message, Prettify(Get(t.Recover))) 42 | case *EmptyOpNode: 43 | return fmt.Sprintf("empty(%v)", t.Op) 44 | case *GrammarNode: 45 | s := fmt.Sprintf("%s\n", t.Start) 46 | t.Inline() 47 | for name, patt := range t.Defs { 48 | s += fmt.Sprintf("%s <- %s\n", name, Prettify(Get(patt))) 49 | } 50 | return s 51 | case *NonTermNode: 52 | if t.Inlined != nil { 53 | return Prettify(Get(t.Inlined)) 54 | } 55 | return t.Name 56 | } 57 | 58 | return "" 59 | } 60 | -------------------------------------------------------------------------------- /re/grammar.go: -------------------------------------------------------------------------------- 1 | package re 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/charset" 5 | p "github.com/zyedidia/gpeg/pattern" 6 | ) 7 | 8 | // Pattern <- Spacing_ (Grammar / Expression) EndOfFile_ 9 | // Grammar <- Definition+ 10 | // Definition <- Identifier LEFTARROW Expression 11 | // 12 | // Expression <- Sequence (SLASH Sequence)* 13 | // Sequence <- Prefix* 14 | // Prefix <- (AND / NOT)? Suffix 15 | // Suffix <- Primary (QUESTION / STAR / PLUS)? 
16 | // Primary <- Identifier !LEFTARROW 17 | // / '(' Expression ')' 18 | // / Literal / Class 19 | // / BRACEPO Expression BRACEPC 20 | // / BRACEO Expression BRACEC 21 | // / DOT 22 | // 23 | // Identifier <- IdentStart IdentCont* Spacing_ 24 | // IdentStart <- [a-zA-Z_] 25 | // IdentCont <- IdentStart / [0-9] 26 | // 27 | // Literal <- ['] (!['] Char)* ['] Spacing_ 28 | // / ["] (!["] Char)* ["] Spacing_ 29 | // Class <- '[' CARAT? (!']' Range)* ']' Spacing_ 30 | // Range <- Char '-' Char / Char 31 | // Char <- '\\' [nrt'"\[\]\\\-] 32 | // / '\\' [0-2][0-7][0-7] 33 | // / '\\' [0-7][0-7]? 34 | // / !'\\' . 35 | // 36 | // AND <- '&' Spacing_ 37 | // NOT <- '!' Spacing_ 38 | // QUESTION <- '?' Spacing_ 39 | // STAR <- '*' Spacing_ 40 | // PLUS <- '+' Spacing_ 41 | // DOT <- '.' Spacing_ 42 | // CARAT <- '^' Spacing_ 43 | // BRACEO <- '{' Spacing_ 44 | // BRACEC <- '}' Spacing_ 45 | // BRACEPO <- '{{' Spacing_ 46 | // BRACEPC <- '}}' Spacing_ 47 | // LEFTARROW <- '<-' Spacing_ 48 | // OPEN <- '(' Spacing_ 49 | // CLOSE <- ')' Spacing_ 50 | // SLASH <- '/' Spacing_ 51 | // 52 | // Spacing_ <- (Space_ / Comment_)* 53 | // Comment_ <- '#' (!EndOfLine_ .)* EndOfLine_ 54 | // Space_ <- ' ' / '\t' / EndOfLine_ 55 | // EndOfLine_ <- '\r\n' / '\n' / '\r' 56 | // EndOfFile_ <- !. 57 | 58 | const ( 59 | idPattern = iota 60 | idGrammar 61 | idDefinition 62 | idExpression 63 | idSequence 64 | idPrefix 65 | idSuffix 66 | idPrimary 67 | idLiteral 68 | idRange 69 | idClass 70 | idIdentifier 71 | idIdentStart 72 | idIdentCont 73 | idChar 74 | idAND 75 | idNOT 76 | idQUESTION 77 | idSTAR 78 | idPLUS 79 | idDOT 80 | idCARAT 81 | idOPEN 82 | idBRACEO 83 | idBRACEPO 84 | ) 85 | 86 | var grammar = map[string]p.Pattern{ 87 | "Pattern": p.Cap(p.Concat( 88 | p.NonTerm("Spacing"), 89 | p.Or( 90 | p.NonTerm("Grammar"), 91 | p.NonTerm("Expression"), 92 | ), 93 | p.NonTerm("EndOfFile"), 94 | ), idPattern), 95 | "Grammar": p.Cap(p.Plus(p.NonTerm("Definition")), idGrammar), 96 | "Definition": p.Cap(p.Concat( 97 | p.NonTerm("Identifier"), 98 | p.NonTerm("LEFTARROW"), 99 | p.NonTerm("Expression"), 100 | ), idDefinition), 101 | 102 | "Expression": p.Cap(p.Concat( 103 | p.NonTerm("Sequence"), 104 | p.Star(p.Concat( 105 | p.NonTerm("SLASH"), 106 | p.NonTerm("Sequence"), 107 | )), 108 | ), idExpression), 109 | "Sequence": p.Cap(p.Star(p.NonTerm("Prefix")), idSequence), 110 | "Prefix": p.Cap(p.Concat( 111 | p.Optional(p.Or( 112 | p.NonTerm("AND"), 113 | p.NonTerm("NOT"), 114 | )), 115 | p.NonTerm("Suffix"), 116 | ), idPrefix), 117 | "Suffix": p.Cap(p.Concat( 118 | p.NonTerm("Primary"), 119 | p.Optional(p.Or( 120 | p.NonTerm("QUESTION"), 121 | p.NonTerm("STAR"), 122 | p.NonTerm("PLUS"), 123 | )), 124 | ), idSuffix), 125 | "Primary": p.Cap(p.Or( 126 | p.Concat( 127 | p.NonTerm("Identifier"), 128 | p.Not(p.NonTerm("LEFTARROW")), 129 | ), 130 | p.Concat( 131 | p.NonTerm("OPEN"), 132 | p.NonTerm("Expression"), 133 | p.NonTerm("CLOSE"), 134 | ), 135 | p.Concat( 136 | p.NonTerm("BRACEPO"), 137 | p.NonTerm("Expression"), 138 | p.NonTerm("BRACEPC"), 139 | ), 140 | p.Concat( 141 | p.NonTerm("BRACEO"), 142 | p.NonTerm("Expression"), 143 | p.NonTerm("BRACEC"), 144 | ), 145 | p.NonTerm("Literal"), 146 | p.NonTerm("Class"), 147 | p.NonTerm("DOT"), 148 | ), idPrimary), 149 | 150 | "Identifier": p.Cap(p.Concat( 151 | p.NonTerm("IdentStart"), 152 | p.Star(p.NonTerm("IdentCont")), 153 | p.NonTerm("Spacing"), 154 | ), idIdentifier), 155 | "IdentStart": p.Cap( 156 | p.Set(charset.Range('a', 'z'). 157 | Add(charset.Range('A', 'Z')). 
158 | Add(charset.New([]byte{'_'})), 159 | ), idIdentStart), 160 | "IdentCont": p.Cap(p.Or( 161 | p.NonTerm("IdentStart"), 162 | p.Set(charset.Range('0', '9')), 163 | ), idIdentCont), 164 | 165 | "Literal": p.Cap(p.Or( 166 | p.Concat( 167 | p.Literal("'"), 168 | p.Star(p.Concat( 169 | p.Not(p.Literal("'")), 170 | p.NonTerm("Char"), 171 | )), 172 | p.Literal("'"), 173 | p.NonTerm("Spacing"), 174 | ), 175 | p.Concat( 176 | p.Literal("\""), 177 | p.Star(p.Concat( 178 | p.Not(p.Literal("\"")), 179 | p.NonTerm("Char"), 180 | )), 181 | p.Literal("\""), 182 | p.NonTerm("Spacing"), 183 | ), 184 | ), idLiteral), 185 | "Class": p.Cap(p.Concat( 186 | p.Literal("["), 187 | p.Optional(p.NonTerm("CARAT")), 188 | p.Star(p.Concat( 189 | p.Not(p.Literal("]")), 190 | p.NonTerm("Range"), 191 | )), 192 | p.Literal("]"), 193 | p.NonTerm("Spacing"), 194 | ), idClass), 195 | "Range": p.Cap(p.Or( 196 | p.Concat( 197 | p.NonTerm("Char"), 198 | p.Literal("-"), 199 | p.NonTerm("Char"), 200 | ), 201 | p.NonTerm("Char"), 202 | ), idRange), 203 | "Char": p.Cap(p.Or( 204 | p.Concat( 205 | p.Literal("\\"), 206 | p.Set(charset.New([]byte{'n', 'r', 't', '\'', '"', '[', ']', '\\', '-'})), 207 | ), 208 | p.Concat( 209 | p.Literal("\\"), 210 | p.Set(charset.Range('0', '2')), 211 | p.Set(charset.Range('0', '7')), 212 | p.Set(charset.Range('0', '7')), 213 | ), 214 | p.Concat( 215 | p.Literal("\\"), 216 | p.Set(charset.Range('0', '7')), 217 | p.Optional(p.Set(charset.Range('0', '7'))), 218 | ), 219 | p.Concat( 220 | p.Not(p.Literal("\\")), 221 | p.Any(1), 222 | ), 223 | ), idChar), 224 | 225 | "AND": p.Cap(p.Concat( 226 | p.Literal("&"), 227 | p.NonTerm("Spacing"), 228 | ), idAND), 229 | "NOT": p.Cap(p.Concat( 230 | p.Literal("!"), 231 | p.NonTerm("Spacing"), 232 | ), idNOT), 233 | "QUESTION": p.Cap(p.Concat( 234 | p.Literal("?"), 235 | p.NonTerm("Spacing"), 236 | ), idQUESTION), 237 | "STAR": p.Cap(p.Concat( 238 | p.Literal("*"), 239 | p.NonTerm("Spacing"), 240 | ), idSTAR), 241 | "PLUS": p.Cap(p.Concat( 242 | p.Literal("+"), 243 | p.NonTerm("Spacing"), 244 | ), idPLUS), 245 | "DOT": p.Cap(p.Concat( 246 | p.Literal("."), 247 | p.NonTerm("Spacing"), 248 | ), idDOT), 249 | "CARAT": p.Cap(p.Concat( 250 | p.Literal("^"), 251 | p.NonTerm("Spacing"), 252 | ), idCARAT), 253 | "OPEN": p.Cap(p.Concat( 254 | p.Literal("("), 255 | p.NonTerm("Spacing"), 256 | ), idOPEN), 257 | "CLOSE": p.Concat( 258 | p.Literal(")"), 259 | p.NonTerm("Spacing"), 260 | ), 261 | "BRACEO": p.Cap(p.Concat( 262 | p.Literal("{"), 263 | p.NonTerm("Spacing"), 264 | ), idBRACEO), 265 | "BRACEC": p.Concat( 266 | p.Literal("}"), 267 | p.NonTerm("Spacing"), 268 | ), 269 | "BRACEPO": p.Cap(p.Concat( 270 | p.Literal("{{"), 271 | p.NonTerm("Spacing"), 272 | ), idBRACEPO), 273 | "BRACEPC": p.Concat( 274 | p.Literal("}}"), 275 | p.NonTerm("Spacing"), 276 | ), 277 | "SLASH": p.Concat( 278 | p.Literal("/"), 279 | p.NonTerm("Spacing"), 280 | ), 281 | "LEFTARROW": p.Concat( 282 | p.Literal("<-"), 283 | p.NonTerm("Spacing"), 284 | ), 285 | 286 | "Spacing": p.Star(p.Or( 287 | p.NonTerm("Space"), 288 | p.NonTerm("Comment"), 289 | )), 290 | "Comment": p.Concat( 291 | p.Literal("#"), 292 | p.Star(p.Concat( 293 | p.Not(p.NonTerm("EndOfLine")), 294 | p.Any(1), 295 | )), 296 | p.NonTerm("EndOfLine"), 297 | ), 298 | "Space": p.Or( 299 | p.Set(charset.New([]byte{' ', '\t'})), 300 | p.NonTerm("EndOfLine"), 301 | ), 302 | "EndOfLine": p.Or( 303 | p.Literal("\r\n"), 304 | p.Literal("\n"), 305 | p.Literal("\r"), 306 | ), 307 | "EndOfFile": p.Not(p.Any(1)), 308 | } 309 | 
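// Example (illustrative): the grammar above accepts definitions written in the
// PEG syntax documented at the top of this file, e.g.
//
//	Expr   <- Term (('+' / '-') Term)*
//	Term   <- Factor (('*' / '/') Factor)*
//	Factor <- [0-9]+ / '(' Expr ')'
//
// re.Compile (in re.go, below) parses such input with this grammar and builds
// the corresponding pattern.Pattern.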
-------------------------------------------------------------------------------- /re/re.go: -------------------------------------------------------------------------------- 1 | // Package re provides functions for compiling 're' patterns (given as strings) 2 | // into standard patterns. 3 | package re 4 | 5 | import ( 6 | "bytes" 7 | "fmt" 8 | "strconv" 9 | "strings" 10 | 11 | "github.com/zyedidia/gpeg/charset" 12 | "github.com/zyedidia/gpeg/memo" 13 | "github.com/zyedidia/gpeg/pattern" 14 | "github.com/zyedidia/gpeg/vm" 15 | ) 16 | 17 | var parser vm.Code 18 | 19 | func init() { 20 | prog := pattern.MustCompile(pattern.Grammar("Pattern", grammar)) 21 | parser = vm.Encode(prog) 22 | } 23 | 24 | func compile(root *memo.Capture, s string, capg bool, ids map[string]int) pattern.Pattern { 25 | var p pattern.Pattern 26 | switch root.Id() { 27 | case idPattern: 28 | p = compile(root.Child(0), s, capg, ids) 29 | case idGrammar: 30 | nonterms := make(map[string]pattern.Pattern) 31 | var first string 32 | it := root.ChildIterator(0) 33 | for c := it(); c != nil; c = it() { 34 | k, v := compileDef(c, s, capg, ids) 35 | if first == "" { 36 | first = k 37 | } 38 | nonterms[k] = v 39 | } 40 | if capg { 41 | p = pattern.CapGrammar(first, nonterms, ids) 42 | } else { 43 | p = pattern.Grammar(first, nonterms) 44 | } 45 | case idExpression: 46 | alternations := make([]pattern.Pattern, 0, root.NumChildren()) 47 | it := root.ChildIterator(0) 48 | for c := it(); c != nil; c = it() { 49 | alternations = append(alternations, compile(c, s, capg, ids)) 50 | } 51 | p = pattern.Or(alternations...) 52 | case idSequence: 53 | concats := make([]pattern.Pattern, 0, root.NumChildren()) 54 | it := root.ChildIterator(0) 55 | for c := it(); c != nil; c = it() { 56 | concats = append(concats, compile(c, s, capg, ids)) 57 | } 58 | p = pattern.Concat(concats...) 
59 | case idPrefix:
60 | c := root.Child(0)
61 | switch c.Id() {
62 | case idAND:
63 | p = pattern.And(compile(root.Child(1), s, capg, ids))
64 | case idNOT:
65 | p = pattern.Not(compile(root.Child(1), s, capg, ids))
66 | default:
67 | p = compile(root.Child(0), s, capg, ids)
68 | }
69 | case idSuffix:
70 | if root.NumChildren() == 2 {
71 | c := root.Child(1)
72 | switch c.Id() {
73 | case idQUESTION:
74 | p = pattern.Optional(compile(root.Child(0), s, capg, ids))
75 | case idSTAR:
76 | p = pattern.Star(compile(root.Child(0), s, capg, ids))
77 | case idPLUS:
78 | p = pattern.Plus(compile(root.Child(0), s, capg, ids))
79 | }
80 | } else {
81 | p = compile(root.Child(0), s, capg, ids)
82 | }
83 | case idPrimary:
84 | switch root.Child(0).Id() {
85 | case idIdentifier, idLiteral, idClass:
86 | p = compile(root.Child(0), s, capg, ids)
87 | case idOPEN:
88 | p = compile(root.Child(1), s, capg, ids)
89 | case idBRACEPO:
90 | p = pattern.Memo(compile(root.Child(1), s, capg, ids))
91 | case idDOT:
92 | p = pattern.Any(1)
93 | }
94 | case idLiteral:
95 | lit := &bytes.Buffer{}
96 | it := root.ChildIterator(0)
97 | for c := it(); c != nil; c = it() {
98 | lit.WriteByte(parseChar(s[c.Start():c.End()]))
99 | }
100 | p = pattern.Literal(lit.String())
101 | case idClass:
102 | var set charset.Set
103 | if root.NumChildren() <= 0 {
104 | break
105 | }
106 | complement := false
107 | if root.Child(0).Id() == idCARAT {
108 | complement = true
109 | }
110 | it := root.ChildIterator(0)
111 | i := 0
112 | for c := it(); c != nil; c = it() {
113 | if i == 0 && complement {
114 | i++
115 | continue
116 | }
117 | set = set.Add(compileSet(c, s))
118 | }
119 | if complement {
120 | set = set.Complement()
121 | }
122 | p = pattern.Set(set)
123 | case idIdentifier:
124 | p = pattern.NonTerm(parseId(root, s))
125 | }
126 | return p
127 | }
128 | 
129 | var special = map[byte]byte{
130 | 'n': '\n',
131 | 'r': '\r',
132 | 't': '\t',
133 | '\'': '\'',
134 | '"': '"',
135 | '[': '[',
136 | ']': ']',
137 | '\\': '\\',
138 | '-': '-',
139 | }
140 | 
141 | func parseChar(char string) byte {
142 | switch char[0] {
143 | case '\\':
144 | for k, v := range special {
145 | if char[1] == k {
146 | return v
147 | }
148 | }
149 | 
150 | i, _ := strconv.ParseUint(char[1:], 8, 8) // parse unsigned so octal escapes above \177 still fit in a byte
151 | return byte(i)
152 | default:
153 | return char[0]
154 | }
155 | }
156 | 
157 | func parseId(root *memo.Capture, s string) string {
158 | ident := &bytes.Buffer{}
159 | it := root.ChildIterator(0)
160 | for c := it(); c != nil; c = it() {
161 | ident.WriteString(s[c.Start():c.End()])
162 | }
163 | return ident.String()
164 | }
165 | 
166 | func compileDef(root *memo.Capture, s string, capg bool, ids map[string]int) (string, pattern.Pattern) {
167 | id := root.Child(0)
168 | exp := root.Child(1)
169 | return parseId(id, s), compile(exp, s, capg, ids)
170 | }
171 | 
172 | func compileSet(root *memo.Capture, s string) charset.Set {
173 | switch root.NumChildren() {
174 | case 1:
175 | c := root.Child(0)
176 | return charset.New([]byte{parseChar(s[c.Start():c.End()])})
177 | case 2:
178 | c1, c2 := root.Child(0), root.Child(1)
179 | return charset.Range(parseChar(s[c1.Start():c1.End()]), parseChar(s[c2.Start():c2.End()]))
180 | }
181 | return charset.Set{}
182 | }
183 | 
184 | func Compile(s string) (pattern.Pattern, error) {
185 | match, n, ast, errs := parser.Exec(strings.NewReader(s), memo.NoneTable{})
186 | if len(errs) != 0 {
187 | return nil, errs[0]
188 | }
189 | if !match {
190 | return nil, fmt.Errorf("Invalid PEG: failed at %d", n)
191 | }
192 | 
193 | return compile(ast.Child(0), s, false, nil), nil 194 | } 195 | 196 | func MustCompile(s string) pattern.Pattern { 197 | p, err := Compile(s) 198 | if err != nil { 199 | panic(err) 200 | } 201 | return p 202 | } 203 | 204 | func CompileCap(s string, ids map[string]int) (pattern.Pattern, error) { 205 | match, n, ast, errs := parser.Exec(strings.NewReader(s), memo.NoneTable{}) 206 | if len(errs) != 0 { 207 | return nil, errs[0] 208 | } 209 | if !match { 210 | return nil, fmt.Errorf("Invalid PEG: failed at %d", n) 211 | } 212 | 213 | return compile(ast.Child(0), s, true, ids), nil 214 | } 215 | 216 | func MustCompileCap(s string, ids map[string]int) pattern.Pattern { 217 | p, err := CompileCap(s, ids) 218 | if err != nil { 219 | panic(err) 220 | } 221 | return p 222 | } 223 | -------------------------------------------------------------------------------- /re_test.go: -------------------------------------------------------------------------------- 1 | package gpeg 2 | 3 | import ( 4 | "io/ioutil" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/re" 8 | ) 9 | 10 | func TestRe(t *testing.T) { 11 | p := re.MustCompile("ID <- [a-zA-Z][a-zA-Z0-9_]*") 12 | tests := []PatternTest{ 13 | {"hello", 5}, 14 | {"test_1", 6}, 15 | {"_not_allowed", -1}, 16 | {"123", -1}, 17 | } 18 | check(p, tests, t) 19 | } 20 | 21 | func TestReExtra(t *testing.T) { 22 | p := re.MustCompile("[^a-zA-Z]*") 23 | tests := []PatternTest{ 24 | {"hello", 0}, 25 | {"123", 3}, 26 | {"_*&##@0abc", 7}, 27 | } 28 | check(p, tests, t) 29 | } 30 | 31 | func TestJson(t *testing.T) { 32 | peg, err := ioutil.ReadFile("grammars/json.peg") 33 | if err != nil { 34 | t.Error(err) 35 | } 36 | p := re.MustCompile(string(peg)) 37 | 38 | json, err := ioutil.ReadFile("testdata/test.json") 39 | if err != nil { 40 | t.Error(err) 41 | } 42 | 43 | tests := []PatternTest{ 44 | {string(json), len(json)}, 45 | } 46 | 47 | check(p, tests, t) 48 | } 49 | 50 | func TestJava(t *testing.T) { 51 | peg, err := ioutil.ReadFile("grammars/java.peg") 52 | if err != nil { 53 | t.Error(err) 54 | } 55 | p := re.MustCompile(string(peg)) 56 | 57 | java, err := ioutil.ReadFile("testdata/test.java") 58 | if err != nil { 59 | t.Error(err) 60 | } 61 | 62 | tests := []PatternTest{ 63 | {string(java), len(java)}, 64 | } 65 | 66 | check(p, tests, t) 67 | } 68 | -------------------------------------------------------------------------------- /recover_test.go: -------------------------------------------------------------------------------- 1 | package gpeg 2 | 3 | import ( 4 | "strings" 5 | "testing" 6 | 7 | "github.com/zyedidia/gpeg/charset" 8 | "github.com/zyedidia/gpeg/memo" 9 | . 
"github.com/zyedidia/gpeg/pattern" 10 | "github.com/zyedidia/gpeg/vm" 11 | ) 12 | 13 | func sync(p Pattern) Pattern { 14 | return Star(Concat(Not(p), Any(1))) 15 | } 16 | 17 | func TestRecover(t *testing.T) { 18 | id := Plus(Set(charset.Range('a', 'z'))) 19 | p := Grammar("S", map[string]Pattern{ 20 | "S": Or(NonTerm("List"), Concat(Any(1), Error("expecting a list of identifiers", NonTerm("ErrList")))), 21 | "List": Concat( 22 | NonTerm("Id"), 23 | Star(Concat(And(Any(1)), 24 | NonTerm("Comma"), 25 | Or(NonTerm("Id"), 26 | Error("expecting an identifier", NonTerm("ErrId")))), 27 | ), 28 | ), 29 | "Id": Concat(NonTerm("Sp"), id), 30 | "Comma": Or(Concat(NonTerm("Sp"), Literal(",")), Error("expecting ','", NonTerm("ErrComma"))), 31 | "Sp": Star(Set(charset.New([]byte{' ', '\n', '\t'}))), 32 | "ErrId": sync(Literal(",")), 33 | "ErrComma": sync(id), 34 | "ErrList": sync(Not(Any(1))), 35 | }) 36 | 37 | peg := MustCompile(p) 38 | code := vm.Encode(peg) 39 | in := strings.NewReader("one two three,") 40 | _, _, _, errs := code.Exec(in, memo.NoneTable{}) 41 | 42 | if len(errs) != 3 { 43 | t.Error("Incorrect list of errors:", errs) 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /rxconv/rxconv.go: -------------------------------------------------------------------------------- 1 | // Package rxconv provides functions to convert a Go regexp into a PEG so that 2 | // it can be used for incremental parsing. 3 | package rxconv 4 | 5 | import ( 6 | "fmt" 7 | "regexp/syntax" 8 | "strconv" 9 | 10 | "github.com/zyedidia/gpeg/charset" 11 | p "github.com/zyedidia/gpeg/pattern" 12 | ) 13 | 14 | var num = 0 15 | 16 | func uniq() string { 17 | num++ 18 | return "a" + strconv.Itoa(num) 19 | } 20 | 21 | func star(r *syntax.Regexp, k p.Pattern) p.Pattern { 22 | nterm := uniq() 23 | nonterms := make(map[string]p.Pattern) 24 | nonterms[nterm] = p.Or(pi(r, p.NonTerm(nterm)), k) 25 | return p.Grammar(nterm, nonterms) 26 | } 27 | 28 | // continuation-based conversion 29 | func pi(e *syntax.Regexp, k p.Pattern) p.Pattern { 30 | switch e.Op { 31 | case syntax.OpEmptyMatch: 32 | return k 33 | case syntax.OpLiteral: 34 | return p.Concat(p.Literal(string(e.Rune)), k) 35 | case syntax.OpCharClass: 36 | lits := make([]p.Pattern, 0, len(e.Rune)) 37 | for i := 0; i < len(e.Rune); i += 2 { 38 | start := e.Rune[i] 39 | end := e.Rune[i+1] 40 | var patt p.Pattern 41 | if start < 256 && end < 256 { 42 | patt = p.Set(charset.Range(byte(start), byte(end))) 43 | lits = append(lits, p.Concat(patt, k)) 44 | } else { 45 | for ; start <= end; start++ { 46 | lits = append(lits, p.Concat(p.Literal(string(start)), k)) 47 | } 48 | } 49 | } 50 | return p.Or(lits...) 51 | case syntax.OpAnyChar: 52 | // TODO: unicode 53 | return p.Concat(p.Any(1), k) 54 | case syntax.OpAnyCharNotNL: 55 | return p.Concat(p.Set(charset.New([]byte{'\n'}).Complement()), k) 56 | case syntax.OpConcat: 57 | patt := k 58 | for i := len(e.Sub) - 1; i >= 0; i-- { 59 | patt = pi(e.Sub[i], patt) 60 | } 61 | return patt 62 | case syntax.OpAlternate: 63 | alts := make([]p.Pattern, 0, len(e.Sub)) 64 | for _, s := range e.Sub { 65 | alts = append(alts, pi(s, k)) 66 | } 67 | return p.Or(alts...) 
68 | case syntax.OpCapture: 69 | return pi(e.Sub[0], k) 70 | case syntax.OpStar: 71 | return star(e.Sub[0], k) 72 | case syntax.OpPlus: 73 | return pi(e.Sub[0], star(e.Sub[0], k)) 74 | case syntax.OpQuest: 75 | return p.Or(pi(e.Sub[0], k), k) 76 | case syntax.OpBeginLine: 77 | return p.Concat(p.EmptyOp(syntax.EmptyBeginLine), k) 78 | case syntax.OpEndLine: 79 | return p.Concat(p.EmptyOp(syntax.EmptyEndLine), k) 80 | case syntax.OpBeginText: 81 | return p.Concat(p.EmptyOp(syntax.EmptyBeginText), k) 82 | case syntax.OpEndText: 83 | return p.Concat(p.EmptyOp(syntax.EmptyEndText), k) 84 | case syntax.OpWordBoundary: 85 | return p.Concat(p.EmptyOp(syntax.EmptyWordBoundary), k) 86 | case syntax.OpNoWordBoundary: 87 | return p.Concat(p.EmptyOp(syntax.EmptyNoWordBoundary), k) 88 | } 89 | panic(fmt.Sprintf("unimplemented %s", e.Op)) 90 | } 91 | 92 | func convert(r *syntax.Regexp) p.Pattern { 93 | return pi(r, &p.EmptyNode{}) 94 | } 95 | 96 | func FromRegexp(s string, flags syntax.Flags) (p.Pattern, error) { 97 | re, err := syntax.Parse(s, flags) 98 | if err != nil { 99 | return nil, err 100 | } 101 | if !verify(re) { 102 | return nil, fmt.Errorf("invalid regexp (repeat not supported)") 103 | } 104 | return p.Search(p.Cap(convert(re), 0)), nil 105 | } 106 | 107 | func verify(e *syntax.Regexp) bool { 108 | switch e.Op { 109 | case syntax.OpEmptyMatch, syntax.OpLiteral, syntax.OpCharClass, 110 | syntax.OpAnyChar, syntax.OpAnyCharNotNL, syntax.OpBeginLine, 111 | syntax.OpEndLine, syntax.OpBeginText, syntax.OpEndText, 112 | syntax.OpWordBoundary, syntax.OpNoWordBoundary: 113 | return true 114 | case syntax.OpConcat, syntax.OpAlternate: 115 | yes := true 116 | for _, s := range e.Sub { 117 | yes = yes && verify(s) 118 | } 119 | return yes 120 | case syntax.OpCapture, syntax.OpStar, syntax.OpPlus, syntax.OpQuest: 121 | return verify(e.Sub[0]) 122 | } 123 | return false 124 | } 125 | -------------------------------------------------------------------------------- /rxconv/rxconv_test.go: -------------------------------------------------------------------------------- 1 | package rxconv_test 2 | 3 | import ( 4 | "regexp/syntax" 5 | "strings" 6 | "testing" 7 | 8 | "github.com/zyedidia/gpeg/memo" 9 | . 
"github.com/zyedidia/gpeg/pattern" 10 | "github.com/zyedidia/gpeg/rxconv" 11 | "github.com/zyedidia/gpeg/vm" 12 | ) 13 | 14 | type PatternTest struct { 15 | in string 16 | match int 17 | } 18 | 19 | func check(p Pattern, tests []PatternTest, t *testing.T) { 20 | code := vm.Encode(MustCompile(p)) 21 | for _, tt := range tests { 22 | name := tt.in[:min(10, len(tt.in))] 23 | t.Run(name, func(t *testing.T) { 24 | match, off, _, _ := code.Exec(strings.NewReader(tt.in), memo.NoneTable{}) 25 | if tt.match == -1 && match || tt.match != -1 && !match || tt.match != -1 && tt.match != off { 26 | t.Errorf("%s: got: (%t, %d), but expected (%d)\n", tt.in, match, off, tt.match) 27 | } 28 | }) 29 | } 30 | } 31 | 32 | func TestSimple(t *testing.T) { 33 | peg, err := rxconv.FromRegexp("(a|ab)c", syntax.Perl) 34 | if err != nil { 35 | t.Fatal(err) 36 | } 37 | 38 | tests := []PatternTest{ 39 | {"abc", 3}, 40 | {"ac", 2}, 41 | {"ab", -1}, 42 | } 43 | check(peg, tests, t) 44 | } 45 | 46 | func TestStar(t *testing.T) { 47 | peg, err := rxconv.FromRegexp("(ba|a)*a", syntax.Perl) 48 | if err != nil { 49 | t.Fatal(err) 50 | } 51 | 52 | tests := []PatternTest{ 53 | {"abaabaa", 7}, 54 | } 55 | 56 | check(peg, tests, t) 57 | } 58 | 59 | func TestMultiOr(t *testing.T) { 60 | peg, err := rxconv.FromRegexp("aa|bb|dd|ff", syntax.Perl) 61 | if err != nil { 62 | t.Fatal(err) 63 | } 64 | tests := []PatternTest{ 65 | {"aa", 2}, 66 | {"bb", 2}, 67 | {"af", -1}, 68 | {"ff", 2}, 69 | } 70 | 71 | check(peg, tests, t) 72 | } 73 | 74 | func TestCharClass(t *testing.T) { 75 | peg, err := rxconv.FromRegexp("[a-z0-9]+", syntax.Perl) 76 | if err != nil { 77 | t.Fatal(err) 78 | } 79 | tests := []PatternTest{ 80 | {"", -1}, 81 | {"hello123", 8}, 82 | {"foo", 3}, 83 | {"123", 3}, 84 | {"_&_", -1}, 85 | } 86 | check(peg, tests, t) 87 | } 88 | 89 | func TestEmptyOp(t *testing.T) { 90 | peg, err := rxconv.FromRegexp("^foo", syntax.Perl) 91 | if err != nil { 92 | t.Fatal(err) 93 | } 94 | tests := []PatternTest{ 95 | {"foohello", 3}, 96 | {" foo ", -1}, 97 | } 98 | check(peg, tests, t) 99 | 100 | peg, err = rxconv.FromRegexp("\\bfoo\\b", syntax.Perl) 101 | if err != nil { 102 | t.Fatal(err) 103 | } 104 | tests = []PatternTest{ 105 | {"foohello", -1}, 106 | {" foo ", 4}, 107 | } 108 | check(peg, tests, t) 109 | } 110 | 111 | func min(a, b int) int { 112 | if a < b { 113 | return a 114 | } 115 | return b 116 | } 117 | -------------------------------------------------------------------------------- /testdata/test.java: -------------------------------------------------------------------------------- 1 | public class Hello { 2 | public static void main(String[] args) { 3 | System.out.println("Hello world"); 4 | } 5 | } 6 | -------------------------------------------------------------------------------- /vm/code.go: -------------------------------------------------------------------------------- 1 | package vm 2 | 3 | import ( 4 | "bytes" 5 | "compress/gzip" 6 | "encoding/binary" 7 | "encoding/gob" 8 | "encoding/json" 9 | "fmt" 10 | 11 | "github.com/zyedidia/gpeg/charset" 12 | "github.com/zyedidia/gpeg/isa" 13 | ) 14 | 15 | // Code is the representation of VM bytecode. 16 | type Code struct { 17 | data code 18 | } 19 | 20 | type code struct { 21 | // list of charsets 22 | Sets []charset.Set 23 | // list of error messages 24 | Errors []string 25 | // list of checker functions 26 | Checkers []isa.Checker 27 | 28 | // the encoded instructions 29 | Insns []byte 30 | } 31 | 32 | // Size returns the size of the encoded instructions. 
33 | func (c *Code) Size() int { 34 | return len(c.data.Insns) 35 | } 36 | 37 | func init() { 38 | gob.Register(isa.MapChecker{}) 39 | gob.Register(isa.BackReference{}) 40 | } 41 | 42 | // ToBytes serializes and compresses this Code. 43 | func (c *Code) ToBytes() ([]byte, error) { 44 | var buf bytes.Buffer 45 | fz := gzip.NewWriter(&buf) 46 | enc := gob.NewEncoder(fz) 47 | err := enc.Encode(c.data) 48 | fz.Close() 49 | return buf.Bytes(), err 50 | } 51 | 52 | // FromBytes loads a Code from a compressed and serialized object. 53 | func FromBytes(b []byte) (Code, error) { 54 | var c code 55 | fz, err := gzip.NewReader(bytes.NewReader(b)) 56 | if err != nil { 57 | return Code{}, err 58 | } 59 | dec := gob.NewDecoder(fz) 60 | err = dec.Decode(&c) 61 | fz.Close() 62 | return Code{ 63 | data: c, 64 | }, err 65 | } 66 | 67 | // ToJson returns this Code serialized to JSON form. 68 | func (c *Code) ToJson() ([]byte, error) { 69 | return json.Marshal(c.data) 70 | } 71 | 72 | // FromJson returns a Code loaded from JSON form. 73 | func FromJson(b []byte) (Code, error) { 74 | var c code 75 | err := json.Unmarshal(b, &c) 76 | return Code{ 77 | data: c, 78 | }, err 79 | } 80 | 81 | // Encode transforms a program into VM bytecode. 82 | func Encode(insns isa.Program) Code { 83 | code := Code{ 84 | data: code{ 85 | Sets: make([]charset.Set, 0), 86 | Insns: make([]byte, 0), 87 | }, 88 | } 89 | 90 | var bcount uint 91 | labels := make(map[isa.Label]uint) 92 | for _, insn := range insns { 93 | switch t := insn.(type) { 94 | case isa.Nop: 95 | continue 96 | case isa.Label: 97 | labels[t] = bcount 98 | continue 99 | default: 100 | bcount += size(insn) 101 | } 102 | } 103 | 104 | for _, insn := range insns { 105 | var op byte 106 | var args []byte 107 | 108 | switch t := insn.(type) { 109 | case isa.Label, isa.Nop: 110 | continue 111 | case isa.Char: 112 | op = opChar 113 | args = []byte{t.Byte} 114 | case isa.Jump: 115 | op = opJump 116 | args = encodeLabel(labels[t.Lbl]) 117 | case isa.Choice: 118 | op = opChoice 119 | args = encodeLabel(labels[t.Lbl]) 120 | case isa.Call: 121 | op = opCall 122 | args = encodeLabel(labels[t.Lbl]) 123 | case isa.Commit: 124 | op = opCommit 125 | args = encodeLabel(labels[t.Lbl]) 126 | case isa.Return: 127 | op = opReturn 128 | case isa.Fail: 129 | op = opFail 130 | case isa.Set: 131 | op = opSet 132 | args = encodeU8(addSet(&code, t.Chars)) 133 | case isa.Any: 134 | op = opAny 135 | args = []byte{t.N} 136 | case isa.PartialCommit: 137 | op = opPartialCommit 138 | args = encodeLabel(labels[t.Lbl]) 139 | case isa.Span: 140 | op = opSpan 141 | args = encodeU8(addSet(&code, t.Chars)) 142 | case isa.BackCommit: 143 | op = opBackCommit 144 | args = encodeLabel(labels[t.Lbl]) 145 | case isa.FailTwice: 146 | op = opFailTwice 147 | case isa.Empty: 148 | op = opEmpty 149 | args = []byte{uint8(t.Op)} 150 | case isa.TestChar: 151 | op = opTestChar 152 | args = append([]byte{t.Byte}, encodeLabel(labels[t.Lbl])...) 153 | case isa.TestCharNoChoice: 154 | op = opTestCharNoChoice 155 | args = append([]byte{t.Byte}, encodeLabel(labels[t.Lbl])...) 156 | case isa.TestSet: 157 | op = opTestSet 158 | args = append(encodeU8(addSet(&code, t.Chars)), encodeLabel(labels[t.Lbl])...) 159 | case isa.TestSetNoChoice: 160 | op = opTestSetNoChoice 161 | args = append(encodeU8(addSet(&code, t.Chars)), encodeLabel(labels[t.Lbl])...) 162 | case isa.TestAny: 163 | op = opTestAny 164 | args = append([]byte{t.N}, encodeLabel(labels[t.Lbl])...) 
165 | case isa.CaptureBegin:
166 | op = opCaptureBegin
167 | args = encodeI16(int(t.Id))
168 | case isa.CaptureEnd:
169 | op = opCaptureEnd
170 | case isa.CaptureLate:
171 | op = opCaptureLate
172 | args = append([]byte{t.Back}, encodeI16(int(t.Id))...)
173 | case isa.CaptureFull:
174 | op = opCaptureFull
175 | args = append([]byte{t.Back}, encodeI16(int(t.Id))...)
176 | case isa.MemoOpen:
177 | op = opMemoOpen
178 | args = append(encodeLabel(labels[t.Lbl]), encodeI16(int(t.Id))...)
179 | case isa.MemoClose:
180 | op = opMemoClose
181 | case isa.MemoTreeOpen:
182 | op = opMemoTreeOpen
183 | args = append(encodeLabel(labels[t.Lbl]), encodeI16(int(t.Id))...)
184 | case isa.MemoTreeInsert:
185 | op = opMemoTreeInsert
186 | case isa.MemoTree:
187 | op = opMemoTree
188 | case isa.MemoTreeClose:
189 | op = opMemoTreeClose
190 | args = encodeI16(int(t.Id))
191 | case isa.CheckBegin:
192 | op = opCheckBegin
193 | args = append(encodeI16(t.Id), encodeI16(t.Flag)...)
194 | case isa.CheckEnd:
195 | op = opCheckEnd
196 | args = encodeU24(addChecker(&code, t.Checker))
197 | case isa.Error:
198 | op = opError
199 | args = encodeU24(addError(&code, t.Message))
200 | case isa.End:
201 | op = opEnd
202 | args = encodeBool(t.Fail)
203 | default:
204 | panic(fmt.Sprintf("invalid instruction during encoding: %v", t))
205 | }
206 | 
207 | code.data.Insns = append(code.data.Insns, op)
208 | 
209 | // pad with a zero byte when the args have an even length so that every encoded instruction occupies a whole number of 16-bit words
210 | if len(args)%2 == 0 {
211 | code.data.Insns = append(code.data.Insns, 0)
212 | }
213 | 
214 | code.data.Insns = append(code.data.Insns, args...)
215 | }
216 | code.data.Insns = append(code.data.Insns, opEnd, 0)
217 | 
218 | return code
219 | }
220 | 
221 | func encodeU8(x uint) []byte {
222 | if x >= 256 {
223 | panic("U8 out of bounds")
224 | }
225 | 
226 | return []byte{uint8(x)}
227 | }
228 | 
229 | func encodeI8(x int) []byte {
230 | if x < -128 || x >= 128 {
231 | panic("I8 out of bounds")
232 | }
233 | 
234 | return []byte{byte(x)}
235 | }
236 | 
237 | func encodeU16(x uint) []byte {
238 | if x >= (1 << 16) {
239 | panic("U16 out of bounds")
240 | }
241 | 
242 | b := make([]byte, 2)
243 | binary.LittleEndian.PutUint16(b[0:], uint16(x))
244 | return b
245 | }
246 | 
247 | func encodeI16(x int) []byte {
248 | if x < -(1<<15) || x >= (1<<15) {
249 | panic("I16 out of bounds")
250 | }
251 | 
252 | b := make([]byte, 2)
253 | binary.LittleEndian.PutUint16(b[0:], uint16(x))
254 | return b
255 | }
256 | 
257 | func encodeU24(x uint) []byte {
258 | if x >= (1 << 24) {
259 | panic("U24 out of bounds")
260 | }
261 | 
262 | b := make([]byte, 4)
263 | i1 := uint16((x >> 16) & 0xff)
264 | i2 := uint16(x)
265 | 
266 | binary.BigEndian.PutUint16(b[0:], i1)
267 | binary.LittleEndian.PutUint16(b[2:], i2)
268 | return b[1:4]
269 | }
270 | 
271 | func encodeLabel(x uint) []byte {
272 | return encodeU24(x)
273 | }
274 | 
275 | func encodeBool(b bool) []byte {
276 | if b {
277 | return []byte{1}
278 | }
279 | return []byte{0}
280 | }
281 | 
282 | // addSet adds the given charset to the code's list of charsets and returns the
283 | // index it was stored at. If an identical charset is already in the list, its
284 | // existing index is returned and no duplicate is inserted.
285 | func addSet(code *Code, set charset.Set) uint { 286 | for i, s := range code.data.Sets { 287 | if set == s { 288 | return uint(i) 289 | } 290 | } 291 | 292 | code.data.Sets = append(code.data.Sets, set) 293 | return uint(len(code.data.Sets) - 1) 294 | } 295 | 296 | func addError(code *Code, msg string) uint { 297 | for i, s := range code.data.Errors { 298 | if msg == s { 299 | return uint(i) 300 | } 301 | } 302 | 303 | code.data.Errors = append(code.data.Errors, msg) 304 | return uint(len(code.data.Errors) - 1) 305 | } 306 | 307 | func addChecker(code *Code, checker isa.Checker) uint { 308 | code.data.Checkers = append(code.data.Checkers, checker) 309 | return uint(len(code.data.Checkers) - 1) 310 | } 311 | -------------------------------------------------------------------------------- /vm/code_test.go: -------------------------------------------------------------------------------- 1 | package vm 2 | 3 | import ( 4 | "testing" 5 | 6 | "github.com/zyedidia/gpeg/charset" 7 | . "github.com/zyedidia/gpeg/pattern" 8 | ) 9 | 10 | func TestBytes(t *testing.T) { 11 | p := Grammar("Expr", map[string]Pattern{ 12 | "Expr": Concat(NonTerm("Factor"), Star(Concat(Set(charset.New([]byte{'+', '-'})), NonTerm("Factor")))), 13 | "Factor": Concat(NonTerm("Term"), Star(Concat(Set(charset.New([]byte{'*', '/'})), NonTerm("Term")))), 14 | "Term": Or(NonTerm("Number"), Concat(Concat(Literal("("), NonTerm("Expr")), Literal(")"))), 15 | "Number": Plus(Set(charset.Range('0', '9'))), 16 | }) 17 | 18 | code := Encode(MustCompile(p)) 19 | b, err := code.ToBytes() 20 | if err != nil { 21 | t.Error(err) 22 | } 23 | load, err := FromBytes(b) 24 | if err != nil { 25 | t.Error(err) 26 | } 27 | 28 | if load.Size() != code.Size() { 29 | t.Error("Saved and loaded code not equivalent") 30 | } 31 | 32 | for i := range code.data.Insns { 33 | if load.data.Insns[i] != code.data.Insns[i] { 34 | t.Errorf("Code byte %d does not match", i) 35 | } 36 | } 37 | } 38 | 39 | func TestJson(t *testing.T) { 40 | p := Grammar("Expr", map[string]Pattern{ 41 | "Expr": Concat(NonTerm("Factor"), Star(Concat(Set(charset.New([]byte{'+', '-'})), NonTerm("Factor")))), 42 | "Factor": Concat(NonTerm("Term"), Star(Concat(Set(charset.New([]byte{'*', '/'})), NonTerm("Term")))), 43 | "Term": Or(NonTerm("Number"), Concat(Concat(Literal("("), NonTerm("Expr")), Literal(")"))), 44 | "Number": Plus(Set(charset.Range('0', '9'))), 45 | }) 46 | 47 | code := Encode(MustCompile(p)) 48 | b, err := code.ToJson() 49 | if err != nil { 50 | t.Error(err) 51 | } 52 | load, err := FromJson(b) 53 | if err != nil { 54 | t.Error(err) 55 | } 56 | 57 | if load.Size() != code.Size() { 58 | t.Error("Saved and loaded code not equivalent") 59 | } 60 | 61 | for i := range code.data.Insns { 62 | if load.data.Insns[i] != code.data.Insns[i] { 63 | t.Errorf("Code byte %d does not match", i) 64 | } 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /vm/op.go: -------------------------------------------------------------------------------- 1 | package vm 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/isa" 5 | ) 6 | 7 | const ( 8 | // base instruction set 9 | opChar byte = iota 10 | opJump 11 | opChoice 12 | opCall 13 | opCommit 14 | opReturn 15 | opFail 16 | opSet 17 | opAny 18 | opPartialCommit 19 | opSpan 20 | opBackCommit 21 | opFailTwice 22 | opEmpty 23 | opTestChar 24 | opTestCharNoChoice 25 | opTestSet 26 | opTestSetNoChoice 27 | opTestAny 28 | opEnd 29 | opNop 30 | opCaptureBegin 31 | opCaptureLate 32 | opCaptureEnd 33 | 
opCaptureFull 34 | opCheckBegin 35 | opCheckEnd 36 | opMemoOpen 37 | opMemoClose 38 | opMemoTreeOpen 39 | opMemoTreeInsert 40 | opMemoTree 41 | opMemoTreeClose 42 | opError 43 | ) 44 | 45 | // instruction sizes 46 | const ( 47 | // base instruction set 48 | szChar = 2 49 | szReturn = 2 50 | szFail = 2 51 | szSet = 2 52 | szAny = 2 53 | szSpan = 2 54 | szFailTwice = 2 55 | szEnd = 2 56 | szNop = 0 57 | szEmpty = 2 58 | szCaptureBegin = 4 59 | szCaptureLate = 4 60 | szCaptureEnd = 2 61 | szCaptureFull = 4 62 | szMemoClose = 2 63 | szMemoTreeInsert = 2 64 | szMemoTree = 2 65 | szMemoTreeClose = 4 66 | szCheckBegin = 6 67 | szCheckEnd = 4 68 | szError = 4 69 | 70 | // jumps 71 | szJump = 4 72 | szChoice = 4 73 | szCall = 4 74 | szCommit = 4 75 | szPartialCommit = 4 76 | szBackCommit = 4 77 | szTestChar = 6 78 | szTestCharNoChoice = 6 79 | szTestSet = 6 80 | szTestSetNoChoice = 6 81 | szTestAny = 6 82 | szMemoOpen = 6 83 | szMemoTreeOpen = 6 84 | ) 85 | 86 | // returns the size in bytes of the encoded version of this instruction 87 | func size(insn isa.Insn) uint { 88 | var sz uint 89 | switch insn.(type) { 90 | case isa.Label, isa.Nop: 91 | return 0 92 | case isa.JumpType, isa.CheckBegin: 93 | sz += 4 94 | default: 95 | sz += 2 96 | } 97 | 98 | // handle instructions with extra args 99 | switch insn.(type) { 100 | case isa.MemoOpen, isa.MemoTreeOpen, isa.MemoTreeClose, isa.CaptureBegin, isa.CaptureLate, 101 | isa.CaptureFull, isa.TestChar, isa.TestCharNoChoice, isa.TestSet, 102 | isa.TestSetNoChoice, isa.TestAny, isa.Error, isa.CheckBegin, isa.CheckEnd: 103 | sz += 2 104 | } 105 | 106 | return sz 107 | } 108 | 109 | var names = map[byte]string{ 110 | opChar: "Char", 111 | opJump: "Jump", 112 | opChoice: "Choice", 113 | opCall: "Call", 114 | opCommit: "Commit", 115 | opReturn: "Return", 116 | opFail: "Fail", 117 | opSet: "Set", 118 | opAny: "Any", 119 | opPartialCommit: "PartialCommit", 120 | opSpan: "Span", 121 | opBackCommit: "BackCommit", 122 | opFailTwice: "FailTwice", 123 | opTestChar: "TestChar", 124 | opTestCharNoChoice: "TestCharNoChoice", 125 | opTestSet: "TestSet", 126 | opTestSetNoChoice: "TestSetNoChoice", 127 | opTestAny: "TestAny", 128 | opEnd: "End", 129 | opNop: "Nop", 130 | opCaptureBegin: "CaptureBegin", 131 | opCaptureLate: "CaptureLate", 132 | opCaptureEnd: "CaptureEnd", 133 | opCaptureFull: "CaptureFull", 134 | opCheckBegin: "CheckBegin", 135 | opCheckEnd: "CheckEnd", 136 | opMemoOpen: "MemoOpen", 137 | opMemoClose: "MemoClose", 138 | opMemoTreeOpen: "MemoTreeOpen", 139 | opMemoTreeInsert: "MemoTreeInsert", 140 | opMemoTree: "MemoTree", 141 | opMemoTreeClose: "MemoTreeClose", 142 | opError: "Error", 143 | opEmpty: "Empty", 144 | } 145 | 146 | func opstr(op byte) string { 147 | return names[op] 148 | } 149 | -------------------------------------------------------------------------------- /vm/stack.go: -------------------------------------------------------------------------------- 1 | package vm 2 | 3 | import ( 4 | "github.com/zyedidia/gpeg/memo" 5 | ) 6 | 7 | type stack struct { 8 | entries []stackEntry 9 | capt []*memo.Capture 10 | } 11 | 12 | func (s *stack) addCapt(capt ...*memo.Capture) { 13 | if len(s.entries) == 0 { 14 | s.capt = append(s.capt, capt...) 
15 | } else {
16 | s.entries[len(s.entries)-1].addCapt(capt)
17 | }
18 | }
19 | 
20 | func (s *stack) propCapt() {
21 | if len(s.entries) == 0 {
22 | return
23 | }
24 | 
25 | top := s.entries[len(s.entries)-1]
26 | if top.capt != nil && len(top.capt) > 0 {
27 | if len(s.entries) == 1 {
28 | s.capt = append(s.capt, top.capt...)
29 | } else {
30 | s.entries[len(s.entries)-2].addCapt(top.capt)
31 | }
32 | }
33 | }
34 | 
35 | const (
36 | stRet = iota
37 | stBtrack
38 | stMemo
39 | stMemoTree
40 | stCapt
41 | stCheck
42 | )
43 | 
44 | type stackEntry struct {
45 | stype byte
46 | // we could use a union to avoid the space cost but I have found this
47 | // doesn't impact performance and the space cost itself is quite small
48 | // because the stack is usually small.
49 | ret stackRet // stackRet is reused for stCheck
50 | btrack stackBacktrack
51 | memo stackMemo // stackMemo is reused for stCapt
52 | 
53 | capt []*memo.Capture
54 | }
55 | 
56 | func (se *stackEntry) addCapt(capt []*memo.Capture) {
57 | if len(capt) == 0 {
58 | return
59 | }
60 | if len(se.capt) == 0 {
61 | se.capt = capt
62 | } else {
63 | se.capt = append(se.capt, capt...)
64 | }
65 | }
66 | 
67 | type stackRet int
68 | 
69 | type stackBacktrack struct {
70 | ip int
71 | off int
72 | }
73 | 
74 | type stackMemo struct {
75 | id int16
76 | pos int
77 | count int
78 | }
79 | 
80 | func newStack() *stack {
81 | return &stack{
82 | entries: make([]stackEntry, 0, 4),
83 | capt: make([]*memo.Capture, 0),
84 | }
85 | }
86 | 
87 | func (s *stack) reset() {
88 | s.capt = nil
89 | // need to completely remake the slice so that the underlying captures can be
90 | // released to the garbage collector if the user has no references to them
91 | // (unused stack entries shouldn't keep references to those captures).
92 | s.entries = make([]stackEntry, 0, 4)
93 | }
94 | 
95 | func (s *stack) push(ent stackEntry) {
96 | s.entries = append(s.entries, ent)
97 | }
98 | 
99 | // propagate marks whether captures should be propagated up the stack.
100 | func (s *stack) pop(propagate bool) *stackEntry {
101 | if len(s.entries) == 0 {
102 | return nil
103 | }
104 | 
105 | ret := &s.entries[len(s.entries)-1]
106 | s.entries = s.entries[:len(s.entries)-1]
107 | // For non-capture entries, propagate the captures upward.
108 | // For capture entries, we create a new node with the corresponding
109 | // children, and this is manually handled by the caller.
110 | if propagate && ret.capt != nil {
111 | s.addCapt(ret.capt...)
112 | } 113 | return ret 114 | } 115 | 116 | func (s *stack) peek() *stackEntry { 117 | return s.peekn(0) 118 | } 119 | 120 | func (s *stack) peekn(n int) *stackEntry { 121 | if len(s.entries) <= n { 122 | return nil 123 | } 124 | return &s.entries[len(s.entries)-n-1] 125 | } 126 | 127 | func (s *stack) pushRet(r stackRet) { 128 | s.push(stackEntry{ 129 | stype: stRet, 130 | ret: r, 131 | }) 132 | } 133 | 134 | func (s *stack) pushBacktrack(b stackBacktrack) { 135 | s.push(stackEntry{ 136 | stype: stBtrack, 137 | btrack: b, 138 | }) 139 | } 140 | 141 | func (s *stack) pushMemo(m stackMemo) { 142 | s.push(stackEntry{ 143 | stype: stMemo, 144 | memo: m, 145 | }) 146 | } 147 | 148 | func (s *stack) pushMemoTree(m stackMemo) { 149 | s.push(stackEntry{ 150 | stype: stMemoTree, 151 | memo: m, 152 | }) 153 | } 154 | 155 | func (s *stack) pushCapt(m stackMemo) { 156 | s.push(stackEntry{ 157 | stype: stCapt, 158 | memo: m, 159 | }) 160 | } 161 | 162 | func (s *stack) pushCheck(m stackMemo) { 163 | s.push(stackEntry{ 164 | stype: stCheck, 165 | memo: m, 166 | }) 167 | } 168 | -------------------------------------------------------------------------------- /vm/vm.go: -------------------------------------------------------------------------------- 1 | // Package vm implements the GPeg virtual machine. 2 | package vm 3 | 4 | import ( 5 | "encoding/binary" 6 | "fmt" 7 | "io" 8 | "regexp/syntax" 9 | 10 | "github.com/zyedidia/gpeg/charset" 11 | "github.com/zyedidia/gpeg/input" 12 | "github.com/zyedidia/gpeg/memo" 13 | ) 14 | 15 | type ParseError struct { 16 | Message string 17 | Pos int 18 | } 19 | 20 | type Interval struct { 21 | Low, High int 22 | } 23 | 24 | func (e ParseError) Error() string { 25 | return fmt.Sprintf("%v: %s", e.Pos, e.Message) 26 | } 27 | 28 | // Exec executes the parsing program this virtual machine was created with. It 29 | // returns whether the parse was a match, the last position in the subject 30 | // string that was matched, and any captures that were created. 31 | func (vm *Code) Exec(r io.ReaderAt, memtbl memo.Table) (bool, int, *memo.Capture, []ParseError) { 32 | ip := 0 33 | st := newStack() 34 | src := input.NewInput(r) 35 | 36 | // parse in parallel? 37 | // if memtbl.Size() == 0 { 38 | // srccopy := input.NewInput(r) 39 | // srccopy.SeekTo(1000000) 40 | // go vm.exec(0, newStack(), srccopy, memtbl) 41 | // } 42 | 43 | return vm.exec(ip, st, src, memtbl, nil) 44 | } 45 | 46 | func (vm *Code) ExecInterval(r io.ReaderAt, memtbl memo.Table, intrvl *Interval) (bool, int, *memo.Capture, []ParseError) { 47 | ip := 0 48 | st := newStack() 49 | src := input.NewInput(r) 50 | 51 | return vm.exec(ip, st, src, memtbl, intrvl) 52 | } 53 | 54 | func (vm *Code) exec(ip int, st *stack, src *input.Input, memtbl memo.Table, intrvl *Interval) (bool, int, *memo.Capture, []ParseError) { 55 | idata := vm.data.Insns 56 | 57 | if ip < 0 || ip >= len(idata) { 58 | return true, 0, memo.NewCaptureDummy(0, 0, nil), nil 59 | } 60 | 61 | var caprange Interval 62 | 63 | if intrvl != nil { 64 | caprange = *intrvl 65 | // Apply an edit that clears all memoized entries in the interval 66 | // we are capturing. This ensures that we find all captures in the 67 | // requested interval. 
68 | memtbl.ApplyEdit(memo.Edit{ 69 | Start: intrvl.Low, 70 | End: intrvl.High, 71 | Len: intrvl.High - intrvl.Low, 72 | }) 73 | } 74 | 75 | memoize := func(id, pos, mlen, count int, capt []*memo.Capture) { 76 | if intrvl != nil { 77 | capt = nil 78 | } 79 | mexam := max(src.Furthest(), src.Pos()) - pos + 1 80 | memtbl.Put(id, pos, mlen, mexam, count, capt) 81 | } 82 | 83 | success := true 84 | var errs []ParseError = nil 85 | 86 | loop: 87 | for { 88 | op := idata[ip] 89 | switch op { 90 | case opChar: 91 | b := decodeU8(idata[ip+1:]) 92 | in, ok := src.Peek() 93 | if ok && b == in { 94 | src.Advance(1) 95 | ip += szChar 96 | } else { 97 | goto fail 98 | } 99 | case opJump: 100 | lbl := decodeU24(idata[ip+1:]) 101 | ip = int(lbl) 102 | case opChoice: 103 | lbl := decodeU24(idata[ip+1:]) 104 | st.pushBacktrack(stackBacktrack{int(lbl), src.Pos()}) 105 | ip += szChoice 106 | case opCall: 107 | lbl := decodeU24(idata[ip+1:]) 108 | st.pushRet(stackRet(ip + szCall)) 109 | ip = int(lbl) 110 | case opCommit: 111 | lbl := decodeU24(idata[ip+1:]) 112 | st.pop(true) 113 | ip = int(lbl) 114 | case opReturn: 115 | ent := st.pop(true) 116 | if ent != nil && ent.stype == stRet { 117 | ip = int(ent.ret) 118 | } else { 119 | panic("Return failed") 120 | } 121 | case opFail: 122 | goto fail 123 | case opSet: 124 | set := decodeSet(idata[ip+1:], vm.data.Sets) 125 | in, ok := src.Peek() 126 | if ok && set.Has(in) { 127 | src.Advance(1) 128 | ip += szSet 129 | } else { 130 | goto fail 131 | } 132 | case opAny: 133 | n := decodeU8(idata[ip+1:]) 134 | ok := src.Advance(int(n)) 135 | if ok { 136 | ip += szAny 137 | } else { 138 | goto fail 139 | } 140 | case opPartialCommit: 141 | lbl := decodeU24(idata[ip+1:]) 142 | ent := st.peek() 143 | if ent != nil && ent.stype == stBtrack { 144 | ent.btrack.off = src.Pos() 145 | st.propCapt() 146 | ent.capt = nil 147 | ip = int(lbl) 148 | } else { 149 | panic("PartialCommit failed") 150 | } 151 | case opSpan: 152 | set := decodeSet(idata[ip+1:], vm.data.Sets) 153 | in, ok := src.Peek() 154 | for ok && set.Has(in) { 155 | src.Advance(1) 156 | in, ok = src.Peek() 157 | } 158 | ip += szSpan 159 | case opBackCommit: 160 | lbl := decodeU24(idata[ip+1:]) 161 | ent := st.pop(true) 162 | if ent != nil && ent.stype == stBtrack { 163 | src.SeekTo(ent.btrack.off) 164 | ip = int(lbl) 165 | } else { 166 | panic("BackCommit failed") 167 | } 168 | case opFailTwice: 169 | st.pop(false) 170 | goto fail 171 | case opEmpty: 172 | op := syntax.EmptyOp(decodeU8(idata[ip+1:])) 173 | r1, r2 := rune(-1), rune(-1) 174 | // TODO: PeekBefore may cause problems with incremental parsing 175 | b1, ok := src.PeekBefore() 176 | if ok { 177 | r1 = rune(b1) 178 | } 179 | b2, ok := src.Peek() 180 | if ok { 181 | r2 = rune(b2) 182 | } 183 | sat := syntax.EmptyOpContext(r1, r2) 184 | if (sat & op) != 0 { 185 | ip += szEmpty 186 | } else { 187 | goto fail 188 | } 189 | case opTestChar: 190 | b := decodeU8(idata[ip+2:]) 191 | lbl := decodeU24(idata[ip+3:]) 192 | in, ok := src.Peek() 193 | if ok && in == b { 194 | st.pushBacktrack(stackBacktrack{int(lbl), src.Pos()}) 195 | src.Advance(1) 196 | ip += szTestChar 197 | } else { 198 | ip = int(lbl) 199 | } 200 | case opTestCharNoChoice: 201 | b := decodeU8(idata[ip+2:]) 202 | in, ok := src.Peek() 203 | if ok && in == b { 204 | src.Advance(1) 205 | ip += szTestCharNoChoice 206 | } else { 207 | lbl := decodeU24(idata[ip+3:]) 208 | ip = int(lbl) 209 | } 210 | case opTestSet: 211 | lbl := decodeU24(idata[ip+3:]) 212 | set := decodeSet(idata[ip+2:], vm.data.Sets) 213 | 
in, ok := src.Peek() 214 | if ok && set.Has(in) { 215 | st.pushBacktrack(stackBacktrack{int(lbl), src.Pos()}) 216 | src.Advance(1) 217 | ip += szTestSet 218 | } else { 219 | ip = int(lbl) 220 | } 221 | case opTestSetNoChoice: 222 | set := decodeSet(idata[ip+2:], vm.data.Sets) 223 | in, ok := src.Peek() 224 | if ok && set.Has(in) { 225 | src.Advance(1) 226 | ip += szTestSetNoChoice 227 | } else { 228 | lbl := decodeU24(idata[ip+3:]) 229 | ip = int(lbl) 230 | } 231 | case opTestAny: 232 | n := decodeU8(idata[ip+2:]) 233 | lbl := decodeU24(idata[ip+3:]) 234 | ent := stackBacktrack{int(lbl), src.Pos()} 235 | ok := src.Advance(int(n)) 236 | if ok { 237 | st.pushBacktrack(ent) 238 | ip += szTestAny 239 | } else { 240 | ip = int(lbl) 241 | } 242 | case opCaptureBegin: 243 | id := decodeI16(idata[ip+2:]) 244 | st.pushCapt(stackMemo{ 245 | id: id, 246 | pos: src.Pos(), 247 | }) 248 | ip += szCaptureBegin 249 | case opCaptureLate: 250 | back := decodeU8(idata[ip+1:]) 251 | id := decodeI16(idata[ip+2:]) 252 | st.pushCapt(stackMemo{ 253 | id: id, 254 | pos: src.Pos() - int(back), 255 | }) 256 | ip += szCaptureLate 257 | case opCaptureFull: 258 | back := int(decodeU8(idata[ip+1:])) 259 | id := decodeI16(idata[ip+2:]) 260 | pos := src.Pos() 261 | 262 | if overlaps(intrvl, pos-back, pos) { 263 | caprange.Low = min(caprange.Low, pos-back) 264 | caprange.High = max(caprange.High, pos) 265 | capt := memo.NewCaptureNode(int(id), pos-back, back, nil) 266 | st.addCapt(capt) 267 | } 268 | 269 | ip += szCaptureFull 270 | case opCaptureEnd: 271 | ent := st.pop(false) 272 | 273 | if ent == nil || ent.stype != stCapt { 274 | panic("CaptureEnd did not find capture entry") 275 | } 276 | 277 | end := src.Pos() 278 | if overlaps(intrvl, ent.memo.pos, end) { 279 | caprange.Low = min(caprange.Low, ent.memo.pos) 280 | caprange.High = max(caprange.High, end) 281 | capt := memo.NewCaptureNode(int(ent.memo.id), ent.memo.pos, end-ent.memo.pos, ent.capt) 282 | st.addCapt(capt) 283 | } 284 | ip += szCaptureEnd 285 | case opEnd: 286 | fail := decodeU8(idata[ip+1:]) 287 | success = fail != 1 288 | break loop 289 | case opMemoOpen: 290 | lbl := decodeU24(idata[ip+1:]) 291 | id := decodeI16(idata[ip+4:]) 292 | 293 | ment, ok := memtbl.Get(int(id), src.Pos()) 294 | if ok { 295 | if ment.Length() == -1 { 296 | goto fail 297 | } 298 | capt := ment.Captures() 299 | if capt != nil { 300 | st.addCapt(capt...) 301 | } 302 | src.Advance(ment.Length()) 303 | ip = int(lbl) 304 | } else { 305 | st.pushMemo(stackMemo{ 306 | id: id, 307 | pos: src.Pos(), 308 | }) 309 | ip += szMemoOpen 310 | } 311 | case opMemoClose: 312 | ent := st.pop(true) 313 | if ent != nil && ent.stype == stMemo { 314 | mlen := src.Pos() - ent.memo.pos 315 | memoize(int(ent.memo.id), ent.memo.pos, mlen, 1, ent.capt) 316 | } else { 317 | panic("memo close failed") 318 | } 319 | ip += szMemoClose 320 | case opMemoTreeOpen: 321 | lbl := decodeU24(idata[ip+1:]) 322 | id := decodeI16(idata[ip+4:]) 323 | 324 | ment, ok := memtbl.Get(int(id), src.Pos()) 325 | if ok { 326 | if ment.Length() == -1 { 327 | goto fail 328 | } 329 | st.pushMemoTree(stackMemo{ 330 | id: id, 331 | pos: src.Pos(), 332 | count: ment.Count(), 333 | }) 334 | capt := ment.Captures() 335 | if capt != nil { 336 | st.addCapt(capt...) 
337 | } 338 | src.Advance(ment.Length()) 339 | src.Peek() 340 | ip = int(lbl) 341 | } else { 342 | st.pushMemoTree(stackMemo{ 343 | id: id, 344 | pos: src.Pos(), 345 | }) 346 | ip += szMemoTreeOpen 347 | } 348 | case opMemoTreeClose: 349 | id := decodeI16(idata[ip+2:]) 350 | for p := st.peek(); p != nil && p.stype == stMemoTree && p.memo.id == id; p = st.peek() { 351 | st.pop(true) 352 | } 353 | ip += szMemoTreeClose 354 | case opMemoTreeInsert: 355 | ent := st.peek() 356 | if ent == nil || ent.stype != stMemoTree { 357 | panic("no memo entry on stack") 358 | } 359 | mlen := src.Pos() - ent.memo.pos 360 | ent.memo.count++ 361 | memoize(int(ent.memo.id), ent.memo.pos, mlen, ent.memo.count, ent.capt) 362 | ip += szMemoTreeInsert 363 | case opMemoTree: 364 | seen := 0 365 | accum := 0 366 | for { 367 | top := st.peekn(seen) 368 | next := st.peekn(seen + 1) 369 | 370 | if top == nil || next == nil || top.stype != stMemoTree || next.stype != stMemoTree { 371 | break 372 | } 373 | 374 | seen++ 375 | accum += top.memo.count 376 | 377 | if accum < next.memo.count { 378 | continue 379 | } 380 | 381 | for i := 0; i < seen-1; i++ { 382 | st.pop(true) 383 | } 384 | ent := st.pop(false) // next is now top of stack 385 | 386 | if len(ent.capt) > 0 && intrvl == nil { 387 | dummy := memo.NewCaptureDummy(ent.memo.pos, src.Pos()-ent.memo.pos, ent.capt) 388 | st.addCapt(dummy) 389 | } else if len(ent.capt) > 0 { 390 | st.addCapt(ent.capt...) 391 | } 392 | 393 | next.memo.count = accum + next.memo.count 394 | mlen := src.Pos() - next.memo.pos 395 | memoize(int(next.memo.id), next.memo.pos, mlen, next.memo.count, next.capt) 396 | 397 | accum = 0 398 | seen = 0 399 | } 400 | 401 | ip += szMemoTree 402 | case opCheckBegin: 403 | id := decodeI16(idata[ip+2:]) 404 | flag := decodeI16(idata[ip+4:]) 405 | st.pushCheck(stackMemo{ 406 | id: id, 407 | count: int(flag), 408 | pos: src.Pos(), 409 | }) 410 | ip += szCheckBegin 411 | case opCheckEnd: 412 | ent := st.pop(true) 413 | if ent == nil || ent.stype != stCheck { 414 | panic("check end needs check stack entry") 415 | } 416 | checkid := decodeU24(idata[ip+1:]) 417 | checker := vm.data.Checkers[checkid] 418 | 419 | id := int(ent.memo.id) 420 | flag := ent.memo.count 421 | n := checker.Check(src.Slice(int(ent.memo.pos), src.Pos()), src, id, flag) 422 | if n == -1 { 423 | goto fail 424 | } else { 425 | src.Advance(n) 426 | } 427 | 428 | ip += szCheckEnd 429 | case opError: 430 | errid := decodeU24(idata[ip+1:]) 431 | msg := vm.data.Errors[errid] 432 | errs = append(errs, ParseError{ 433 | Pos: src.Pos(), 434 | Message: msg, 435 | }) 436 | ip += szError 437 | default: 438 | panic("Invalid opcode") 439 | } 440 | } 441 | 442 | if intrvl != nil { 443 | return success, src.Pos(), memo.NewCaptureDummy(caprange.Low, caprange.High-caprange.Low, st.capt), errs 444 | } 445 | return success, src.Pos(), memo.NewCaptureDummy(0, src.Pos(), st.capt), errs 446 | 447 | fail: 448 | ent := st.pop(false) 449 | if ent == nil { 450 | // match failed 451 | return false, src.Pos(), nil, errs 452 | } 453 | 454 | switch ent.stype { 455 | case stBtrack: 456 | ip = ent.btrack.ip 457 | src.SeekTo(ent.btrack.off) 458 | ent.capt = nil 459 | case stMemo: 460 | // Mark this position in the memoTable as a failed match 461 | memoize(int(ent.memo.id), ent.memo.pos, -1, 0, nil) 462 | ent.capt = nil 463 | goto fail 464 | case stRet, stCapt, stCheck: 465 | ent.capt = nil 466 | goto fail 467 | } 468 | 469 | goto loop 470 | } 471 | 472 | func decodeU8(b []byte) byte { 473 | return b[0] 474 | } 475 | 476 | 
func decodeI8(b []byte) int8 { 477 | return int8(b[0]) 478 | } 479 | 480 | func decodeU16(b []byte) uint16 { 481 | return binary.LittleEndian.Uint16(b[0:]) 482 | } 483 | 484 | func decodeI16(b []byte) int16 { 485 | return int16(binary.LittleEndian.Uint16(b[0:])) 486 | } 487 | 488 | func decodeU24(b []byte) uint32 { 489 | i1 := uint32(decodeU8(b)) 490 | i2 := uint32(decodeU16(b[1:])) 491 | i := (i1 << 16) | i2 492 | return i 493 | } 494 | 495 | func decodeSet(b []byte, sets []charset.Set) charset.Set { 496 | i := decodeU8(b) 497 | return sets[i] 498 | } 499 | 500 | func overlaps(i *Interval, low2, high2 int) bool { 501 | if i == nil { 502 | return true 503 | } 504 | return i.Low < high2 && i.High > low2 505 | } 506 | 507 | func min(a, b int) int { 508 | if a < b { 509 | return a 510 | } 511 | return b 512 | } 513 | func max(a, b int) int { 514 | if a > b { 515 | return a 516 | } 517 | return b 518 | } 519 | --------------------------------------------------------------------------------
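The vm package above is also what enables dynamically loaded grammars: compiled bytecode can be serialized with Code.ToBytes and restored with vm.FromBytes (both exercised in vm/code_test.go). A minimal, illustrative round-trip sketch (not a repository file), using only exported functions that appear in this dump:

// loadgrammar.go — illustrative only, not part of the repository.
package main

import (
	"fmt"
	"strings"

	"github.com/zyedidia/gpeg/memo"
	"github.com/zyedidia/gpeg/pattern"
	"github.com/zyedidia/gpeg/re"
	"github.com/zyedidia/gpeg/vm"
)

func main() {
	code := vm.Encode(pattern.MustCompile(re.MustCompile("Num <- [0-9]+")))

	// Serialize the bytecode (gob, gzip-compressed); in a real application
	// this could be written to disk or shipped over the network.
	b, err := code.ToBytes()
	if err != nil {
		panic(err)
	}

	// Later, or in a different process, load the grammar back and run it
	// without recompiling the pattern.
	loaded, err := vm.FromBytes(b)
	if err != nil {
		panic(err)
	}
	match, n, _, _ := loaded.Exec(strings.NewReader("12345"), memo.NoneTable{})
	fmt.Println(match, n) // should print: true 5
}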