├── .gitignore
├── LICENCE.txt
├── Makefile
├── Readme.md
├── ast.hpp
├── docs
    ├── CHANGELOG.md
    ├── Developer_Guide.md
    ├── Egg_Grammar_Guide.md
    ├── Roadmap.md
    └── TODO.md
├── egg-bak.hpp
├── egg.egg
├── egg.hpp
├── grammars
    ├── .gitignore
    ├── Makefile
    ├── abc.egg
    ├── anbncn.egg
    ├── calc.egg
    └── tests
    │   ├── abc.in.txt
    │   ├── abc.out.txt
    │   ├── anbncn.in.txt
    │   ├── anbncn.out.txt
    │   ├── calc.in.txt
    │   └── calc.out.txt
├── main.cpp
├── parser.hpp
├── utils
    └── strings.hpp
└── visitors
    ├── compiler.hpp
    ├── normalizer.hpp
    └── printer.hpp


/.gitignore:
--------------------------------------------------------------------------------
1 | egg
2 | 


--------------------------------------------------------------------------------
/LICENCE.txt:
--------------------------------------------------------------------------------
 1 | Copyright (c) 2013 Aaron Moss
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | of this software and associated documentation files (the "Software"), to deal
 5 | in the Software without restriction, including without limitation the rights
 6 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | copies of the Software, and to permit persons to whom the Software is
 8 | furnished to do so, subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in
11 | all copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | THE SOFTWARE.


--------------------------------------------------------------------------------
/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2013 Aaron Moss
 2 | # 
 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | # of this software and associated documentation files (the "Software"), to deal
 5 | # in the Software without restriction, including without limitation the rights
 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | # copies of the Software, and to permit persons to whom the Software is
 8 | # furnished to do so, subject to the following conditions:
 9 | # 
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | # 
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE.
20 | 
21 | #CXXFLAGS = -O0 -ggdb --std=c++0x
22 | CXXFLAGS = -O0 --std=c++0x
23 | #CXXFLAGS = -O1 --std=c++0x
24 | #CXXFLAGS = -O2 --std=c++0x
25 | #CXXFLAGS = -O3 --std=c++0x
26 | 
27 | egg:  main.cpp egg.hpp ast.hpp parser.hpp utils/strings.hpp visitors/printer.hpp visitors/compiler.hpp visitors/normalizer.hpp
28 | 	$(CXX) $(CXXFLAGS) -o egg main.cpp $(OBJS) $(LDFLAGS)
29 | 
30 | .PHONY: clean
31 | clean:  
32 | 	rm -f egg


--------------------------------------------------------------------------------
/Readme.md:
--------------------------------------------------------------------------------
 1 | # Egg Parsing Expression Grammar Generator #
 2 | 
 3 | Egg is a parser generator for parsing expression grammars (PEGs). 
 4 | Its grammar is based on the grammar of Ian Piumarta's [`leg`](http://piumarta.com/software/peg/). 
 5 | Parsing expression grammars are a formalization of recursive top-down parsers; they are similar to context free grammars, with the primary difference being that alternation in a PEG is ordered, while it is unordered in context free grammars. 
 6 | Due to this quality, a PEG-based parser generator such as Egg does not need a separate lexer, and can do parsing in one pass.
 7 | 
 8 | Egg is written in C++11, using modern constructs. 
 9 | Egg supports inclusion of C++ code inline in rules, as well as returning objects of any default-constructible type from a rule match (this type may be different for different rules).
10 | 
11 | ## Usage ##
12 | 
13 | This Readme contains only summary information; full documentation can be found in the `docs` folder, and a detailed description of the grammar constructs can be found in the Egg Grammar Guide. Those who want to modify or contribute to the project should also read the Developer Guide.
14 | 
15 | ### Usage Summary ###
16 | 
17 |     egg [command] [flags] [input-file [output-file]]
18 |     
19 | Supported flags are
20 | 
21 | - `-i --input`		input file (default stdin)
22 | - `-o --output`		output file (default stdout)
23 | - `-c --command`	command - either compile or print (default compile)
24 | - `-n --name`		grammar name - if none given, takes the longest prefix of the input or output file name (output preferred) which is a valid Egg identifier (default empty)
25 | - `--no-norm`       turns off grammar normalization
26 | - `--no-memo`       turns off memoization in the generated parser
27 | 
28 | ### Grammar Summary ###
29 | 
30 | A more complete grammar may be found in the Grammar Guide, and some simple example grammars may be found in the `grammars` directory. 
31 | 
32 | - A grammar is a sequence of rules
33 | - Rules are of the form ``name (":" type)? ("`" error "`")? "%no-memo"? "=" matcher``. 
34 |   `name` may be used in other rules (or even recursively in the matcher) to match the rule; if a type is given for the rule, `name ":" id` is a matcher that will bind `id` to a variable of type `type` returned by the rule.
35 |   Rule names are composed of alphanumeric characters and underscores, where the first character may not be a digit.
36 |   If an `error` string is provided, the rule will set an "expected" message with the error string if it fails; as a shorthand, an empty error string is interpreted as the name of the rule. 
37 |   If the `%no-memo` annotation is provided, the rule will not be memoized. 
38 | - Matchers can be combined in sequence simply by writing them in sequence, `matcher_1 matcher_2`
39 | - Choice between matchers is represented as `choice_1 "|" choice_2`; this choice is _ordered_, that is, if `choice_1` matches, no attempt will be made to match `choice_2`.
40 | - Matchers can be grouped into a larger matcher by surrounding them with parentheses, `"(" matcher_1 matcher_2 ... ")"`
41 | - Matchers can be made optional by appending a `?`, repeatable by appending a `*`, or repeatable at least once by appending a `+`.
42 | - `"&" matcher` provides lookahead - the matcher will run, but no input will be consumed. 
43 |   `!` works similarly, except `"!" matcher` only matches if `matcher` _doesn't_.
44 | - Character literals and string literals are matchers for those characters or strings, and are denoted by surrounding them in single `'` or double `"` quotes, respectively. 
45 |   ''', '"', and '\' are backslash-escaped as in C, the escapes "\n", "\r", and "\t" also work.
46 | - A character class obeys the following syntax: `"[" (char_1 '-' char_2 | char)* "]"`. 
47 |   `char_1 '-' char_2` will match any character between `char_1` and `char_2`, while `char` matches the given character. 
48 |   Character classes may bind their matched character using `:` like rules.
49 | - `.` matches any character, and may be bound with `:` as well, `;` is an empty matcher that always matches without consuming any input.
50 | - An action consists of C++ code surrounded with curly braces `{ }`. 
51 |   Any C++ code that can be placed in a function is permitted, assuming that it is syntactically complete. 
52 |   Any variables bound from rule matchers are available in this code, as well as `psVal`, the return value for typed rules, and `ps`, the current parser state (`ps.posn()` is the current index, `ps.string(p,n)` is the `n` characters starting at position `p`, other public functions can be found in the Grammar Guide).
53 | - A matcher can be surrounded with angle brackets `< >` to capture the string which is matched. 
54 |   This capture matcher must be bound to a string using `:`
55 | - An expression can be given a name to be reported as "expected" if it fails by appending `` "@" "`" name "`" ``; character and string literals may have `@` prepended to set the error name to a representation of the literal.
56 | - A failure matcher takes the form `` "~" "`" message "`" ``, and always fails, reporting the given message.
57 | - One-line comments start with a `#`
58 | - Whitespace is not significant except to delimit tokens
59 | 
60 | ### Using Generated Headers ###
61 | 
62 | Egg generates C++ headers implementing the input grammar; the generated code is in a namespace which is by default the same name as the input file (less extensions and any other suffix which is not a valid Egg identifier). 
63 | These headers depend on the Egg header `parser.hpp`, which defines the `parser` namespace, and must be located in the same folder. 
64 | (At some point this header may be inlined, but I need to determine how to address the licencing considerations of this first.) 
65 | Each grammar rule generates a function with the same name; this function takes a `parser::state` reference as a parameter, and returns a boolean. 
66 | If the rule is typed, there is a second `T&` parameter `psVal`, which is the return value of the rule. 
67 | 
68 | A `parser::state` object encapsulates the current parser state. 
69 | Its constructor takes a `std::istream` reference as a parameter, which it will read from. 
70 | It exposes its current `parser::posn` position object in the stream with the methods `posn()` and `set_posn(p)`. 
71 | This position object exposes its character index, line, and column as `index()`, `line()`, and `col()`, as well as defining the standard relational operators and a difference operator. 
72 | The error result from the parse can be accessed by the `err` member, of type `parser::error`; this member has a `pos` position member, and two sets of error strings `expected` (things the parser failed to parse) and `messages` (error messages set by the programmer). 
73 | `parser::state` also has a variety of public methods: `operator()` takes a position and returns the character at that position (the position can be omitted to return the character at the current position), `range(begin, len)` returns a `std::pair` of iterators pointing to the input character at position `begin` and the character at most `len` characters later, and `string(begin, len)` returns the `std::string` represented by `range(begin, len)`.
74 | 
75 | ## Installation ##
76 | 
77 | Run `make egg` from the main directory. 
78 | Make is obviously required, but the only other requirement is a relatively modern C++ compiler with support for C++11 constructs; Egg is tested on clang++ 3.3, but may work with other compilers. 
79 | Egg doesn't yet install to the system path; it's on the TODO list.
80 | 
81 | ## Testing ##
82 | 
83 | Run `make test` from the `grammars` directory. 
84 | This may result in a fair bit of output, but if the last line reads "TESTS PASSED" then they have been successful.
85 | 
86 | ## Licence ##
87 | 
88 | Egg is released under the MIT licence (see the included LICENCE file for details). 
89 | Egg-generated parsers have no licence imposed on them, but do depend on some MIT licenced headers from the project - subject to further legal and technical consideration, Egg may be modified in the future to inline these headers with a GNU Bison-style licence exception.


--------------------------------------------------------------------------------
/ast.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include <memory>
 26 | #include <string>
 27 | #include <unordered_map>
 28 | #include <vector>
 29 | 
 30 | #include "utils/strings.hpp"
 31 | 
 32 | namespace ast {
 33 | 	using std::string;
 34 | 	using std::unordered_map;
 35 | 	using std::vector;
 36 | 	using std::shared_ptr;
 37 | 	/** Builds a shared_ptr<T>, handing the (by-value) arguments to T's constructor. */
 38 | 	template<class T, class... CtorArgs>
 39 | 	shared_ptr<T> make_ptr(CtorArgs... ctor_args) { return std::make_shared<T>(ctor_args...); }
 40 | 	/** Converts a shared_ptr<U> to shared_ptr<T> with std::static_pointer_cast (no runtime type check). */
 41 | 	template<class T, class U>
 42 | 	shared_ptr<T> as_ptr(const shared_ptr<U>& src) { return std::static_pointer_cast<T>(src); }
 43 | 
 44 | 	/** Span of characters from `from` to `to`; a single character when both ends coincide. */
 45 | 	class char_range {
 46 | 	public:
 47 | 		char_range() : from('\0'), to('\0') {}
 48 | 		char_range(char c) : from(c), to(c) {}
 49 | 		char_range(char lo, char hi) : from(lo), to(hi) {}
 50 | 		char_range(const char_range& other) : from(other.from), to(other.to) {}
 51 | 
 52 | 		/** True when the range covers exactly one character. */
 53 | 		bool single() const { return to == from; }
 54 | 		char from;	/**< First character in the range */
 55 | 		char to;	/**< Last character in the range; equal to `from` for a 
 56 | 					 *   single-character range 
 57 | 					 */
 58 | 	}; /* class char_range */
 59 | 	typedef shared_ptr<char_range> char_range_ptr;
 60 | 
 61 | 	class char_matcher;
 62 | 	class str_matcher;
 63 | 	class range_matcher;
 64 | 	class rule_matcher;
 65 | 	class any_matcher;
 66 | 	class empty_matcher;
 67 | 	class action_matcher;
 68 | 	class opt_matcher;
 69 | 	class many_matcher;
 70 | 	class some_matcher;
 71 | 	class seq_matcher;
 72 | 	class alt_matcher;
 73 | 	class look_matcher;
 74 | 	class not_matcher;
 75 | 	class capt_matcher;
 76 | 	class named_matcher;
 77 | 	class fail_matcher;
 78 | 
 79 | 	/** Type tag for AST nodes; each concrete matcher's type() returns its own tag. */
 80 | 	enum matcher_type {
 81 | 		char_type,    /**< char_matcher: character literal */
 82 | 		str_type,     /**< str_matcher: string literal */
 83 | 		range_type,   /**< range_matcher: character range(s) */
 84 | 		rule_type,    /**< rule_matcher: grammar rule invocation */
 85 | 		any_type,     /**< any_matcher: any character */
 86 | 		empty_type,   /**< empty_matcher: matches without consuming */
 87 | 		action_type,  /**< action_matcher: semantic action */
 88 | 		opt_type,     /**< opt_matcher: optional matcher */
 89 | 		many_type,    /**< many_matcher: any number of repetitions */
 90 | 		some_type,    /**< some_matcher: non-zero number of repetitions */
 91 | 		seq_type,     /**< seq_matcher: sequence of matchers */
 92 | 		alt_type,     /**< alt_matcher: alternation */
 93 | 		look_type,    /**< look_matcher: lookahead */
 94 | 		not_type,     /**< not_matcher: negative lookahead */
 95 | 		capt_type,    /**< capt_matcher: string capture */
 96 | 		named_type,   /**< named_matcher: named-error wrapper */
 97 | 		fail_type     /**< fail_matcher: error matcher */
 98 | 	}; /* enum matcher_type */
 99 | 	
100 | 	/** Abstract base class of all matcher visitors (visitor pattern); one visit() overload per concrete matcher type. */
101 | 	class visitor {
102 | 	public:
103 | 		virtual ~visitor() = default;  /* allows safe destruction through a visitor* */
104 | 		virtual void visit(char_matcher&) = 0;
105 | 		virtual void visit(str_matcher&) = 0;
106 | 		virtual void visit(range_matcher&) = 0;
107 | 		virtual void visit(rule_matcher&) = 0;
108 | 		virtual void visit(any_matcher&) = 0;
109 | 		virtual void visit(empty_matcher&) = 0;
110 | 		virtual void visit(action_matcher&) = 0;
111 | 		virtual void visit(opt_matcher&) = 0;
112 | 		virtual void visit(many_matcher&) = 0;
113 | 		virtual void visit(some_matcher&) = 0;
114 | 		virtual void visit(seq_matcher&) = 0;
115 | 		virtual void visit(alt_matcher&) = 0;
116 | 		virtual void visit(look_matcher&) = 0;
117 | 		virtual void visit(not_matcher&) = 0;
118 | 		virtual void visit(capt_matcher&) = 0;
119 | 		virtual void visit(named_matcher&) = 0;
120 | 		virtual void visit(fail_matcher&) = 0;
121 | 	}; /* class visitor */
122 | 	
123 | 	/** Abstract base class of all matchers (visitor pattern): a node accepts a visitor and reports its type tag. */
124 | 	class matcher {
125 | 	public:
126 | 		virtual ~matcher() = default;  /* allows safe destruction through a matcher* */
127 | 		/** Implements visitor pattern. */
128 | 		virtual void accept(visitor*) = 0;
129 | 		/** Gets type tag. */
130 | 		virtual matcher_type type() = 0;
131 | 	}; /* class matcher */
132 | 	typedef shared_ptr<matcher> matcher_ptr;
133 | 	
134 | 	/** AST node for a single character literal. */
135 | 	class char_matcher : public matcher {
136 | 	public:
137 | 		char_matcher() : c('\0') {}
138 | 		char_matcher(char ch) : c(ch) {}
139 | 
140 | 		matcher_type type() { return char_type; }
141 | 		void accept(visitor* vis) { vis->visit(*this); }
142 | 
143 | 		char c; /**< the literal character this node matches */
144 | 	}; /* class char_matcher */
145 | 	typedef shared_ptr<char_matcher> char_matcher_ptr;
146 | 
147 | 	/** AST node for a string literal. */
148 | 	class str_matcher : public matcher {
149 | 	public:
150 | 		str_matcher() : s() {}
151 | 		str_matcher(string text) : s(text) {}
152 | 
153 | 		matcher_type type() { return str_type; }
154 | 		void accept(visitor* vis) { vis->visit(*this); }
155 | 
156 | 		string s; /**< the literal string this node matches */
157 | 	}; /* class str_matcher */
158 | 	typedef shared_ptr<str_matcher> str_matcher_ptr;
159 | 
160 | 	/** AST node for a list of character ranges, optionally bound to a variable. */
161 | 	class range_matcher : public matcher {
162 | 	public:
163 | 		range_matcher() : var() {}
164 | 		range_matcher(string bound) : var(bound) {}
165 | 
166 | 		matcher_type type() { return range_type; }
167 | 		void accept(visitor* vis) { vis->visit(*this); }
168 | 
169 | 		/** Appends one range; returns *this so calls can be chained. */
170 | 		range_matcher& operator += (char_range added) { rs.push_back(added); return *this; }
171 | 		vector<char_range> rs;  /**< character ranges held by this node */
172 | 		string var;             /**< name of the variable bound to the captured 
173 | 		                         *   character; empty if unset */
174 | 	}; /* class range_matcher */
175 | 	typedef shared_ptr<range_matcher> range_matcher_ptr;
176 | 
177 | 	/** AST node for an invocation of a grammar rule. */
178 | 	class rule_matcher : public matcher {
179 | 	public:
180 | 		rule_matcher() : rule(), var() {}
181 | 		rule_matcher(string callee) : rule(callee), var() {}
182 | 		rule_matcher(string callee, string bound) : rule(callee), var(bound) {}
183 | 
184 | 		matcher_type type() { return rule_type; }
185 | 		void accept(visitor* vis) { vis->visit(*this); }
186 | 
187 | 		string rule;	/**< name of the rule to match */
188 | 		string var;		/**< name of the variable bound to the rule's 
189 | 						 *   return; empty if unset */
190 | 	}; /* class rule_matcher */
191 | 	typedef shared_ptr<rule_matcher> rule_matcher_ptr;
192 | 
193 | 	/** AST node matching any single character. */
194 | 	class any_matcher : public matcher {
195 | 	public:
196 | 		any_matcher() : var() {}
197 | 		any_matcher(string bound) : var(bound) {}
198 | 
199 | 		matcher_type type() { return any_type; }
200 | 		void accept(visitor* vis) { vis->visit(*this); }
201 | 
202 | 		string var;  /**< name of the variable bound to the captured 
203 | 		              *   character; empty if unset */
204 | 	}; /* class any_matcher */
205 | 	typedef shared_ptr<any_matcher> any_matcher_ptr;
206 | 
207 | 	/** AST node that always matches and consumes no character. */
208 | 	class empty_matcher : public matcher {
209 | 	public:
210 | 		empty_matcher() {}
211 | 
212 | 		matcher_type type() { return empty_type; }
213 | 		void accept(visitor* vis) { vis->visit(*this); }
214 | 	}; /* class empty_matcher */
215 | 	typedef shared_ptr<empty_matcher> empty_matcher_ptr;
216 | 
217 | 	/** AST node for a semantic action; not a true matcher. */
218 | 	class action_matcher : public matcher {
219 | 	public:
220 | 		action_matcher() : a() {}
221 | 		action_matcher(string code) : a(code) {}
222 | 
223 | 		matcher_type type() { return action_type; }
224 | 		void accept(visitor* vis) { vis->visit(*this); }
225 | 
226 | 		string a; /**< string holding the action's text */
227 | 	}; /* class action_matcher */
228 | 	typedef shared_ptr<action_matcher> action_matcher_ptr;
229 | 
230 | 	/** AST node making the wrapped matcher optional. */
231 | 	class opt_matcher : public matcher {
232 | 	public:
233 | 		opt_matcher() : m() {}
234 | 		opt_matcher(shared_ptr<matcher> inner) : m(inner) {}
235 | 
236 | 		matcher_type type() { return opt_type; }
237 | 		void accept(visitor* vis) { vis->visit(*this); }
238 | 
239 | 		shared_ptr<matcher> m; /**< wrapped matcher; null when default-constructed */
240 | 	}; /* class opt_matcher */
241 | 	typedef shared_ptr<opt_matcher> opt_matcher_ptr;
242 | 
243 | 	/** AST node repeating the wrapped matcher any number of times. */
244 | 	class many_matcher : public matcher {
245 | 	public:
246 | 		many_matcher() : m() {}
247 | 		many_matcher(shared_ptr<matcher> inner) : m(inner) {}
248 | 
249 | 		matcher_type type() { return many_type; }
250 | 		void accept(visitor* vis) { vis->visit(*this); }
251 | 
252 | 		shared_ptr<matcher> m; /**< wrapped matcher; null when default-constructed */
253 | 	}; /* class many_matcher */
254 | 	typedef shared_ptr<many_matcher> many_matcher_ptr;
255 | 
256 | 	/** AST node repeating the wrapped matcher a non-zero number of times. */
257 | 	class some_matcher : public matcher {
258 | 	public:
259 | 		some_matcher() : m() {}
260 | 		some_matcher(shared_ptr<matcher> inner) : m(inner) {}
261 | 
262 | 		matcher_type type() { return some_type; }
263 | 		void accept(visitor* vis) { vis->visit(*this); }
264 | 
265 | 		shared_ptr<matcher> m; /**< wrapped matcher; null when default-constructed */
266 | 	}; /* class some_matcher */
267 | 	typedef shared_ptr<some_matcher> some_matcher_ptr;
268 | 
269 | 	/** AST node holding a sequence of matchers. */
270 | 	class seq_matcher : public matcher {
271 | 	public:
272 | 		seq_matcher() {}
273 | 
274 | 		matcher_type type() { return seq_type; }
275 | 		void accept(visitor* vis) { vis->visit(*this); }
276 | 
277 | 		/** Appends a matcher to the end of the sequence; returns *this for chaining. */
278 | 		seq_matcher& operator += (shared_ptr<matcher> next) { ms.push_back(next); return *this; }
279 | 		vector<shared_ptr<matcher>> ms; /**< matchers in the sequence, in insertion order */
280 | 	}; /* class seq_matcher */
281 | 	typedef shared_ptr<seq_matcher> seq_matcher_ptr;
282 | 
283 | 	/** AST node holding a list of alternate matchers. */
284 | 	class alt_matcher : public matcher {
285 | 	public:
286 | 		alt_matcher() {}
287 | 
288 | 		matcher_type type() { return alt_type; }
289 | 		void accept(visitor* vis) { vis->visit(*this); }
290 | 
291 | 		/** Appends an alternative after the existing ones; returns *this for chaining. */
292 | 		alt_matcher& operator += (shared_ptr<matcher> choice) { ms.push_back(choice); return *this; }
293 | 		vector<shared_ptr<matcher>> ms; /**< alternate matchers, in insertion order */
294 | 	}; /* class alt_matcher */
295 | 	typedef shared_ptr<alt_matcher> alt_matcher_ptr;
296 | 
297 | 	/** AST node for lookahead on the wrapped matcher. */
298 | 	class look_matcher : public matcher {
299 | 	public:
300 | 		look_matcher() : m() {}
301 | 		look_matcher(shared_ptr<matcher> inner) : m(inner) {}
302 | 
303 | 		matcher_type type() { return look_type; }
304 | 		void accept(visitor* vis) { vis->visit(*this); }
305 | 
306 | 		shared_ptr<matcher> m; /**< matcher to check on lookahead; null when default-constructed */
307 | 	}; /* class look_matcher */
308 | 	typedef shared_ptr<look_matcher> look_matcher_ptr;
309 | 
310 | 	/** AST node for negative lookahead on the wrapped matcher. */
311 | 	class not_matcher : public matcher {
312 | 	public:
313 | 		not_matcher() : m() {}
314 | 		not_matcher(shared_ptr<matcher> inner) : m(inner) {}
315 | 
316 | 		matcher_type type() { return not_type; }
317 | 		void accept(visitor* vis) { vis->visit(*this); }
318 | 
319 | 		shared_ptr<matcher> m; /**< matcher to check on lookahead; null when default-constructed */
320 | 	}; /* class not_matcher */
321 | 	typedef shared_ptr<not_matcher> not_matcher_ptr;
322 | 
323 | 	/** AST node capturing the string matched by the wrapped matcher. */
324 | 	class capt_matcher : public matcher {
325 | 	public:
326 | 		capt_matcher() {}
327 | 		capt_matcher(shared_ptr<matcher> inner, string bound) : m(inner), var(bound) {}
328 | 
329 | 		matcher_type type() { return capt_type; }
330 | 		void accept(visitor* vis) { vis->visit(*this); }
331 | 
332 | 		shared_ptr<matcher> m; /**< matcher whose match is captured */
333 | 		string var;            /**< name of the variable bound to the captured 
334 | 		                        *   string; empty if unset */
335 | 	}; /* class capt_matcher */
336 | 	typedef shared_ptr<capt_matcher> capt_matcher_ptr;
337 | 	
338 | 	/** Named-error matcher. */
339 | 	class named_matcher : public matcher {
340 | 	public:
341 | 		named_matcher(shared_ptr<matcher> m, string error) : m(m), error(error) {}
342 | 		named_matcher() {}
343 | 		
344 | 		void accept(visitor* v) { v->visit(*this); }
345 | 		matcher_type type() { return named_type; }
346 | 		
347 | 		shared_ptr<matcher> m;  /**< Matcher to name on failure */
348 | 		string error;           /**< Name of matcher in case of error */
349 | 	}; /* class named_matcher */
350 | 	typedef shared_ptr<named_matcher> named_matcher_ptr;
351 | 	
352 | 	/** Error matcher */
353 | 	class fail_matcher : public matcher {
354 | 	public:
355 | 		fail_matcher(string error) : error(error) {}
356 | 		fail_matcher() {}
357 | 		
358 | 		void accept(visitor* v) { v->visit(*this); }
359 | 		matcher_type type() { return fail_type; }
360 | 		
361 | 		string error;  /**< Error string to emit */
362 | 	}; /* class fail_matcher */
363 | 	typedef shared_ptr<fail_matcher> fail_matcher_ptr;
364 | 
365 | 	/** No-op visitor: every visit() overload has an empty body, so a subclass 
366 | 	 *  needs to override only the overloads it cares about. */
367 | 	class default_visitor : public visitor {
368 | 	public:
369 | 		virtual void visit(char_matcher&) {}
370 | 		virtual void visit(str_matcher&) {}
371 | 		virtual void visit(range_matcher&) {}
372 | 		virtual void visit(rule_matcher&) {}
373 | 		virtual void visit(any_matcher&) {}
374 | 		virtual void visit(empty_matcher&) {}
375 | 		virtual void visit(action_matcher&) {}
376 | 		virtual void visit(opt_matcher&) {}
377 | 		virtual void visit(many_matcher&) {}
378 | 		virtual void visit(some_matcher&) {}
379 | 		virtual void visit(seq_matcher&) {}
380 | 		virtual void visit(alt_matcher&) {}
381 | 		virtual void visit(look_matcher&) {}
382 | 		virtual void visit(not_matcher&) {}
383 | 		virtual void visit(capt_matcher&) {}
384 | 		virtual void visit(named_matcher&) {}
385 | 		virtual void visit(fail_matcher&) {}
386 | 	}; /* class default_visitor */
387 | 	
388 | 	/** Visitor which walks the entire tree: matchers that contain other matchers 
389 | 	 *  forward the visit to them; the rest keep default_visitor's empty bodies. */
390 | 	class tree_visitor : public default_visitor {
391 | 	public:
392 | 		using default_visitor::visit;  /* keep the non-overridden overloads visible */
393 | 		virtual void visit(opt_matcher& m) { if (m.m) m.m->accept(this); }
394 | 		virtual void visit(many_matcher& m) { if (m.m) m.m->accept(this); }
395 | 		virtual void visit(some_matcher& m) { if (m.m) m.m->accept(this); }
396 | 		virtual void visit(seq_matcher& m) {
397 | 			for (auto& sub : m.ms) { if (sub) sub->accept(this); }
398 | 		}
399 | 		virtual void visit(alt_matcher& m) {
400 | 			for (auto& sub : m.ms) { if (sub) sub->accept(this); }
401 | 		}
402 | 		virtual void visit(look_matcher& m) { if (m.m) m.m->accept(this); }
403 | 		virtual void visit(not_matcher& m) { if (m.m) m.m->accept(this); }
404 | 		virtual void visit(capt_matcher& m) { if (m.m) m.m->accept(this); }
405 | 		virtual void visit(named_matcher& m) { if (m.m) m.m->accept(this); }
406 | 		/* Null children (e.g. default-constructed nodes) are skipped, not dereferenced. */
407 | 	}; /* class tree_visitor */
408 | 
409 | 	/** Represents a grammar rule.
410 | 	 *  Pairs a name and optional type with a matching rule. The matcher is held 
411 | 	 *  through a shared_ptr. Every constructor enables memoization unless told otherwise. */
412 | 	class grammar_rule {
413 | 	public:
414 | 		grammar_rule(string name) : name(name), memo(true) {}
415 | 		grammar_rule(string name, shared_ptr<matcher> m) : name(name), memo(true), m(m) {}
416 | 		grammar_rule(string name, string type, shared_ptr<matcher> m)
417 | 			: name(name), type(type), memo(true), m(m) {}
418 | 		grammar_rule(string name, string type, string error, shared_ptr<matcher> m) 
419 | 			: name(name), type(type), error(error), memo(true), m(m) {}
420 | 		grammar_rule(string name, string type, string error, bool memo, shared_ptr<matcher> m) 
421 | 			: name(name), type(type), error(error), memo(memo), m(m) {}
422 | 		grammar_rule() : memo(true) {}  /* memo must not be left indeterminate */
423 | 		
424 | 		string name;            /**< Name of the grammar rule */
425 | 		string type;            /**< Type of the grammar rule's return (empty for none) */
426 | 		string error;           /**< "Expected" error if the rule doesn't match */
427 | 		bool memo;              /**< Should this rule be memoized [default true] */
428 | 		shared_ptr<matcher> m;  /**< Grammar matching rule */
429 | 	}; /* class grammar_rule */
430 | 	typedef shared_ptr<grammar_rule> grammar_rule_ptr;
431 | 
432 | 	/** Represents an Egg grammar: its rules in insertion order, a by-name 
433 | 	 *  lookup table, and pre/post-action strings. Rules are held by shared_ptr. */
434 | 	class grammar {
435 | 	public:
436 | 		grammar() {}
437 | 
438 | 		grammar& operator += (shared_ptr<grammar_rule> rule) {
439 | 			rs.push_back(rule);
440 | 			names.insert({rule->name, rule});
441 | 			return *this;
442 | 		}
443 | 
444 | 		vector<shared_ptr<grammar_rule>> rs;	/**< grammar rules, in insertion order */
445 | 		unordered_map<string, shared_ptr<grammar_rule>> names;
446 | 										/**< grammar rules indexed by rule name */
447 | 		string pre, post;				/**< pre and post-actions */
448 | 	}; /* class grammar */
449 | 	typedef shared_ptr<grammar> grammar_ptr;
450 | 	
451 | } /* namespace ast */
452 | 
453 | 


--------------------------------------------------------------------------------
/docs/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog #
 2 | 
 3 | ## v0.3.1 ##
 4 | - Added memoization for repeated '*' and '+' rule
 5 | - Added new "ast::tree_visitor" default base class for tree traversals
 6 | 
 7 | ## v0.3.0 ##
 8 | 
 9 | - Added grammar rule memoization
10 | - Added "--no-memo" argument to egg and `%no-memo` rule annotation to suppress memoization
11 | 
12 | ## v0.2.1 ##
13 | 
14 | - Added `@` and `~` error message inserters to Egg grammar.
15 | - Added `` `error` `` rule error names to the Egg grammar.
16 | 
17 | ## v0.2.0 ##
18 | 
19 | - Ground-up rewrite to parser code, too many breaking changes to list here (though there are now syntactic constructs for a lot of the references to parser internals in the Egg grammar, so hopefully future breaking changes can be done more behind the scenes). 
20 | - Changed parser namespace to `parser` from `parse`.
21 | - Changed position type to `parser::posn` struct with line and column numbers.
22 | - Eliminated `parse::result<T>` type, replaced it with an extra reference parameter to nonterminals (which now return `bool`).
23 | - Added error reporting structure to parser state with "expected" and "messages" error sets (uses Ford's "last non-empty error" heuristic for merging errors).
24 | - Changed parsers to be based on parser combinators typed as `std::function<bool(parser::state&)>` - this is likely less performant, and removes the ability to usefully define the old `psStart` start position variable, but does allow quicker iteration on the codebase.
25 |   I intend to re-implement a direct code generator at some point in the future, and move the combinators off into an optional header.
26 | - Added ability to bind variables to `.` expression, character classes, and capturing matchers. 
27 |   It is now mandatory to bind a string variable to capturing matchers, this replaces the old `psCatch`, `psCatchLen`, and `psCapture` variables.
28 | 
29 | ## v0.1.0 ##
30 | 
31 | - Initial release.


--------------------------------------------------------------------------------
/docs/Developer_Guide.md:
--------------------------------------------------------------------------------
 1 | # Egg Developer Guide #
 2 | 
 3 | This document is for people who wish to modify or contribute to the Egg project; developers who wish to use Egg should read the Readme and the Grammar Guide instead. 
 4 | Developers who wish to contribute to the project should read the Readme and the Grammar Guide first.
 5 | 
 6 | ## Project Overview ##
 7 | 
 8 | Any Egg-generated parser (including the parser for Egg itself) uses the contents of the `parser` namespace from `parser.hpp` to provide a common interface and encapsulate input state. 
 9 | Egg grammars compile to headers which depend on `parser.hpp`; these generated headers define a namespace for the grammar (generally named the same as the grammar) which contains a function for each grammar rule. 
10 | These functions take a `parser::state` reference encapsulating the input state as a parameter, and return a boolean. 
11 | Rules which define a return value of type `T` have a `T&` named `psVal` passed in as their second parameter. 
12 | The Readme file and `parser.hpp` both have further information on these classes. 
13 | 
14 | The current implementation uses parser combinators of type `parser::combinator`, an alias for `std::function<bool(parser::state&)>`; there are a variety of combinators defined in `parser.hpp`. 
15 | The parser combinator style is used for ease of implementation and iteration velocity; this approach likely has performance issues, but there is a TODO item to re-implement a direct code generator to fix these.
16 | 
17 | The Egg grammar for Egg (defined in `egg.egg` and compiled to `egg.hpp`) builds an abstract syntax tree for the Egg grammar file it parses. 
18 | The AST classes are all in the `ast` namespace defined in `ast.hpp`, and use the visitor pattern. 
19 | All AST nodes inherit from `ast::matcher`, which defines a `void accept(ast::visitor*)` method. 
20 | `ast::visitor` is the abstract base class of all visitors, but the `ast::default_visitor` class is defined with empty implementations of all the methods if desired. 
21 | `ast::grammar_rule` and `ast::grammar` are not subclasses of `ast::matcher`, and must be handled differently - see `ast.hpp` for details.
22 | 
23 | Various visitors for the Egg AST are defined in the `visitors` directory. 
24 | `printer.hpp` contains `visitor::printer`, a pretty-printer for Egg grammars, `normalizer.hpp` contains `visitor::normalizer`, which performs some basic simplifications on an Egg AST, and `compiler.hpp` contains `visitor::compiler` and some related classes, which together form a code generator for compiling Egg grammars. 
25 | The Parsing Expression Grammar model that Egg uses is a formalization of recursive descent parsing, so the generated code follows this pattern. 
26 | Grammar rules are memoized by default, in an approach based on Ford's packrat parsing algorithm, an approach which trades space for execution time.
27 | 
28 | The Egg executable itself is defined in `main.cpp` in the root directory; this file is mostly concerned with command line argument parsing, and provides an executable interface to either pretty-print or compile an Egg grammar.
29 | 
30 | Finally, the `utils` directory contains some utility headers common to various parts of the project (currently just string manipulation), and the `grammars` directory contains some example grammars and tests for Egg. 
31 | These grammars include simple test harnesses in their post-action for the sake of brevity; this is not recommended usage, as Egg is designed to generate headers, and compilers complain about the `#pragma once` directive employed in a main file. 
32 | The `grammars/tests` directory contains sample input (`*.in.txt`) and correct output (`*.out.txt`) for each grammar; these may be used for regression testing with the `test` target of `grammars/Makefile`. 
33 | 
34 | ## Contributing ##
35 | 
36 | Egg is fairly relaxed in terms of coding style - emulate the existing code and you should be fine. 
37 | As guidelines, I tend to use K&R style braces, 100 character lines, 4 character tabs for indentation and spaces after that for alignment as necessary. 
38 | The other main quirk of my personal coding style which this project follows is placing spaces on both sides of the parentheses for `if`, `while`, and `switch` statements (but only the outside side for `for` loops).
39 | 
40 | Egg is released under an MIT licence, so naturally all contributions should be available under that licence. 
41 | 
42 | If you fix a bug please include test cases that cover it in the `test` target of `grammars/Makefile` (more tests in general are welcome). 
43 | 
44 | If you are looking for a contribution to make, the `TODO` file in this directory contains a current list of known bugs, desired features, and possible code refactorings; check it out for ideas. 


--------------------------------------------------------------------------------
/docs/Egg_Grammar_Guide.md:
--------------------------------------------------------------------------------
  1 | # Egg Parsing Expression Grammar Guide #
  2 | 
  3 | Egg is a parser generator for parsing expression grammars (PEGs). 
  4 | Its grammar is based on the grammar of Ian Piumarta's [`leg`](http://piumarta.com/software/peg/). 
  5 | Parsing expression grammars are a formalization of recursive top-down parsers; they are similar to context free grammars, with the primary difference being that alternation in a PEG is ordered, while it is unordered in context free grammars. 
  6 | Due to this property, a PEG-based parser generator such as Egg does not need a separate lexer, and can do parsing in one pass.
  7 | 
  8 | ## Matching Rules ##
  9 | 
 10 | An Egg grammar consists of a list of rules, where each rule gives an identifier to a sequence of matching statements. 
 11 | Rule identifiers consist of a letter or underscore followed by any number of further letters, digits, or underscores. 
 12 | The most basic matching statements are character and string literals, surrounded by single or double quotes, respectively; a period `.` matches any single character. 
 13 | A set of characters can be matched with a character range statement; this statement is enclosed in square brackets and contains a set of characters (or ranges of characters) to match; `[abcxyz]` and `[a-cx-z]` match the same sets of characters. 
 14 | A semicolon `;` is an empty matcher; it always matches without consuming any input; it can be safely placed at the end of any grammar rule for stylistic purposes, or used at the end of an alternation to match an empty case. 
 15 | Grammar rules can also be matched (possibly recursively) by writing their identifier. 
 16 | Matching statements can be made optional by following them with a `?`, repeatable by following them with `*`, or repeatable at least once with `+`; statements can also be grouped with parentheses. 
 17 | The `|` operator can be used to introduce alternation into grammar rules; this alternation is ordered - if a sequence is matched, then a later sequence will not be tested. As an example, consider the following two grammar rules: 
 18 | 
 19 |     g1 = ( 'a'* | "ab" ) 'c'
 20 |     g2 = ( "ab" | 'a'* ) 'c'
 21 | 
 22 | Of the above two rules, `g2` will match "abc", while `g1` will not, because the `'a'*` matcher will match, consuming the initial 'a', and then the following 'c' will not match, as the 'b' has yet to be consumed. 
 23 | 
 24 | PEGs also provide lookahead matchers, which match a given rule without consuming it; these can be constructed by prefixing a grammar rule with `&`. 
 25 | Similarly, a matcher prefixed with `!` does not consume the input, and only succeeds if the prefixed matcher doesn't match. 
 26 | These lookahead capabilities allow PEGs to match some grammars that cannot be represented by CFGs, such as the well-known a^n b^n c^n (n > 0), which can be matched by the following Egg grammar: 
 27 | 
 28 |     G = &(A 'c') 'a'+ B !.
 29 |     A = 'a' A 'b' | "ab"
 30 |     B = 'b' B 'c' | "bc"
 31 | 
 32 | A grammar rule may optionally be assigned a type by following the rule identifier with a colon and a second identifier. 
 33 | This second identifier can be a C++ type - namespaces & member typedefs are supported, as are templated classes, though pointer and reference types are not supported for implementation reasons (you may, however, use smart pointer classes). 
 34 | The type of a rule must also be default-constructible. 
 35 | The return value of this type can be accessed as the variable `psVal` in semantic actions inside the rule. 
 36 | Similarly, a matcher for a typed grammar rule can be bound to a variable by following it with a colon and a second identifier; the return value of the rule will be bound to the given variable. 
 37 | Character range matchers and the `.` any matcher may be bound to character variables using the same syntax. 
 38 | The following simple grammar functions as a basic calculator (an executable version of this calculator that also handles whitespace is available in `grammars/calc.egg`):
 39 | 
 40 |     sum : int =  prod : i { psVal = i; } ( 
 41 |                  '+' prod : i { psVal += i; } 
 42 |                  | '-' prod : i { psVal -= i; } )*
 43 |     prod : int = elem : i { psVal = i; } (
 44 |                  '*' elem : i { psVal *= i; } 
 45 |                  | '/' elem : i { psVal /= i; } )*
 46 |     elem : int = '(' sum : i ')' { psVal = i; }
 47 |                  | < '-'?[0-9]+ > : s { psVal = atoi(s.c_str()); }
 48 | 
 49 | A sequence of matching rules can also be surrounded by angle brackets `<` and `>`, denoting a capturing block; the closing bracket must be followed by a `:` bound string variable to bind the matched string to.
 50 | 
 51 | Finally, comments can be started with a `#`, they end at end-of-line.
 52 | 
 53 | ## Error Handling ##
 54 | 
 55 | Rules can be given a name for error reporting by placing an _error string_ after the rule name (and type). 
 56 | An error string begins and ends with `` ` `` characters, and may contain any character except tabs and newlines (`` ` `` and `\` must be escaped as `` \` `` and `\\`, respectively). 
 57 | If a rule that is so annotated fails to match it will add its error string to the list of "expected" messages in the parser's error object at the current position. 
 58 | As a convenience, if you specify an empty error string, the name of the rule will be used. 
 59 | 
 60 | Parsing expressions may also be followed by `@` < _error string_ >, and if the rule fails then the error string will be reported in the list of "expected" messages. 
 61 | For instance, `` ( !. . ) @`contradiction` `` would add "contradiction" to the list of expected errors when it was parsed.
 62 | As a shorthand, a character or string literal can be prefixed with `@`; this syntax sets the error string to the escaped form of the literal.
 63 | 
 64 | You may also insert error messages at the current position with a _failure expression_. 
 65 | A failure expression takes the form `` ~`...` ``, and produces a matcher which never matches, instead inserting the given string into the parser state's error object.
 66 | 
 67 | ## Semantic Actions ##
 68 | 
 69 | Egg grammars may include semantic actions in a sequence of matching rules. 
 70 | These actions are surrounded with curly braces, and will be included in the generated parser at their place of insertion. 
 71 | Semantic actions have access to any bound variables in the current rule, as well as `ps`, the parser state object, and `psVal`, the return value for a typed rule. 
 72 | A subset of the interface for the parser state variables is below:
 73 | 
 74 | - `ps` - the state object - public interface is as follows:
 75 |   - `ps.posn()` - the current parser position
 76 |     - `p.line()` - the input line of position `p`
 77 |     - `p.col()` - the input column of position `p`
 78 |     - `p.index()` - the character index of position `p`
 79 |   - `ps.err` - The error information at the current point in the parse
 80 |     - `e.pos` - The position of the error `e`
 81 |     - `e.expected` - A set of things expected at the error position
 82 |     - `e.messages` - A set of error messages at the error position
 83 |   - `ps()` - the character at the current position
 84 |   - `ps(p)` - the character at position p
 85 |   - `ps.range(p, n)` - returns a pair of iterators representing position `p` and `n` characters after position `p` (or the end of the input stream, if less than `n` characters)
 86 |   - `ps.string(p, n)` - the string represented by `ps.range(p, n)`
 87 | - `psVal` - the variable containing the return value of a typed rule; will be default-constructed by caller on its rule start.
 88 | 
 89 | You may also include a special semantic action before and after the grammar rules; these actions are delimited with `{%` and `%}` and will be placed before and after the generated rules. 
 90 | The usual `ps` variables are not defined in these actions.
 91 | 
 92 | ## Optimizations ##
 93 | 
 94 | Egg-generated parsers use a hybrid recursive-descent/packrat parsing algorithm. 
 95 | By default each rule corresponds to a memoized function which will be evaluated at most once for each position in the input, with the result of that parsing attempt stored for later attempts. 
 96 | This behaviour can be suppressed on a rule-by-rule basis by adding a `%no-memo` annotation to the rule definition before the `=`, or for the entire parser by calling egg with the `--no-memo` command line flag. 
 97 | You may wish to suppress memoization on rules that will never be retried at a given position, or for rules with large return types to avoid storing a possibly linear number of copies. 
 98 | '*' and '+' repetitive matchers are also memoized if possible; a repetitive matcher can be safely memoized if it doesn't bind any variables or include any semantic actions.
 99 | Due to the inclusion of semantic actions and arbitrary rule types, Egg-generated parsers cannot guarantee the linear time or space bounds of packrat parsers, but careful grammar design and use of `%no-memo` should address these issues in practice.
100 | 
101 | ## Egg Grammar ##
102 | 
103 | The following is an Egg grammar for Egg grammars - it is an authoritative representation of Egg syntax, and should also be an illustrative example of a moderately complex grammar (see `egg.egg` for how to build an abstract syntax tree from this grammar): 
104 | 
105 |     grammar =		_ out_action? rule+ out_action? end_of_file
106 | 
107 |     out_action =	OUT_BEGIN ( !OUT_END . )* OUT_END _
108 |     
109 |     rule =			rule_lhs choice
110 |     
111 |     rule_lhs =		identifier ( BIND type_id )? err_string? "%no-memo"? EQUAL
112 |     
113 |     identifier =	[A-Za-z_][A-Za-z_0-9]* _
114 | 
115 |     type_id =		identifier ( "::" _ type_id )* 
116 |     					( '<' _ type_id ( ',' _ type_id )* '>' _ )?
117 |     
118 |     err_string =	'`' ( "\\\\" | "\\`" | ![`\t\n\r] . )* '`' _
119 |     
120 |     choice =		sequence ( PIPE sequence )*
121 |     
122 |     sequence =		( expression | action )+
123 |     
124 |     expression =	AND primary
125 |     				| NOT primary 
126 |     				| primary ( OPT | STAR | PLUS | EXPECT err_string )? 
127 |     
128 |     primary =		!rule_lhs identifier ( BIND identifier )?
129 |     					# above rule avoids parsing rule def'n as invocation
130 |     				| OPEN choice CLOSE
131 |     				| char_literal
132 |     				| str_literal
133 |     				| char_class ( BIND identifier )?
134 |     				| ANY ( BIND identifier )?
135 |     				| EMPTY
136 |     				| BEGIN sequence END BIND identifier
137 |     				| EXPECT ( char_literal | str_literal )
138 |     				| FAIL err_string
139 |     
140 |     action =		!OUT_BEGIN '{' ( action | !'}' . )* '}' _
141 |     
142 |     char_literal =	'\'' character '\'' _
143 |     
144 |     str_literal =	'\"' character* '\"' _
145 |     
146 |     char_class =	'[' ( !']' char_range )* ']' _
147 |     
148 |     char_range =	character '-' character 
149 |     				| character
150 |     
151 |     character =		'\\' [nrt\'\"\\]
152 |     				| ![\'\"\\] .
153 |     
154 | 	OUT_BEGIN =		"{%"
155 |     OUT_END =		"%}"
156 |     BIND =			':' _
157 |     EQUAL =			'=' _
158 |     PIPE =			'|' _
159 |     AND =			'&' _
160 |     NOT =			'!' _
161 |     OPT =			'?' _
162 |     STAR =			'*' _
163 |     PLUS =			'+' _
164 |     OPEN =			'(' _
165 |     CLOSE =			')' _
166 |     ANY =			'.' _
167 |     EMPTY =			';' _
168 |     BEGIN =			'<' _
169 |     END =			'>' _
170 |     EXPECT =        '@' _
171 |     FAIL =          '~' _
172 |     
173 |     _ =		 		( space | comment )*
174 |     space =			' ' | '\t' | end_of_line
175 |     comment =		'#' ( !end_of_line . )* end_of_line
176 |     end_of_line = 	"\r\n" | '\n' | '\r'
177 |     end_of_file = 	!.
178 | 
179 | 


--------------------------------------------------------------------------------
/docs/Roadmap.md:
--------------------------------------------------------------------------------
 1 | # Roadmap #
 2 | 
 3 | ## v0.3.2 ##
 4 | 
 5 | Expected mid-November 2013
 6 | 
 7 | - Add "cut" operators to trim unneeded parser state
 8 | 
 9 | ## v0.4.0 ##
10 | 
11 | Likely December 2013
12 | 
13 | - Implement some form of automatic cut insertion.
14 | 
15 | ## v1.0.0 ##
16 | 
17 | Maybe summer 2014
18 | 
19 | - Implement all pending features, code cleanup, and bugfixes in TODO
20 | 


--------------------------------------------------------------------------------
/docs/TODO.md:
--------------------------------------------------------------------------------
 1 | ## Feature Wishlist ##
 2 | - add &{ ... } semantic predicates to the language
 3 | - add ~{ ... } failure actions to the language
 4 | - Add cut syntax
 5 | - Rewrite compiler to have a code generator again, rather than just using the combinators (investigate performance)
 6 | - Unicode string support
 7 |   - Include Unicode escapes for character literals
 8 |   - Normalize input Unicode
 9 | - Add interpreter visitor (this may be non-trivial)
10 | - Add doxygen-generated docs to the docs folder
11 | - Install to system path
12 | - Better escaping for character classes; also perhaps support for semantic tests, e.g. whitespace
13 | 
14 | ## Bugs ##
15 | - Cannot include ']' in a character class - should include an escape.
16 | - Non-syntactic '{' and '}' characters in actions (e.g. those in comments or string literals) may break the parser if unmatched.
17 | - Parens in grammar pretty-printer are not entirely correct
18 | - Actions that modify psVal rather than assigning to it may behave differently under memoization than not
19 | - Should modify "many" and "some" combinators to break their loop on empty match (i.e. silently treat the language matched by their subexpression e as _L(e) \ epsilon_)
20 | 
21 | ## Code Cleanup ##
22 | - Maybe move to `unique_ptr` from `shared_ptr`
23 | - Replace `ast::make_ptr()` and `ast::as_ptr()` with standard library equivalents
24 | - Inline parser.hpp in generated grammars
25 |   - This may have licencing ramifications - consider a Bison-style exception
26 | - Modify makefile to remake `egg.hpp` from `egg.egg` or `egg-bak.hpp` as appropriate
27 | - Move redundant checks from compiler to normalizer
28 | - Rewrite normalizer to flatten nested sequences/choices (might fail for sequences if you re-introduce psStart)
29 | - Add flag to make "#pragma once" optional in generated files
30 | - Rewrite `parser::state.matches(string)` to use the deque iterators instead of generating a second string object
31 | - Maybe make Egg-based argument parsing grammar (might be more work to make input stream that inputs (argc, argv) than it's worth)
32 | - 1-index line numbers
33 | - add hash table to consolidate memoization entries for identical '*' and '+' grammars


--------------------------------------------------------------------------------
/egg-bak.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /* THE FOLLOWING HAS BEEN AUTOMATICALLY GENERATED BY THE EGG PARSER GENERATOR.
  4 |  * DO NOT EDIT. */
  5 | 
  6 | // {%
  7 | 
  8 | /*
  9 |  * Copyright (c) 2013 Aaron Moss
 10 |  * 
 11 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 12 |  * of this software and associated documentation files (the "Software"), to deal
 13 |  * in the Software without restriction, including without limitation the rights
 14 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 15 |  * copies of the Software, and to permit persons to whom the Software is
 16 |  * furnished to do so, subject to the following conditions:
 17 |  * 
 18 |  * The above copyright notice and this permission notice shall be included in
 19 |  * all copies or substantial portions of the Software.
 20 |  * 
 21 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 26 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 27 |  * THE SOFTWARE.
 28 |  */
 29 |  
 30 | #include <string>
 31 | 
 32 | #include "ast.hpp"
 33 | #include "utils/strings.hpp"
 34 | 
 35 | // %}
 36 | 
 37 | #include <string>
 38 | #include "parser.hpp"
 39 | 
 40 | namespace egg {
 41 | 
 42 | 	bool grammar(parser::state&, ast::grammar_ptr &); // Forward declarations, one function per grammar rule. Typed rules take a reference to their result as the second parameter.
 43 | 	bool out_action(parser::state&, std::string &);
 44 | 	bool rule(parser::state&, ast::grammar_rule_ptr &);
 45 | 	bool rule_lhs(parser::state&, ast::grammar_rule_ptr &);
 46 | 	bool identifier(parser::state&, std::string &);
 47 | 	bool type_id(parser::state&, std::string &);
 48 | 	bool err_string(parser::state&, std::string &);
 49 | 	bool choice(parser::state&, ast::alt_matcher_ptr &);
 50 | 	bool sequence(parser::state&, ast::seq_matcher_ptr &);
 51 | 	bool expression(parser::state&, ast::matcher_ptr &);
 52 | 	bool primary(parser::state&, ast::matcher_ptr &);
 53 | 	bool action(parser::state&, ast::action_matcher_ptr &);
 54 | 	bool char_literal(parser::state&, ast::char_matcher_ptr &);
 55 | 	bool str_literal(parser::state&, ast::str_matcher_ptr &);
 56 | 	bool char_class(parser::state&, ast::range_matcher_ptr &);
 57 | 	bool characters(parser::state&, ast::char_range &);
 58 | 	bool character(parser::state&, char &);
 59 | 	bool OUT_BEGIN(parser::state&); // From here on: untyped rules, which take only the parser state.
 60 | 	bool OUT_END(parser::state&);
 61 | 	bool BIND(parser::state&);
 62 | 	bool EQUAL(parser::state&);
 63 | 	bool PIPE(parser::state&);
 64 | 	bool AND(parser::state&);
 65 | 	bool NOT(parser::state&);
 66 | 	bool OPT(parser::state&);
 67 | 	bool STAR(parser::state&);
 68 | 	bool PLUS(parser::state&);
 69 | 	bool OPEN(parser::state&);
 70 | 	bool CLOSE(parser::state&);
 71 | 	bool ANY(parser::state&);
 72 | 	bool EMPTY(parser::state&);
 73 | 	bool BEGIN(parser::state&);
 74 | 	bool END(parser::state&);
 75 | 	bool EXPECT(parser::state&);
 76 | 	bool FAIL(parser::state&);
 77 | 	bool _(parser::state&);
 78 | 	bool space(parser::state&);
 79 | 	bool comment(parser::state&);
 80 | 	bool end_of_line(parser::state&);
 81 | 	bool end_of_file(parser::state&);
 82 | 
 83 | 	bool grammar(parser::state& ps, ast::grammar_ptr & psVal) { // Rule `grammar = _ out_action? rule+ out_action? end_of_file`; builds an ast::grammar in psVal and returns the combinator's result on ps.
 84 | 		ast::grammar_rule_ptr  r; // receives each parsed rule
 85 | 		std::string  s; // receives the text of a pre- or post-action
 86 | 
 87 | 		return parser::named("grammar", 
 88 | 			parser::sequence({
 89 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::grammar>();  return true; }, // start from a fresh grammar object
 90 | 				_,
 91 | 				parser::option(
 92 | 					parser::sequence({
 93 | 						parser::bind(s, out_action),
 94 | 						[&](parser::state& ps) { psVal->pre = s;  return true; }})), // optional leading out-action is stored as `pre`
 95 | 				parser::some(
 96 | 					parser::sequence({
 97 | 						parser::bind(r, rule),
 98 | 						[&](parser::state& ps) { *psVal += r;  return true; }})), // each parsed rule is appended to the grammar
 99 | 				parser::option(
100 | 					parser::sequence({
101 | 						parser::bind(s, out_action),
102 | 						[&](parser::state& ps) { psVal->post = s;  return true; }})), // optional trailing out-action is stored as `post`
103 | 				end_of_file}))(ps); // the combinator is built and then immediately applied to ps
104 | 	}
105 | 
106 | 	bool out_action(parser::state& ps, std::string & psVal) { // Rule `out_action = OUT_BEGIN ( !OUT_END . )* OUT_END _`; the body between the delimiters is captured into psVal.
107 | 		return parser::named("out action", 
108 | 			parser::sequence({
109 | 				OUT_BEGIN,
110 | 				parser::capture(psVal, parser::memoize_many(1, // NOTE(review): 1 is presumably this repetition's memoization slot - confirm in parser.hpp
111 | 					parser::sequence({
112 | 						parser::look_not(OUT_END), // any character, as long as OUT_END does not start here
113 | 						parser::any()}))),
114 | 				OUT_END,
115 | 				_}))(ps);
116 | 	}
117 | 
118 | 	bool rule(parser::state& ps, ast::grammar_rule_ptr & psVal) { // Rule `rule = rule_lhs choice`; psVal is the rule object from rule_lhs with its matcher `m` set to the parsed choice.
119 | 		ast::alt_matcher_ptr  m; // receives the rule body
120 | 
121 | 		return parser::named("rule", 
122 | 			parser::sequence({
123 | 				parser::bind(psVal, rule_lhs), // the left-hand side creates the grammar_rule directly in psVal
124 | 				parser::bind(m, choice),
125 | 				[&](parser::state& ps) { psVal->m = m;  return true; }}))(ps); // attach the body to the rule
126 | 	}
127 | 
128 | 	bool rule_lhs(parser::state& ps, ast::grammar_rule_ptr & psVal) { // Rule `rule_lhs = identifier ( BIND type_id )? err_string? "%no-memo"? EQUAL`; creates the ast::grammar_rule in psVal.
129 | 		std::string  s; // rule name
130 | 		std::string  t; // scratch: first the type id, then the error string
131 | 
132 | 		return parser::memoize(2, psVal, // NOTE(review): 2 is presumably this rule's memoization slot - confirm in parser.hpp
133 | 			parser::sequence({
134 | 				parser::bind(s, identifier),
135 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::grammar_rule>(s);  return true; }, // new rule named after the identifier
136 | 				parser::option(
137 | 					parser::sequence({
138 | 						BIND,
139 | 						parser::bind(t, type_id),
140 | 						[&](parser::state& ps) { psVal->type = t;  return true; }})), // optional `: type` annotation
141 | 				parser::option(
142 | 					parser::sequence({
143 | 						parser::bind(t, err_string),
144 | 						[&](parser::state& ps) { psVal->error = t.empty() ? s : t;  return true; }})), // an empty error string falls back to the rule name
145 | 				parser::option(
146 | 					parser::sequence({
147 | 						parser::named("\"%no-memo\"", parser::literal("%no-memo")),
148 | 						_,
149 | 						[&](parser::state& ps) { psVal->memo = false;  return true; }})), // `%no-memo` clears the rule's memo flag
150 | 				EQUAL}))(ps);
151 | 	}
152 | 
153 | 	bool identifier(parser::state& ps, std::string & psVal) { // Rule `identifier = [A-Za-z_][A-Za-z_0-9]* _`; the identifier text (without the trailing `_` whitespace rule) is captured into psVal.
154 | 		return parser::memoize(3, psVal, parser::named("identifier", 
155 | 			parser::sequence({
156 | 				parser::capture(psVal, 
157 | 					parser::sequence({
158 | 						
159 | 							parser::choice({ // first character: letter or underscore
160 | 								parser::between('A', 'Z'),
161 | 								parser::between('a', 'z'),
162 | 								parser::literal('_')}),
163 | 						parser::memoize_many(4, // remaining characters: letters, underscore or digits
164 | 							parser::choice({
165 | 								parser::between('A', 'Z'),
166 | 								parser::between('a', 'z'),
167 | 								parser::literal('_'),
168 | 								parser::between('0', '9')}))})),
169 | 				_})))(ps); // `_` sits outside the capture, so it is not part of psVal
170 | 	}
171 | 
172 | 	bool type_id(parser::state& ps, std::string & psVal) { // Rule `type_id = identifier ( "::" _ type_id )* ( '<' _ type_id ( ',' _ type_id )* '>' _ )?`; the whole matched text is captured into psVal.
173 | 		return parser::memoize(5, psVal, parser::named("type ID", parser::capture(psVal, 
174 | 			parser::sequence({
175 | 				parser::unbind(identifier), // NOTE(review): unbind presumably runs a typed rule and discards its value - confirm in parser.hpp
176 | 				parser::memoize_many(6, // namespace / member qualification: ( "::" _ type_id )*
177 | 					parser::sequence({
178 | 						parser::literal("::"),
179 | 						_,
180 | 						parser::unbind(type_id)})),
181 | 				parser::option( // optional template argument list
182 | 					parser::sequence({
183 | 						parser::literal('<'),
184 | 						_,
185 | 						parser::unbind(type_id),
186 | 						parser::memoize_many(7, 
187 | 							parser::sequence({
188 | 								parser::literal(','),
189 | 								_,
190 | 								parser::unbind(type_id)})),
191 | 						parser::literal('>'),
192 | 						_}))}))))(ps);
193 | 	}
194 | 
195 | 	bool err_string(parser::state& ps, std::string & psVal) { // Rule `err_string`: a backtick-delimited error string; psVal receives its contents after strings::unescape_error.
196 | 		std::string s; // raw (still escaped) text between the backticks
197 | 
198 | 		return parser::memoize(8, psVal, parser::named("error string", 
199 | 			parser::sequence({
200 | 				parser::literal('`'),
201 | 				parser::capture(s, parser::memoize_many(9, 
202 | 					parser::choice({
203 | 						parser::literal("\\\\"), // escaped backslash
204 | 						parser::literal("\\`"), // escaped backtick
205 | 						
206 | 							parser::sequence({ // otherwise any character except backtick, tab, newline or carriage return
207 | 								parser::look_not(
208 | 									parser::choice({
209 | 										parser::literal('`'),
210 | 										parser::literal('\t'),
211 | 										parser::literal('\n'),
212 | 										parser::literal('\r')})),
213 | 								parser::any()})}))),
214 | 				parser::literal('`'),
215 | 				_,
216 | 				[&](parser::state& ps) { psVal = strings::unescape_error(s);  return true; }})))(ps); // unescape only after the closing backtick has matched
217 | 	}
218 | 
219 | 	bool choice(parser::state& ps, ast::alt_matcher_ptr & psVal) { // Rule `choice = sequence ( PIPE sequence )*`; psVal becomes an ast::alt_matcher holding every alternative in source order.
220 | 		ast::seq_matcher_ptr  m; // receives each alternative in turn
221 | 
222 | 		return parser::memoize(10, psVal, 
223 | 			parser::sequence({
224 | 				parser::bind(m, sequence),
225 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::alt_matcher>(); *psVal += m;  return true; }, // the alt_matcher is created only once the first alternative has parsed
226 | 				parser::many(
227 | 					parser::sequence({
228 | 						PIPE,
229 | 						parser::bind(m, sequence),
230 | 						[&](parser::state& ps) { *psVal += m;  return true; }}))}))(ps); // append each further `| sequence`
231 | 	}
232 | 
233 | 	bool sequence(parser::state& ps, ast::seq_matcher_ptr & psVal) { // Rule `sequence = ( expression | action )+`; psVal becomes an ast::seq_matcher with the elements appended in order.
234 | 		ast::action_matcher_ptr  a; // receives a parsed semantic action
235 | 		ast::matcher_ptr  e; // receives a parsed expression
236 | 
237 | 		return parser::memoize(11, psVal, 
238 | 			parser::sequence({
239 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::seq_matcher>();  return true; }, // start from an empty sequence node
240 | 				parser::some(
241 | 					parser::choice({
242 | 						
243 | 							parser::sequence({
244 | 								parser::bind(e, expression),
245 | 								[&](parser::state& ps) { *psVal += e;  return true; }}), // expression alternative is tried first
246 | 						
247 | 							parser::sequence({
248 | 								parser::bind(a, action),
249 | 								[&](parser::state& ps) { *psVal += a;  return true; }})}))}))(ps); // otherwise a `{ ... }` action
250 | 	}
251 | 
252 | 	bool expression(parser::state& ps, ast::matcher_ptr & psVal) { // Rule `expression`: a primary with an optional `&` / `!` prefix or `?` / `*` / `+` / `@err` suffix; psVal is the primary, wrapped in the corresponding AST node when an operator is present.
253 | 		ast::matcher_ptr  m; // the parsed primary
254 | 		std::string  s; // error string for the `@` suffix
255 | 
256 | 		return parser::memoize(12, psVal, parser::named("expression", 
257 | 			parser::choice({
258 | 				
259 | 					parser::sequence({
260 | 						AND,
261 | 						parser::bind(m, primary),
262 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::look_matcher>(m);  return true; }}), // `&primary` -> look_matcher
263 | 				
264 | 					parser::sequence({
265 | 						NOT,
266 | 						parser::bind(m, primary),
267 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::not_matcher>(m);  return true; }}), // `!primary` -> not_matcher
268 | 				
269 | 					parser::sequence({
270 | 						parser::bind(m, primary),
271 | 						[&](parser::state& ps) { psVal = m;  return true; }, // bare primary; replaced below if a suffix follows
272 | 						parser::option(
273 | 							parser::choice({
274 | 								
275 | 									parser::sequence({
276 | 										OPT,
277 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::opt_matcher>(m);  return true; }}), // `primary?`
278 | 								
279 | 									parser::sequence({
280 | 										STAR,
281 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::many_matcher>(m);  return true; }}), // `primary*`
282 | 								
283 | 									parser::sequence({
284 | 										PLUS,
285 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::some_matcher>(m);  return true; }}), // `primary+`
286 | 								
287 | 									parser::sequence({
288 | 										EXPECT,
289 | 										parser::bind(s, err_string),
290 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(m, s);  return true; }})}))})})))(ps); // `primary @`err``
291 | 	}
292 | 
293 | 	bool primary(parser::state& ps, ast::matcher_ptr & psVal) { // Grammar rule `primary`: ordered choice over the kinds of primary expression; the lambda of the alternative that matches stores its AST matcher in psVal. Returns the result of running the combinator on ps.
294 | 		ast::alt_matcher_ptr  am;
295 | 		ast::seq_matcher_ptr  bm;
296 | 		ast::char_matcher_ptr  cm;
297 | 		ast::range_matcher_ptr  rm;
298 | 		std::string  s;
299 | 		ast::str_matcher_ptr  sm;
300 | 		// The locals above are the bind targets used by the alternatives below; 13 is presumably this rule's memoization id (confirm in parser.hpp).
301 | 		return parser::memoize(13, psVal, 
302 | 			parser::choice({
303 | 				parser::named("nonterminal expression", 
304 | 					parser::sequence({
305 | 						parser::look_not(parser::unbind(rule_lhs)), // do not take the identifier that starts the next rule definition
306 | 						parser::bind(s, identifier),
307 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::rule_matcher>(s);  return true; },
308 | 						parser::option(
309 | 							parser::sequence({
310 | 								BIND,
311 | 								parser::bind(s, identifier), // s is reused: it now holds the variable name, not the rule name
312 | 								[&](parser::state& ps) { ast::as_ptr<ast::rule_matcher>(psVal)->var = s;  return true; }}))})),
313 | 				parser::named("parenthesized subexpression", 
314 | 					parser::sequence({
315 | 						OPEN,
316 | 						parser::bind(am, choice),
317 | 						CLOSE,
318 | 						[&](parser::state& ps) { psVal = am;  return true; }})),
319 | 				// character literal: the char_matcher is used as the result directly
320 | 					parser::sequence({
321 | 						parser::bind(cm, char_literal),
322 | 						[&](parser::state& ps) { psVal = cm;  return true; }}),
323 | 				// string literal: the str_matcher is used as the result directly
324 | 					parser::sequence({
325 | 						parser::bind(sm, str_literal),
326 | 						[&](parser::state& ps) { psVal = sm;  return true; }}),
327 | 				// character class, with an optional BIND identifier stored in the range_matcher's var
328 | 					parser::sequence({
329 | 						parser::bind(rm, char_class),
330 | 						[&](parser::state& ps) { psVal = rm;  return true; },
331 | 						parser::option(
332 | 							parser::sequence({
333 | 								BIND,
334 | 								parser::bind(s, identifier),
335 | 								[&](parser::state& ps) { ast::as_ptr<ast::range_matcher>(psVal)->var = s;  return true; }}))}),
336 | 				// ANY token ('.'), with an optional BIND identifier stored in the any_matcher's var
337 | 					parser::sequence({
338 | 						ANY,
339 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::any_matcher>();  return true; },
340 | 						parser::option(
341 | 							parser::sequence({
342 | 								BIND,
343 | 								parser::bind(s, identifier),
344 | 								[&](parser::state& ps) { ast::as_ptr<ast::any_matcher>(psVal)->var = s;  return true; }}))}),
345 | 				// EMPTY token (';')
346 | 					parser::sequence({
347 | 						EMPTY,
348 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::empty_matcher>();  return true; }}),
349 | 				parser::named("capturing expression", 
350 | 					parser::sequence({
351 | 						BEGIN,
352 | 						parser::bind(bm, sequence),
353 | 						END,
354 | 						BIND, // unlike the optional binds above, the capture's target variable is mandatory
355 | 						parser::bind(s, identifier),
356 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::capt_matcher>(bm, s);  return true; }})),
357 | 				// EXPECT token ('@') before a char or string literal: wrap the literal in a named_matcher whose name is the literal's quoted, escaped text
358 | 					parser::sequence({
359 | 						EXPECT,
360 | 						
361 | 							parser::choice({
362 | 								
363 | 									parser::sequence({
364 | 										parser::bind(cm, char_literal),
365 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(cm, strings::quoted_escape(cm->c));  return true; }}),
366 | 								
367 | 									parser::sequence({
368 | 										parser::bind(sm, str_literal),
369 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(sm, strings::quoted_escape(sm->s));  return true; }})})}),
370 | 				// FAIL token ('~') followed by an error string
371 | 					parser::sequence({
372 | 						FAIL,
373 | 						parser::bind(s, err_string),
374 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::fail_matcher>(s);  return true; }})}))(ps);
375 | 	}
376 | 
377 | 	bool action(parser::state& ps, ast::action_matcher_ptr & psVal) { // Grammar rule `action`: a '{' ... '}' block; on a match psVal is an action_matcher built from the text captured between the braces.
378 | 		std::string s;
379 | 		// s receives the captured body of the block.
380 | 		return parser::memoize(14, psVal, parser::named("action", 
381 | 			parser::sequence({
382 | 				parser::look_not(OUT_BEGIN), // a block opening with "{%" is not an action
383 | 				parser::literal('{'),
384 | 				parser::capture(s, parser::memoize_many(15, 
385 | 					parser::choice({
386 | 						parser::unbind(action), // nested brace block, matched recursively without binding its value
387 | 						// otherwise any single character that is not '}'
388 | 							parser::sequence({
389 | 								parser::look_not(parser::literal('}')),
390 | 								parser::any()})}))),
391 | 				parser::literal('}'),
392 | 				_,
393 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::action_matcher>(s);  return true; }})))(ps);
394 | 	}
395 | 
396 | 	bool char_literal(parser::state& ps, ast::char_matcher_ptr & psVal) { // Grammar rule `char_literal`: one `character` between single quotes, then rule `_`; psVal becomes a char_matcher for that character.
397 | 		char  c;
398 | 		// c is the bind target for the quoted character.
399 | 		return parser::memoize(16, psVal, parser::named("character literal", 
400 | 			parser::sequence({
401 | 				parser::literal('\''),
402 | 				parser::bind(c, character),
403 | 				parser::literal('\''),
404 | 				_,
405 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::char_matcher>(c);  return true; }})))(ps);
406 | 	}
407 | 
408 | 	bool str_literal(parser::state& ps, ast::str_matcher_ptr & psVal) { // Grammar rule `str_literal`: a run of `character`s between double quotes, then rule `_`; psVal becomes a str_matcher for the unescaped text.
409 | 		std::string s;
410 | 		// s receives the raw (still escaped) text between the quotes.
411 | 		return parser::memoize(17, psVal, parser::named("string literal", 
412 | 			parser::sequence({
413 | 				parser::literal('\"'),
414 | 				parser::capture(s, parser::memoize_many(18, parser::unbind(character))),
415 | 				parser::literal('\"'),
416 | 				_,
417 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::str_matcher>(strings::unescape(s));  return true; }})))(ps);
418 | 	}
419 | 
420 | 	bool char_class(parser::state& ps, ast::range_matcher_ptr & psVal) { // Grammar rule `char_class`: '[' ranges ']' then rule `_`; psVal is a range_matcher to which each parsed char_range is added.
421 | 		ast::char_range  r;
422 | 		// r is the bind target for each `characters` item inside the brackets.
423 | 		return parser::memoize(19, psVal, parser::named("character class", 
424 | 			parser::sequence({
425 | 				parser::literal('['),
426 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::range_matcher>();  return true; }, // create the result before the loop so the ranges can be appended to it
427 | 				parser::many(
428 | 					parser::sequence({
429 | 						parser::look_not(parser::literal(']')), // stop at the closing bracket
430 | 						parser::bind(r, characters),
431 | 						[&](parser::state& ps) { *psVal += r;  return true; }})),
432 | 				parser::literal(']'),
433 | 				_})))(ps);
434 | 	}
435 | 
436 | 	bool characters(parser::state& ps, ast::char_range & psVal) { // Grammar rule `characters`: one item of a character class, either a range `f-t` or a single character `c`; psVal becomes the corresponding char_range.
437 | 		char  c;
438 | 		char  f;
439 | 		char  t;
440 | 		// The range form is tried first, so "a-z" is read as one range rather than as the single character 'a'.
441 | 		return parser::memoize(20, psVal, 
442 | 			parser::choice({
443 | 				// range: first character, '-', last character
444 | 					parser::sequence({
445 | 						parser::bind(f, character),
446 | 						parser::literal('-'),
447 | 						parser::bind(t, character),
448 | 						[&](parser::state& ps) { psVal = ast::char_range(f,t);  return true; }}),
449 | 				// single character
450 | 					parser::sequence({
451 | 						parser::bind(c, character),
452 | 						[&](parser::state& ps) { psVal = ast::char_range(c);  return true; }})}))(ps);
453 | 	}
454 | 
455 | 	bool character(parser::state& ps, char & psVal) { // Grammar rule `character`: either a backslash escape (n, r, t, ', ", \) or any character other than ', " and \; psVal receives the character.
456 | 		char c;
457 | 		// c is the bind target for the letter that follows the backslash in an escape.
458 | 		return parser::memoize(21, psVal, 
459 | 			parser::choice({
460 | 				// escape sequence: backslash plus one of the accepted escape letters
461 | 					parser::sequence({
462 | 						parser::literal('\\'),
463 | 						
464 | 							parser::choice({
465 | 								parser::literal('n', c),
466 | 								parser::literal('r', c),
467 | 								parser::literal('t', c),
468 | 								parser::literal('\'', c),
469 | 								parser::literal('\"', c),
470 | 								parser::literal('\\', c)}),
471 | 						[&](parser::state& ps) { psVal = strings::unescaped_char(c);  return true; }}), // the escape letter is converted by strings::unescaped_char
472 | 				// plain character: anything except a quote or a backslash, read straight into psVal
473 | 					parser::sequence({
474 | 						parser::look_not(
475 | 							parser::choice({
476 | 								parser::literal('\''),
477 | 								parser::literal('\"'),
478 | 								parser::literal('\\')})),
479 | 						parser::any(psVal)})}))(ps);
480 | 	}
481 | 
482 | 	bool OUT_BEGIN(parser::state& ps) { // Token rule: matches the literal "{%", named "\"{%\""; no trailing `_` is consumed here.
483 | 		return parser::memoize(22, parser::named("\"{%\"", parser::literal("{%")))(ps);
484 | 	}
485 | 
486 | 	bool OUT_END(parser::state& ps) { // Token rule: matches the literal "%}", named "\"%}\""; no trailing `_` is consumed here.
487 | 		return parser::memoize(23, parser::named("\"%}\"", parser::literal("%}")))(ps);
488 | 	}
489 | 
490 | 	bool BIND(parser::state& ps) { // Token rule: ':' (named "':'") followed by rule `_` (whitespace and comments).
491 | 		return parser::memoize(24, 
492 | 			parser::sequence({
493 | 				parser::named("\':\'", parser::literal(':')),
494 | 				_}))(ps);
495 | 	}
496 | 
497 | 	bool EQUAL(parser::state& ps) { // Token rule: '=' (named "'='") followed by rule `_` (whitespace and comments).
498 | 		return parser::memoize(25, 
499 | 			parser::sequence({
500 | 				parser::named("\'=\'", parser::literal('=')),
501 | 				_}))(ps);
502 | 	}
503 | 
504 | 	bool PIPE(parser::state& ps) { // Token rule: '|' (named "'|'") followed by rule `_` (whitespace and comments).
505 | 		return parser::memoize(26, 
506 | 			parser::sequence({
507 | 				parser::named("\'|\'", parser::literal('|')),
508 | 				_}))(ps);
509 | 	}
510 | 
511 | 	bool AND(parser::state& ps) { // Token rule: '&' (named "'&'") followed by rule `_` (whitespace and comments).
512 | 		return parser::memoize(27, 
513 | 			parser::sequence({
514 | 				parser::named("\'&\'", parser::literal('&')),
515 | 				_}))(ps);
516 | 	}
517 | 
518 | 	bool NOT(parser::state& ps) { // Token rule: '!' (named "'!'") followed by rule `_` (whitespace and comments).
519 | 		return parser::memoize(28, 
520 | 			parser::sequence({
521 | 				parser::named("\'!\'", parser::literal('!')),
522 | 				_}))(ps);
523 | 	}
524 | 
525 | 	bool OPT(parser::state& ps) { // Token rule: '?' (named "'?'") followed by rule `_` (whitespace and comments).
526 | 		return parser::memoize(29, 
527 | 			parser::sequence({
528 | 				parser::named("\'?\'", parser::literal('?')),
529 | 				_}))(ps);
530 | 	}
531 | 
532 | 	bool STAR(parser::state& ps) { // Token rule: '*' (named "'*'") followed by rule `_` (whitespace and comments).
533 | 		return parser::memoize(30, 
534 | 			parser::sequence({
535 | 				parser::named("\'*\'", parser::literal('*')),
536 | 				_}))(ps);
537 | 	}
538 | 
539 | 	bool PLUS(parser::state& ps) { // Token rule: '+' (named "'+'") followed by rule `_` (whitespace and comments).
540 | 		return parser::memoize(31, 
541 | 			parser::sequence({
542 | 				parser::named("\'+\'", parser::literal('+')),
543 | 				_}))(ps);
544 | 	}
545 | 
546 | 	bool OPEN(parser::state& ps) { // Token rule: '(' (named "'('") followed by rule `_` (whitespace and comments).
547 | 		return parser::memoize(32, 
548 | 			parser::sequence({
549 | 				parser::named("\'(\'", parser::literal('(')),
550 | 				_}))(ps);
551 | 	}
552 | 
553 | 	bool CLOSE(parser::state& ps) { // Token rule: ')' (named "')'") followed by rule `_` (whitespace and comments).
554 | 		return parser::memoize(33, 
555 | 			parser::sequence({
556 | 				parser::named("\')\'", parser::literal(')')),
557 | 				_}))(ps);
558 | 	}
559 | 
560 | 	bool ANY(parser::state& ps) { // Token rule: '.' (named "'.'") followed by rule `_` (whitespace and comments).
561 | 		return parser::memoize(34, 
562 | 			parser::sequence({
563 | 				parser::named("\'.\'", parser::literal('.')),
564 | 				_}))(ps);
565 | 	}
566 | 
567 | 	bool EMPTY(parser::state& ps) { // Token rule: ';' (named "';'") followed by rule `_` (whitespace and comments).
568 | 		return parser::memoize(35, 
569 | 			parser::sequence({
570 | 				parser::named("\';\'", parser::literal(';')),
571 | 				_}))(ps);
572 | 	}
573 | 
574 | 	bool BEGIN(parser::state& ps) { // Token rule: '<' (named "'<'") followed by rule `_` (whitespace and comments).
575 | 		return parser::memoize(36, 
576 | 			parser::sequence({
577 | 				parser::named("\'<\'", parser::literal('<')),
578 | 				_}))(ps);
579 | 	}
580 | 
581 | 	bool END(parser::state& ps) { // Token rule: '>' (named "'>'") followed by rule `_` (whitespace and comments).
582 | 		return parser::memoize(37, 
583 | 			parser::sequence({
584 | 				parser::named("\'>\'", parser::literal('>')),
585 | 				_}))(ps);
586 | 	}
587 | 
588 | 	bool EXPECT(parser::state& ps) { // Token rule: '@' (named "'@'") followed by rule `_` (whitespace and comments).
589 | 		return parser::memoize(38, 
590 | 			parser::sequence({
591 | 				parser::named("\'@\'", parser::literal('@')),
592 | 				_}))(ps);
593 | 	}
594 | 
595 | 	bool FAIL(parser::state& ps) { // Token rule: '~' (named "'~'") followed by rule `_` (whitespace and comments).
596 | 		return parser::memoize(39, 
597 | 			parser::sequence({
598 | 				parser::named("\'~\'", parser::literal('~')),
599 | 				_}))(ps);
600 | 	}
601 | 
602 | 	bool _(parser::state& ps) { // Grammar rule `_`: a repetition of `space` or `comment`; it is the filler consumed after tokens.
603 | 		return parser::memoize(40, parser::memoize_many(41, 
604 | 			parser::choice({
605 | 				space,
606 | 				comment})))(ps);
607 | 	}
608 | 
609 | 	bool space(parser::state& ps) { // Grammar rule `space`: one blank, one tab, or one end_of_line.
610 | 		return parser::memoize(42, 
611 | 			parser::choice({
612 | 				parser::literal(' '),
613 | 				parser::literal('\t'),
614 | 				end_of_line}))(ps);
615 | 	}
616 | 
617 | 	bool comment(parser::state& ps) { // Grammar rule `comment`: '#', then every character up to the next end_of_line, then that end_of_line.
618 | 		return parser::memoize(43, 
619 | 			parser::sequence({
620 | 				parser::literal('#'),
621 | 				parser::memoize_many(44, 
622 | 					parser::sequence({
623 | 						parser::look_not(end_of_line),
624 | 						parser::any()})),
625 | 				end_of_line}))(ps); // NOTE(review): the terminating end_of_line is required, so a comment on an unterminated last line does not match this rule
626 | 	}
627 | 
628 | 	bool end_of_line(parser::state& ps) { // Grammar rule `end_of_line`: "\r\n", '\n' or '\r'; the two-character form is listed first so it is tried before a lone '\r'.
629 | 		return parser::memoize(45, 
630 | 			parser::choice({
631 | 				parser::literal("\r\n"),
632 | 				parser::literal('\n'),
633 | 				parser::literal('\r')}))(ps);
634 | 	}
635 | 
636 | 	bool end_of_file(parser::state& ps) { // Grammar rule `end_of_file` (named "end of input"): a negative lookahead on "any character", i.e. the grammar's `!.`.
637 | 		return parser::memoize(46, parser::named("end of input", parser::look_not(parser::any())))(ps);
638 | 	}
639 | 
640 | } // namespace egg
641 | 
642 | 


--------------------------------------------------------------------------------
/egg.egg:
--------------------------------------------------------------------------------
  1 | # Egg grammar for Egg grammars.
  2 | #
  3 | # Author: Aaron Moss
  4 | 
  5 | {%
  6 | /*
  7 |  * Copyright (c) 2013 Aaron Moss
  8 |  * 
  9 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 10 |  * of this software and associated documentation files (the "Software"), to deal
 11 |  * in the Software without restriction, including without limitation the rights
 12 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 13 |  * copies of the Software, and to permit persons to whom the Software is
 14 |  * furnished to do so, subject to the following conditions:
 15 |  * 
 16 |  * The above copyright notice and this permission notice shall be included in
 17 |  * all copies or substantial portions of the Software.
 18 |  * 
 19 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 20 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 21 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 22 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 23 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 24 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 25 |  * THE SOFTWARE.
 26 |  */
 27 |  
 28 | #include <string>
 29 | 
 30 | #include "ast.hpp"
 31 | #include "utils/strings.hpp"
 32 | %}
 33 | 
 34 | grammar: ast::grammar_ptr `` %no-memo = # Start rule: builds the ast::grammar for a whole file. The empty error string makes the rule name the error name.
 35 | 		{ psVal = ast::make_ptr<ast::grammar>(); }
 36 | 			_ (out_action : s { psVal->pre = s; })? # optional leading out action, stored in pre
 37 | 			(rule : r { *psVal += r; })+ # one or more rules, appended in order
 38 | 			(out_action : s { psVal->post = s; } )? end_of_file # optional trailing out action, stored in post; then end of input
 39 | 
 40 | out_action: std::string `out action` %no-memo = # Text between OUT_BEGIN and OUT_END, captured verbatim into psVal.
 41 | 		OUT_BEGIN < ( !OUT_END . )* > : psVal OUT_END _ # the capture stops at the first OUT_END
 42 | 
 43 | rule: ast::grammar_rule_ptr `` %no-memo = # A complete rule definition: header, then body.
 44 | 		rule_lhs : psVal choice : m { psVal->m = m; } # the header creates the grammar_rule; the parsed choice becomes its matcher m
 45 | 
 46 | rule_lhs: ast::grammar_rule_ptr = # Rule header, up to and including '='.
 47 | 		identifier : s { psVal = ast::make_ptr<ast::grammar_rule>(s); } # rule name
 48 | 			( BIND type_id : t { psVal->type = t; } )? # optional value type
 49 | 			( err_string : t { psVal->error = t.empty() ? s : t; } )? # optional error name; an empty error string falls back to the rule name
 50 | 			( @"%no-memo" _ { psVal->memo = false; } )? # optional flag that clears the rule's memo field
 51 | 			EQUAL
 52 | 
 53 | identifier: std::string `` = # A name: letter or underscore, then letters, underscores or digits.
 54 | 		< [A-Za-z_][A-Za-z_0-9]* > : psVal _ # only the name is captured; the following whitespace/comments are consumed but not captured
 55 | 
 56 | type_id: std::string `type ID` = # A type name; the whole matched text is captured into psVal.
 57 | 		< identifier ( "::" _ type_id )* # name, optionally qualified with "::"
 58 | 			( '<' _ type_id ( ',' _ type_id )* '>' _ )? > : psVal # optional comma-separated argument list in angle brackets
 59 | 
 60 | err_string: std::string `error string` = # Backquoted text; psVal is the captured text after strings::unescape_error.
 61 | 		'`' < ( "\\\\" | "\\`" | ![`\t\n\r] . )* > : s '`' _ # a doubled backslash or backslash-backquote is taken as a unit; backquote, tab, newline and CR are otherwise excluded
 62 | 			{ psVal = strings::unescape_error(s); }
 63 | 
 64 | choice: ast::alt_matcher_ptr = # One or more sequences separated by PIPE, collected in order into an alt_matcher.
 65 | 		sequence : m { psVal = ast::make_ptr<ast::alt_matcher>(); *psVal += m; } 
 66 | 			( PIPE sequence : m { *psVal += m; } )*
 67 | 
 68 | sequence: ast::seq_matcher_ptr = # One or more expressions and/or actions, appended in order to a seq_matcher.
 69 | 		{ psVal = ast::make_ptr<ast::seq_matcher>(); }
 70 | 			( expression : e { *psVal += e; } | action : a { *psVal += a; } )+
 71 | 
 72 | expression: ast::matcher_ptr `` = # A primary with an optional prefix or suffix operator.
 73 | 		AND primary : m { psVal = ast::make_ptr<ast::look_matcher>(m); } # '&' prefix
 74 | 		| NOT primary : m { psVal = ast::make_ptr<ast::not_matcher>(m); } # '!' prefix
 75 | 		| primary : m { psVal = m; } ( # bare primary; psVal is replaced below if a suffix follows
 76 | 			OPT { psVal = ast::make_ptr<ast::opt_matcher>(m); }
 77 | 			| STAR { psVal = ast::make_ptr<ast::many_matcher>(m); }
 78 | 			| PLUS { psVal = ast::make_ptr<ast::some_matcher>(m); }
 79 | 			| EXPECT err_string : s { psVal = ast::make_ptr<ast::named_matcher>(m, s); } )? # '@' suffix attaches the error string s as the name
 80 | 
 81 | primary: ast::matcher_ptr = # The basic expression forms, tried in the order listed.
 82 | 		( !rule_lhs identifier : s  # Make sure to not match next rule definition
 83 | 			{ psVal = ast::make_ptr<ast::rule_matcher>(s); } 
 84 | 			( BIND identifier : s 
 85 | 				{ ast::as_ptr<ast::rule_matcher>(psVal)->var = s; } )? )@`nonterminal expression` # optional bind: s now holds the variable name
 86 | 		| ( OPEN choice : am CLOSE { psVal = am; } )@`parenthesized subexpression`
 87 | 		| char_literal : cm { psVal = cm; } # character literal
 88 | 		| str_literal : sm { psVal = sm; } # string literal
 89 | 		| char_class : rm { psVal = rm; } # character class, optionally bound to a variable
 90 | 		    ( BIND identifier : s 
 91 | 		        { ast::as_ptr<ast::range_matcher>(psVal)->var = s; } )?
 92 | 		| ANY { psVal = ast::make_ptr<ast::any_matcher>(); } # '.', optionally bound to a variable
 93 | 		    ( BIND identifier : s 
 94 | 		        { ast::as_ptr<ast::any_matcher>(psVal)->var = s; } )?
 95 | 		| EMPTY { psVal = ast::make_ptr<ast::empty_matcher>(); } # ';'
 96 | 		| ( BEGIN sequence : bm END BIND identifier : s # '<' sequence '>' with a mandatory bind
 97 | 		    { psVal = ast::make_ptr<ast::capt_matcher>(bm, s); } )@`capturing expression`
 98 | 		| EXPECT ( # '@' before a literal: the literal's quoted, escaped text becomes the name
 99 | 		    char_literal : cm 
100 | 		        { psVal = ast::make_ptr<ast::named_matcher>(cm, strings::quoted_escape(cm->c)); }
101 | 		    | str_literal : sm 
102 | 		        { psVal = ast::make_ptr<ast::named_matcher>(sm, strings::quoted_escape(sm->s)); } )
103 | 		| FAIL err_string : s { psVal = ast::make_ptr<ast::fail_matcher>(s); } # '~' followed by an error string
104 | 
105 | action: ast::action_matcher_ptr `` = # A brace-delimited block whose text is captured into an action_matcher.
106 | 		!OUT_BEGIN '{' < ( action | !'}' . )* > : s '}' _ # not an out action opener; nested brace blocks are matched by recursing into action
107 | 			{ psVal = ast::make_ptr<ast::action_matcher>(s); }
108 | 
109 | char_literal: ast::char_matcher_ptr `character literal` = # Exactly one character between single quotes.
110 | 		'\'' character : c '\'' _
111 | 			{ psVal = ast::make_ptr<ast::char_matcher>(c); }
112 | 
113 | str_literal: ast::str_matcher_ptr `string literal` = # Zero or more characters between double quotes.
114 | 		'\"' < character* > : s '\"' _ # s holds the raw text; it is unescaped in the action below
115 | 			{ psVal = ast::make_ptr<ast::str_matcher>(strings::unescape(s)); }
116 | 
117 | char_class: ast::range_matcher_ptr `character class` = # Bracketed list of characters and ranges.
118 | 		'[' { psVal = ast::make_ptr<ast::range_matcher>(); } 
119 | 			( !']' characters : r { *psVal += r; } )* ']' _ # each item is added to the range_matcher until the closing bracket
120 | 
121 | characters: ast::char_range = # One item of a character class.
122 | 		character : f '-' character : t { psVal = ast::char_range(f,t); } # range form, tried first
123 | 		| character : c { psVal = ast::char_range(c); } # single character
124 | 
125 | character: char = # One character of a literal or character class.
126 | 		'\\' [nrt\'\"\\] : c { psVal = strings::unescaped_char(c); } # backslash escape
127 | 		| ![\'\"\\] . : psVal # any other character except a quote or a backslash
128 | 
129 | OUT_BEGIN =		@"{%" # Token rules: each literal is prefixed with '@', which (see primary) names it after its own quoted text.
130 | OUT_END =		@"%}" # OUT_BEGIN and OUT_END are the only tokens not followed by _
131 | BIND =			@':' _
132 | EQUAL =			@'=' _
133 | PIPE =			@'|' _
134 | AND =			@'&' _
135 | NOT =			@'!' _
136 | OPT =			@'?' _
137 | STAR =			@'*' _
138 | PLUS =			@'+' _
139 | OPEN =			@'(' _
140 | CLOSE =			@')' _
141 | ANY =			@'.' _
142 | EMPTY =			@';' _
143 | BEGIN =			@'<' _
144 | END =			@'>' _
145 | EXPECT =		@'@' _
146 | FAIL =			@'~' _
147 | 
148 | _ =		 		( space | comment )* # filler between tokens: any number of spaces and comments
149 | space =			' ' | '\t' | end_of_line
150 | comment =		'#' ( !end_of_line . )* end_of_line # NOTE(review): requires a terminating end_of_line, so a comment on an unterminated last line does not match
151 | end_of_line = 	"\r\n" | '\n' | '\r' # the two-character form is listed first
152 | 
153 | end_of_file `end of input` = !. # negative lookahead on "any character"
154 | 
155 | 


--------------------------------------------------------------------------------
/egg.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /* THE FOLLOWING HAS BEEN AUTOMATICALLY GENERATED BY THE EGG PARSER GENERATOR.
  4 |  * DO NOT EDIT. */
  5 | 
  6 | // {%
  7 | 
  8 | /*
  9 |  * Copyright (c) 2013 Aaron Moss
 10 |  * 
 11 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
 12 |  * of this software and associated documentation files (the "Software"), to deal
 13 |  * in the Software without restriction, including without limitation the rights
 14 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 15 |  * copies of the Software, and to permit persons to whom the Software is
 16 |  * furnished to do so, subject to the following conditions:
 17 |  * 
 18 |  * The above copyright notice and this permission notice shall be included in
 19 |  * all copies or substantial portions of the Software.
 20 |  * 
 21 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 22 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 23 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 24 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 25 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 26 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 27 |  * THE SOFTWARE.
 28 |  */
 29 |  
 30 | #include <string>
 31 | 
 32 | #include "ast.hpp"
 33 | #include "utils/strings.hpp"
 34 | 
 35 | // %}
 36 | 
 37 | #include <string>
 38 | #include "parser.hpp"
 39 | 
 40 | namespace egg {
 41 | 
 42 | 	bool grammar(parser::state&, ast::grammar_ptr &); // Forward declarations, one per grammar rule and in grammar order, so the rule functions defined below can refer to each other.
 43 | 	bool out_action(parser::state&, std::string &);
 44 | 	bool rule(parser::state&, ast::grammar_rule_ptr &);
 45 | 	bool rule_lhs(parser::state&, ast::grammar_rule_ptr &);
 46 | 	bool identifier(parser::state&, std::string &);
 47 | 	bool type_id(parser::state&, std::string &);
 48 | 	bool err_string(parser::state&, std::string &);
 49 | 	bool choice(parser::state&, ast::alt_matcher_ptr &);
 50 | 	bool sequence(parser::state&, ast::seq_matcher_ptr &);
 51 | 	bool expression(parser::state&, ast::matcher_ptr &);
 52 | 	bool primary(parser::state&, ast::matcher_ptr &);
 53 | 	bool action(parser::state&, ast::action_matcher_ptr &);
 54 | 	bool char_literal(parser::state&, ast::char_matcher_ptr &);
 55 | 	bool str_literal(parser::state&, ast::str_matcher_ptr &);
 56 | 	bool char_class(parser::state&, ast::range_matcher_ptr &);
 57 | 	bool characters(parser::state&, ast::char_range &);
 58 | 	bool character(parser::state&, char &);
 59 | 	bool OUT_BEGIN(parser::state&); // from here on the rules declare no value type in the grammar, so there is no second (result) parameter
 60 | 	bool OUT_END(parser::state&);
 61 | 	bool BIND(parser::state&);
 62 | 	bool EQUAL(parser::state&);
 63 | 	bool PIPE(parser::state&);
 64 | 	bool AND(parser::state&);
 65 | 	bool NOT(parser::state&);
 66 | 	bool OPT(parser::state&);
 67 | 	bool STAR(parser::state&);
 68 | 	bool PLUS(parser::state&);
 69 | 	bool OPEN(parser::state&);
 70 | 	bool CLOSE(parser::state&);
 71 | 	bool ANY(parser::state&);
 72 | 	bool EMPTY(parser::state&);
 73 | 	bool BEGIN(parser::state&);
 74 | 	bool END(parser::state&);
 75 | 	bool EXPECT(parser::state&);
 76 | 	bool FAIL(parser::state&);
 77 | 	bool _(parser::state&);
 78 | 	bool space(parser::state&);
 79 | 	bool comment(parser::state&);
 80 | 	bool end_of_line(parser::state&);
 81 | 	bool end_of_file(parser::state&);
 82 | 
 83 | 	bool grammar(parser::state& ps, ast::grammar_ptr & psVal) { // Start rule `grammar` of egg.egg: fills psVal with an ast::grammar holding the optional pre/post out actions and every parsed rule.
 84 | 		ast::grammar_rule_ptr  r;
 85 | 		std::string  s;
 86 | 		// No parser::memoize wrapper here: the grammar marks this rule %no-memo.
 87 | 		return parser::named("grammar", 
 88 | 			parser::sequence({
 89 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::grammar>();  return true; }, // create the result first so later steps can fill it in
 90 | 				_,
 91 | 				parser::option(
 92 | 					parser::sequence({
 93 | 						parser::bind(s, out_action),
 94 | 						[&](parser::state& ps) { psVal->pre = s;  return true; }})),
 95 | 				parser::some(
 96 | 					parser::sequence({
 97 | 						parser::bind(r, rule),
 98 | 						[&](parser::state& ps) { *psVal += r;  return true; }})),
 99 | 				parser::option(
100 | 					parser::sequence({
101 | 						parser::bind(s, out_action),
102 | 						[&](parser::state& ps) { psVal->post = s;  return true; }})),
103 | 				end_of_file}))(ps);
104 | 	}
105 | 
106 | 	bool out_action(parser::state& ps, std::string & psVal) { // Rule `out_action` of egg.egg: captures into psVal the text between OUT_BEGIN and the first OUT_END, then consumes rule `_`. Marked %no-memo in the grammar, so the rule itself is not memoized.
107 | 		return parser::named("out action", 
108 | 			parser::sequence({
109 | 				OUT_BEGIN,
110 | 				parser::capture(psVal, parser::memoize_many(1, 
111 | 					parser::sequence({
112 | 						parser::look_not(OUT_END), // stop before the closing delimiter
113 | 						parser::any()}))),
114 | 				OUT_END,
115 | 				_}))(ps);
116 | 	}
117 | 
118 | 	bool rule(parser::state& ps, ast::grammar_rule_ptr & psVal) { // Rule `rule` of egg.egg: a rule header (rule_lhs, bound straight into psVal) followed by its body; the parsed choice is stored in psVal->m.
119 | 		ast::alt_matcher_ptr  m;
120 | 		// m is the bind target for the rule body. Marked %no-memo in the grammar, so there is no parser::memoize wrapper.
121 | 		return parser::named("rule", 
122 | 			parser::sequence({
123 | 				parser::bind(psVal, rule_lhs),
124 | 				parser::bind(m, choice),
125 | 				[&](parser::state& ps) { psVal->m = m;  return true; }}))(ps);
126 | 	}
127 | 
128 | 	bool rule_lhs(parser::state& ps, ast::grammar_rule_ptr & psVal) { // Rule `rule_lhs` of egg.egg: parses a rule header (name, optional type, optional error string, optional %no-memo flag, '=') into a new ast::grammar_rule stored in psVal.
129 | 		std::string  s;
130 | 		std::string  t;
131 | 		// s holds the rule name; t is reused for the type and then for the error string.
132 | 		return parser::memoize(2, psVal, 
133 | 			parser::sequence({
134 | 				parser::bind(s, identifier),
135 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::grammar_rule>(s);  return true; },
136 | 				parser::option(
137 | 					parser::sequence({
138 | 						BIND,
139 | 						parser::bind(t, type_id),
140 | 						[&](parser::state& ps) { psVal->type = t;  return true; }})),
141 | 				parser::option(
142 | 					parser::sequence({
143 | 						parser::bind(t, err_string),
144 | 						[&](parser::state& ps) { psVal->error = t.empty() ? s : t;  return true; }})), // an empty error string falls back to the rule name
145 | 				parser::option(
146 | 					parser::sequence({
147 | 						parser::named("\"%no-memo\"", parser::literal("%no-memo")),
148 | 						_,
149 | 						[&](parser::state& ps) { psVal->memo = false;  return true; }})),
150 | 				EQUAL}))(ps);
151 | 	}
152 | 
153 | 	bool identifier(parser::state& ps, std::string & psVal) { // Rule `identifier` of egg.egg: captures into psVal a name made of a letter or underscore followed by letters, underscores or digits, then consumes rule `_` outside the capture.
154 | 		return parser::memoize(3, psVal, parser::named("identifier", 
155 | 			parser::sequence({
156 | 				parser::capture(psVal, 
157 | 					parser::sequence({
158 | 						// first character: [A-Za-z_]
159 | 							parser::choice({
160 | 								parser::between('A', 'Z'),
161 | 								parser::between('a', 'z'),
162 | 								parser::literal('_')}),
163 | 						parser::memoize_many(4, // remaining characters: [A-Za-z_0-9]*
164 | 							parser::choice({
165 | 								parser::between('A', 'Z'),
166 | 								parser::between('a', 'z'),
167 | 								parser::literal('_'),
168 | 								parser::between('0', '9')}))})),
169 | 				_})))(ps);
170 | 	}
171 | 
172 | 	bool type_id(parser::state& ps, std::string & psVal) { // Rule `type_id` of egg.egg: captures into psVal the full text of a type name, i.e. an identifier, optional "::"-qualified parts, and an optional '<'...'>' argument list.
173 | 		return parser::memoize(5, psVal, parser::named("type ID", parser::capture(psVal, 
174 | 			parser::sequence({
175 | 				parser::unbind(identifier), // sub-rules are used without binding: only the captured text matters
176 | 				parser::memoize_many(6, 
177 | 					parser::sequence({
178 | 						parser::literal("::"),
179 | 						_,
180 | 						parser::unbind(type_id)})),
181 | 				parser::option(
182 | 					parser::sequence({
183 | 						parser::literal('<'),
184 | 						_,
185 | 						parser::unbind(type_id),
186 | 						parser::memoize_many(7, 
187 | 							parser::sequence({
188 | 								parser::literal(','),
189 | 								_,
190 | 								parser::unbind(type_id)})),
191 | 						parser::literal('>'),
192 | 						_}))}))))(ps);
193 | 	}
194 | 
195 | 	bool err_string(parser::state& ps, std::string & psVal) { // Rule `err_string` of egg.egg: a backquoted string; psVal receives the captured text after strings::unescape_error.
196 | 		std::string s;
197 | 		// s receives the raw text between the backquotes.
198 | 		return parser::memoize(8, psVal, parser::named("error string", 
199 | 			parser::sequence({
200 | 				parser::literal('`'),
201 | 				parser::capture(s, parser::memoize_many(9, 
202 | 					parser::choice({
203 | 						parser::literal("\\\\"), // two backslashes, taken as a unit
204 | 						parser::literal("\\`"), // backslash + backquote, so an escaped backquote does not end the string
205 | 						// otherwise any character except backquote, tab, newline or carriage return
206 | 							parser::sequence({
207 | 								parser::look_not(
208 | 									parser::choice({
209 | 										parser::literal('`'),
210 | 										parser::literal('\t'),
211 | 										parser::literal('\n'),
212 | 										parser::literal('\r')})),
213 | 								parser::any()})}))),
214 | 				parser::literal('`'),
215 | 				_,
216 | 				[&](parser::state& ps) { psVal = strings::unescape_error(s);  return true; }})))(ps);
217 | 	}
218 | 
219 | 	bool choice(parser::state& ps, ast::alt_matcher_ptr & psVal) { // Rule `choice` of egg.egg: one or more sequences separated by PIPE, added in order to a new ast::alt_matcher stored in psVal.
220 | 		ast::seq_matcher_ptr  m;
221 | 		// m is the bind target for each alternative in turn.
222 | 		return parser::memoize(10, psVal, 
223 | 			parser::sequence({
224 | 				parser::bind(m, sequence),
225 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::alt_matcher>(); *psVal += m;  return true; }, // the alt_matcher is created only once the first alternative has matched
226 | 				parser::many(
227 | 					parser::sequence({
228 | 						PIPE,
229 | 						parser::bind(m, sequence),
230 | 						[&](parser::state& ps) { *psVal += m;  return true; }}))}))(ps);
231 | 	}
232 | 
233 | 	bool sequence(parser::state& ps, ast::seq_matcher_ptr & psVal) { // Rule `sequence` of egg.egg: one or more expressions and/or actions, added in order to a new ast::seq_matcher stored in psVal.
234 | 		ast::action_matcher_ptr  a;
235 | 		ast::matcher_ptr  e;
236 | 		// a and e are the bind targets for an action and an expression respectively.
237 | 		return parser::memoize(11, psVal, 
238 | 			parser::sequence({
239 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::seq_matcher>();  return true; },
240 | 				parser::some(
241 | 					parser::choice({
242 | 						// an expression is tried first
243 | 							parser::sequence({
244 | 								parser::bind(e, expression),
245 | 								[&](parser::state& ps) { *psVal += e;  return true; }}),
246 | 						// otherwise an action block
247 | 							parser::sequence({
248 | 								parser::bind(a, action),
249 | 								[&](parser::state& ps) { *psVal += a;  return true; }})}))}))(ps);
250 | 	}
251 | 
252 | 	bool expression(parser::state& ps, ast::matcher_ptr & psVal) { // Rule `expression` of egg.egg: a primary with an optional prefix (AND, NOT) or suffix (OPT, STAR, PLUS, EXPECT + error string); psVal is the primary itself or the wrapper matcher built around it.
253 | 		ast::matcher_ptr  m;
254 | 		std::string  s;
255 | 		// m holds the parsed primary; s holds the error string of the EXPECT suffix.
256 | 		return parser::memoize(12, psVal, parser::named("expression", 
257 | 			parser::choice({
258 | 				// AND prefix: look_matcher
259 | 					parser::sequence({
260 | 						AND,
261 | 						parser::bind(m, primary),
262 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::look_matcher>(m);  return true; }}),
263 | 				// NOT prefix: not_matcher
264 | 					parser::sequence({
265 | 						NOT,
266 | 						parser::bind(m, primary),
267 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::not_matcher>(m);  return true; }}),
268 | 				// bare primary, optionally followed by one suffix operator
269 | 					parser::sequence({
270 | 						parser::bind(m, primary),
271 | 						[&](parser::state& ps) { psVal = m;  return true; }, // result when no suffix follows; overwritten below otherwise
272 | 						parser::option(
273 | 							parser::choice({
274 | 								
275 | 									parser::sequence({
276 | 										OPT,
277 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::opt_matcher>(m);  return true; }}),
278 | 								
279 | 									parser::sequence({
280 | 										STAR,
281 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::many_matcher>(m);  return true; }}),
282 | 								
283 | 									parser::sequence({
284 | 										PLUS,
285 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::some_matcher>(m);  return true; }}),
286 | 								
287 | 									parser::sequence({
288 | 										EXPECT,
289 | 										parser::bind(s, err_string),
290 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(m, s);  return true; }})}))})})))(ps);
291 | 	}
292 | 
293 | 	bool primary(parser::state& ps, ast::matcher_ptr & psVal) { // Rule `primary` of egg.egg: ordered choice over the kinds of primary expression; the lambda of the alternative that matches stores its AST matcher in psVal. Returns the result of running the combinator on ps.
294 | 		ast::alt_matcher_ptr  am;
295 | 		ast::seq_matcher_ptr  bm;
296 | 		ast::char_matcher_ptr  cm;
297 | 		ast::range_matcher_ptr  rm;
298 | 		std::string  s;
299 | 		ast::str_matcher_ptr  sm;
300 | 		// The locals above are the bind targets used by the alternatives below.
301 | 		return parser::memoize(13, psVal, 
302 | 			parser::choice({
303 | 				parser::named("nonterminal expression", 
304 | 					parser::sequence({
305 | 						parser::look_not(parser::unbind(rule_lhs)), // do not take the identifier that starts the next rule definition
306 | 						parser::bind(s, identifier),
307 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::rule_matcher>(s);  return true; },
308 | 						parser::option(
309 | 							parser::sequence({
310 | 								BIND,
311 | 								parser::bind(s, identifier), // s is reused: it now holds the variable name, not the rule name
312 | 								[&](parser::state& ps) { ast::as_ptr<ast::rule_matcher>(psVal)->var = s;  return true; }}))})),
313 | 				parser::named("parenthesized subexpression", 
314 | 					parser::sequence({
315 | 						OPEN,
316 | 						parser::bind(am, choice),
317 | 						CLOSE,
318 | 						[&](parser::state& ps) { psVal = am;  return true; }})),
319 | 				// character literal: the char_matcher is used as the result directly
320 | 					parser::sequence({
321 | 						parser::bind(cm, char_literal),
322 | 						[&](parser::state& ps) { psVal = cm;  return true; }}),
323 | 				// string literal: the str_matcher is used as the result directly
324 | 					parser::sequence({
325 | 						parser::bind(sm, str_literal),
326 | 						[&](parser::state& ps) { psVal = sm;  return true; }}),
327 | 				// character class, with an optional BIND identifier stored in the range_matcher's var
328 | 					parser::sequence({
329 | 						parser::bind(rm, char_class),
330 | 						[&](parser::state& ps) { psVal = rm;  return true; },
331 | 						parser::option(
332 | 							parser::sequence({
333 | 								BIND,
334 | 								parser::bind(s, identifier),
335 | 								[&](parser::state& ps) { ast::as_ptr<ast::range_matcher>(psVal)->var = s;  return true; }}))}),
336 | 				// ANY token, with an optional BIND identifier stored in the any_matcher's var
337 | 					parser::sequence({
338 | 						ANY,
339 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::any_matcher>();  return true; },
340 | 						parser::option(
341 | 							parser::sequence({
342 | 								BIND,
343 | 								parser::bind(s, identifier),
344 | 								[&](parser::state& ps) { ast::as_ptr<ast::any_matcher>(psVal)->var = s;  return true; }}))}),
345 | 				// EMPTY token
346 | 					parser::sequence({
347 | 						EMPTY,
348 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::empty_matcher>();  return true; }}),
349 | 				parser::named("capturing expression", 
350 | 					parser::sequence({
351 | 						BEGIN,
352 | 						parser::bind(bm, sequence),
353 | 						END,
354 | 						BIND, // unlike the optional binds above, the capture's target variable is mandatory
355 | 						parser::bind(s, identifier),
356 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::capt_matcher>(bm, s);  return true; }})),
357 | 				// EXPECT token before a char or string literal: wrap the literal in a named_matcher whose name is the literal's quoted, escaped text
358 | 					parser::sequence({
359 | 						EXPECT,
360 | 						
361 | 							parser::choice({
362 | 								
363 | 									parser::sequence({
364 | 										parser::bind(cm, char_literal),
365 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(cm, strings::quoted_escape(cm->c));  return true; }}),
366 | 								
367 | 									parser::sequence({
368 | 										parser::bind(sm, str_literal),
369 | 										[&](parser::state& ps) { psVal = ast::make_ptr<ast::named_matcher>(sm, strings::quoted_escape(sm->s));  return true; }})})}),
370 | 				// FAIL token followed by an error string
371 | 					parser::sequence({
372 | 						FAIL,
373 | 						parser::bind(s, err_string),
374 | 						[&](parser::state& ps) { psVal = ast::make_ptr<ast::fail_matcher>(s);  return true; }})}))(ps);
375 | 	}
376 | 
377 | 	bool action(parser::state& ps, ast::action_matcher_ptr & psVal) { // rule "action": a '{' ... '}' block; on success psVal is an action_matcher built from the captured text
378 | 		std::string s; // text captured between the outer braces
379 | 
380 | 		return parser::memoize(14, psVal, parser::named("action", 
381 | 			parser::sequence({
382 | 				parser::look_not(OUT_BEGIN), // OUT_BEGIN is the "{%" literal; presumably this keeps an output block from being read as an action
383 | 				parser::literal('{'),
384 | 				parser::capture(s, parser::memoize_many(15, 
385 | 					parser::choice({
386 | 						parser::unbind(action), // nested brace block, matched by recursing into this rule
387 | 						
388 | 							parser::sequence({
389 | 								parser::look_not(parser::literal('}')), // otherwise one character, guarded by look_not('}')
390 | 								parser::any()})}))),
391 | 				parser::literal('}'),
392 | 				_, // the `_` rule (space / comment)
393 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::action_matcher>(s);  return true; }})))(ps);
394 | 	}
395 | 
396 | 	bool char_literal(parser::state& ps, ast::char_matcher_ptr & psVal) { // rule "character literal": one `character` between single quotes; psVal becomes a char_matcher for it
397 | 		char  c; // bound to the value produced by the `character` rule
398 | 
399 | 		return parser::memoize(16, psVal, parser::named("character literal", 
400 | 			parser::sequence({
401 | 				parser::literal('\''),
402 | 				parser::bind(c, character),
403 | 				parser::literal('\''),
404 | 				_, // the `_` rule (space / comment)
405 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::char_matcher>(c);  return true; }})))(ps);
406 | 	}
407 | 
408 | 	bool str_literal(parser::state& ps, ast::str_matcher_ptr & psVal) { // rule "string literal": `character`s between double quotes; psVal becomes a str_matcher
409 | 		std::string s; // captured text between the quotes, passed through strings::unescape() below
410 | 
411 | 		return parser::memoize(17, psVal, parser::named("string literal", 
412 | 			parser::sequence({
413 | 				parser::literal('\"'),
414 | 				parser::capture(s, parser::memoize_many(18, parser::unbind(character))), // capture the matched text rather than the per-character values
415 | 				parser::literal('\"'),
416 | 				_, // the `_` rule (space / comment)
417 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::str_matcher>(strings::unescape(s));  return true; }})))(ps);
418 | 	}
419 | 
420 | 	bool char_class(parser::state& ps, ast::range_matcher_ptr & psVal) { // rule "character class": '[' followed by `characters` entries up to ']'; psVal accumulates them in a range_matcher
421 | 		ast::char_range  r; // the entry most recently bound by `characters`
422 | 
423 | 		return parser::memoize(19, psVal, parser::named("character class", 
424 | 			parser::sequence({
425 | 				parser::literal('['),
426 | 				[&](parser::state& ps) { psVal = ast::make_ptr<ast::range_matcher>();  return true; }, // start with a fresh matcher before any entry is added
427 | 				parser::many(
428 | 					parser::sequence({
429 | 						parser::look_not(parser::literal(']')), // each entry is guarded by look_not(']')
430 | 						parser::bind(r, characters),
431 | 						[&](parser::state& ps) { *psVal += r;  return true; }})), // append the entry to the matcher
432 | 				parser::literal(']'),
433 | 				_})))(ps);
434 | 	}
435 | 
436 | 	bool characters(parser::state& ps, ast::char_range & psVal) { // one character-class entry: either "f-t" or a single character
437 | 		char  c; // single-character entry
438 | 		char  f; // first `character` of an "f-t" entry
439 | 		char  t; // second `character` of an "f-t" entry
440 | 
441 | 		return parser::memoize(20, psVal, 
442 | 			parser::choice({
443 | 				
444 | 					parser::sequence({ // alternative 1: character '-' character, stored as char_range(f,t)
445 | 						parser::bind(f, character),
446 | 						parser::literal('-'),
447 | 						parser::bind(t, character),
448 | 						[&](parser::state& ps) { psVal = ast::char_range(f,t);  return true; }}),
449 | 				
450 | 					parser::sequence({ // alternative 2: a lone character, stored as char_range(c)
451 | 						parser::bind(c, character),
452 | 						[&](parser::state& ps) { psVal = ast::char_range(c);  return true; }})}))(ps);
453 | 	}
454 | 
455 | 	bool character(parser::state& ps, char & psVal) { // one character of a literal or class: a backslash escape, or any character other than ' " and backslash
456 | 		char c; // the letter following the backslash in an escape
457 | 
458 | 		return parser::memoize(21, psVal, 
459 | 			parser::choice({
460 | 				
461 | 					parser::sequence({ // alternative 1: backslash followed by one of n r t ' " backslash
462 | 						parser::literal('\\'),
463 | 						
464 | 							parser::choice({
465 | 								parser::literal('n', c),
466 | 								parser::literal('r', c),
467 | 								parser::literal('t', c),
468 | 								parser::literal('\'', c),
469 | 								parser::literal('\"', c),
470 | 								parser::literal('\\', c)}),
471 | 						[&](parser::state& ps) { psVal = strings::unescaped_char(c);  return true; }}), // psVal is whatever strings::unescaped_char returns for the escape letter
472 | 				
473 | 					parser::sequence({ // alternative 2: a character guarded by look_not over ' " and backslash, stored in psVal
474 | 						parser::look_not(
475 | 							parser::choice({
476 | 								parser::literal('\''),
477 | 								parser::literal('\"'),
478 | 								parser::literal('\\')})),
479 | 						parser::any(psVal)})}))(ps);
480 | 	}
481 | 
482 | 	bool OUT_BEGIN(parser::state& ps) { // token "{%" (named "\"{%\""), wrapped in parser::memoize with id 22; no trailing `_`
483 | 		return parser::memoize(22, parser::named("\"{%\"", parser::literal("{%")))(ps);
484 | 	}
485 | 
486 | 	bool OUT_END(parser::state& ps) { // token "%}" (named "\"%}\""), wrapped in parser::memoize with id 23; no trailing `_`
487 | 		return parser::memoize(23, parser::named("\"%}\"", parser::literal("%}")))(ps);
488 | 	}
489 | 
490 | 	bool BIND(parser::state& ps) { // token ':' followed by the `_` rule (space / comment); memoize id 24. Precedes the identifier in variable bindings
491 | 		return parser::memoize(24, 
492 | 			parser::sequence({
493 | 				parser::named("\':\'", parser::literal(':')),
494 | 				_}))(ps);
495 | 	}
496 | 
497 | 	bool EQUAL(parser::state& ps) { // token '=' followed by the `_` rule (space / comment); memoize id 25
498 | 		return parser::memoize(25, 
499 | 			parser::sequence({
500 | 				parser::named("\'=\'", parser::literal('=')),
501 | 				_}))(ps);
502 | 	}
503 | 
504 | 	bool PIPE(parser::state& ps) { // token '|' followed by the `_` rule (space / comment); memoize id 26
505 | 		return parser::memoize(26, 
506 | 			parser::sequence({
507 | 				parser::named("\'|\'", parser::literal('|')),
508 | 				_}))(ps);
509 | 	}
510 | 
511 | 	bool AND(parser::state& ps) { // token '&' followed by the `_` rule (space / comment); memoize id 27
512 | 		return parser::memoize(27, 
513 | 			parser::sequence({
514 | 				parser::named("\'&\'", parser::literal('&')),
515 | 				_}))(ps);
516 | 	}
517 | 
518 | 	bool NOT(parser::state& ps) { // token '!' followed by the `_` rule (space / comment); memoize id 28
519 | 		return parser::memoize(28, 
520 | 			parser::sequence({
521 | 				parser::named("\'!\'", parser::literal('!')),
522 | 				_}))(ps);
523 | 	}
524 | 
525 | 	bool OPT(parser::state& ps) { // token '?' followed by the `_` rule (space / comment); memoize id 29
526 | 		return parser::memoize(29, 
527 | 			parser::sequence({
528 | 				parser::named("\'?\'", parser::literal('?')),
529 | 				_}))(ps);
530 | 	}
531 | 
532 | 	bool STAR(parser::state& ps) { // token '*' followed by the `_` rule (space / comment); memoize id 30
533 | 		return parser::memoize(30, 
534 | 			parser::sequence({
535 | 				parser::named("\'*\'", parser::literal('*')),
536 | 				_}))(ps);
537 | 	}
538 | 
539 | 	bool PLUS(parser::state& ps) { // token '+' followed by the `_` rule (space / comment); memoize id 31
540 | 		return parser::memoize(31, 
541 | 			parser::sequence({
542 | 				parser::named("\'+\'", parser::literal('+')),
543 | 				_}))(ps);
544 | 	}
545 | 
546 | 	bool OPEN(parser::state& ps) { // token '(' followed by the `_` rule (space / comment); memoize id 32
547 | 		return parser::memoize(32, 
548 | 			parser::sequence({
549 | 				parser::named("\'(\'", parser::literal('(')),
550 | 				_}))(ps);
551 | 	}
552 | 
553 | 	bool CLOSE(parser::state& ps) { // token ')' followed by the `_` rule (space / comment); memoize id 33
554 | 		return parser::memoize(33, 
555 | 			parser::sequence({
556 | 				parser::named("\')\'", parser::literal(')')),
557 | 				_}))(ps);
558 | 	}
559 | 
560 | 	bool ANY(parser::state& ps) { // token '.' followed by the `_` rule (space / comment); memoize id 34. Introduces an any_matcher in the expression rule
561 | 		return parser::memoize(34, 
562 | 			parser::sequence({
563 | 				parser::named("\'.\'", parser::literal('.')),
564 | 				_}))(ps);
565 | 	}
566 | 
567 | 	bool EMPTY(parser::state& ps) { // token ';' followed by the `_` rule (space / comment); memoize id 35. Introduces an empty_matcher in the expression rule
568 | 		return parser::memoize(35, 
569 | 			parser::sequence({
570 | 				parser::named("\';\'", parser::literal(';')),
571 | 				_}))(ps);
572 | 	}
573 | 
574 | 	bool BEGIN(parser::state& ps) { // token '<' followed by the `_` rule (space / comment); memoize id 36. Opens a capturing expression
575 | 		return parser::memoize(36, 
576 | 			parser::sequence({
577 | 				parser::named("\'<\'", parser::literal('<')),
578 | 				_}))(ps);
579 | 	}
580 | 
581 | 	bool END(parser::state& ps) { // token '>' followed by the `_` rule (space / comment); memoize id 37. Closes a capturing expression
582 | 		return parser::memoize(37, 
583 | 			parser::sequence({
584 | 				parser::named("\'>\'", parser::literal('>')),
585 | 				_}))(ps);
586 | 	}
587 | 
588 | 	bool EXPECT(parser::state& ps) { // token '@' followed by the `_` rule (space / comment); memoize id 38. Precedes a char/string literal that becomes a named_matcher
589 | 		return parser::memoize(38, 
590 | 			parser::sequence({
591 | 				parser::named("\'@\'", parser::literal('@')),
592 | 				_}))(ps);
593 | 	}
594 | 
595 | 	bool FAIL(parser::state& ps) { // token '~' followed by the `_` rule (space / comment); memoize id 39. Precedes the err_string of a fail_matcher
596 | 		return parser::memoize(39, 
597 | 			parser::sequence({
598 | 				parser::named("\'~\'", parser::literal('~')),
599 | 				_}))(ps);
600 | 	}
601 | 
602 | 	bool _(parser::state& ps) { // inter-token filler: repeated (parser::memoize_many, id 41) choice of `space` or `comment`; outer memoize id 40
603 | 		return parser::memoize(40, parser::memoize_many(41, 
604 | 			parser::choice({
605 | 				space,
606 | 				comment})))(ps);
607 | 	}
608 | 
609 | 	bool space(parser::state& ps) { // one whitespace unit: ' ', '\t', or an `end_of_line`; memoize id 42
610 | 		return parser::memoize(42, 
611 | 			parser::choice({
612 | 				parser::literal(' '),
613 | 				parser::literal('\t'),
614 | 				end_of_line}))(ps);
615 | 	}
616 | 
617 | 	bool comment(parser::state& ps) { // '#' comment running to, and including, an `end_of_line`; memoize id 43
618 | 		return parser::memoize(43, 
619 | 			parser::sequence({
620 | 				parser::literal('#'),
621 | 				parser::memoize_many(44, 
622 | 					parser::sequence({
623 | 						parser::look_not(end_of_line), // comment body: characters guarded by look_not(end_of_line)
624 | 						parser::any()})),
625 | 				end_of_line}))(ps); // NOTE(review): a terminating end_of_line is required, so a comment on an unterminated last line presumably does not match; confirm
626 | 	}
627 | 
628 | 	bool end_of_line(parser::state& ps) { // line terminator: "\r\n", '\n', or '\r' ("\r\n" is listed first so it is tried before a lone '\r'); memoize id 45
629 | 		return parser::memoize(45, 
630 | 			parser::choice({
631 | 				parser::literal("\r\n"),
632 | 				parser::literal('\n'),
633 | 				parser::literal('\r')}))(ps);
634 | 	}
635 | 
636 | 	bool end_of_file(parser::state& ps) { // named "end of input": look_not(any()), i.e. matches only where no further character can be matched; memoize id 46
637 | 		return parser::memoize(46, parser::named("end of input", parser::look_not(parser::any())))(ps);
638 | 	}
639 | 
640 | } // namespace egg
641 | 
642 | 


--------------------------------------------------------------------------------
/grammars/.gitignore:
--------------------------------------------------------------------------------
1 | abc
2 | anbncn
3 | calc
4 | *.hpp
5 | *.cpp
6 | *.o
7 | 


--------------------------------------------------------------------------------
/grammars/Makefile:
--------------------------------------------------------------------------------
 1 | # Copyright (c) 2013 Aaron Moss
 2 | # 
 3 | # Permission is hereby granted, free of charge, to any person obtaining a copy
 4 | # of this software and associated documentation files (the "Software"), to deal
 5 | # in the Software without restriction, including without limitation the rights
 6 | # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 7 | # copies of the Software, and to permit persons to whom the Software is
 8 | # furnished to do so, subject to the following conditions:
 9 | # 
10 | # The above copyright notice and this permission notice shall be included in
11 | # all copies or substantial portions of the Software.
12 | # 
13 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
15 | # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
16 | # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
17 | # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
18 | # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
19 | # THE SOFTWARE.
20 | 
21 | #CXXFLAGS = -O0 -ggdb --std=c++0x
22 | CXXFLAGS = -O3 --std=c++0x
23 | 
24 | parser.hpp:  
25 | 	ln -s ../parser.hpp .
26 | # Generate C++ source from an Egg grammar using the egg binary in the parent directory
27 | %.cpp:  %.egg
28 | 	../egg -o $@ -i $<
29 | 
30 | abc:  abc.cpp parser.hpp
31 | 	$(CXX) $(CXXFLAGS) -o abc abc.cpp $(LDFLAGS)
32 | 
33 | anbncn:  anbncn.cpp parser.hpp
34 | 	$(CXX) $(CXXFLAGS) -o anbncn anbncn.cpp $(LDFLAGS)
35 | 
36 | calc:  calc.cpp parser.hpp
37 | 	$(CXX) $(CXXFLAGS) -o calc calc.cpp $(LDFLAGS)
38 | .PHONY: egg clean test
39 | egg:
40 | 	cd .. && $(MAKE) egg
41 | 
42 | clean:  
43 | 	-rm abc abc.cpp 
44 | 	-rm anbncn anbncn.cpp 
45 | 	-rm calc calc.cpp
46 | # Build everything, run each example on its input and diff against the expected output
47 | test: egg abc anbncn calc
48 | 	@echo
49 | 	./abc < tests/abc.in.txt > tests/abc.test.txt
50 | 	diff tests/abc.out.txt tests/abc.test.txt
51 | 	./anbncn < tests/anbncn.in.txt > tests/anbncn.test.txt
52 | 	diff tests/anbncn.out.txt tests/anbncn.test.txt
53 | 	./calc < tests/calc.in.txt > tests/calc.test.txt
54 | 	diff tests/calc.out.txt tests/calc.test.txt
55 | 	rm tests/*.test.txt
56 | 	@echo
57 | 	@echo TESTS PASSED
58 | 


--------------------------------------------------------------------------------
/grammars/abc.egg:
--------------------------------------------------------------------------------
 1 | # Simple example grammar.
 2 | #
 3 | # Author: Aaron Moss 
 4 | #   (adapted from Bryan Ford's "Packrat Parsing: a Practical Linear-Time Algorithm with Backtracking")
 5 | 
 6 | {%
 7 | /*
 8 |  * Copyright (c) 2013 Aaron Moss
 9 |  * 
10 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
11 |  * of this software and associated documentation files (the "Software"), to deal
12 |  * in the Software without restriction, including without limitation the rights
13 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 |  * copies of the Software, and to permit persons to whom the Software is
15 |  * furnished to do so, subject to the following conditions:
16 |  * 
17 |  * The above copyright notice and this permission notice shall be included in
18 |  * all copies or substantial portions of the Software.
19 |  * 
20 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 |  * THE SOFTWARE.
27 |  */
28 | %}
29 | # g1: either "ab" or a (possibly empty) run of 'a' - alternatives are tried in the order written - then 'c', then end of input
30 | g1 = ( "ab" | 'a'* ) 'c' !.
31 | 
32 | {%
33 | #include <iostream>
34 | #include <sstream>
35 | 
36 | /**
37 |  * Test harness for abc grammar.
38 |  * @author Aaron Moss
39 |  */
40 | int main(int argc, char** argv) { // checks every line of stdin against rule g1 and prints the verdict
41 | 	using namespace std;
42 | 	
43 | 	string line;
44 | 	while ( getline(cin, line) ) {
45 | 		stringstream input(line);
46 | 		parser::state st(input);  // each line is parsed from scratch
47 | 		
48 | 		if ( ! abc::g1(st) ) {
49 | 			const parser::error& failure = st.error();
50 | 			
51 | 			cout << "`" << line << "' DOESN'T MATCH  @" << failure.pos.col() << endl;
52 | 			for (const auto& message : failure.messages) {
53 | 				cout << "\t" << message << endl;
54 | 			}
55 | 			for (const auto& expected : failure.expected) {
56 | 				cout << "\tExpected " << expected << endl;
57 | 			}
58 | 		} else {
59 | 			cout << "`" << line << "' MATCHES" << endl;
60 | 		}
61 | 	}
62 | }
63 | %}
64 | 


--------------------------------------------------------------------------------
/grammars/anbncn.egg:
--------------------------------------------------------------------------------
 1 | # Matches the a^n b^n c^n (n >= 1) grammar which can't be matched by a CFG
 2 | #
 3 | # Author: Aaron Moss 
 4 | #   (adapted from Bryan Ford's "Packrat Parsing: a Practical Linear-Time Algorithm with Backtracking")
 5 | 
 6 | {%
 7 | /*
 8 |  * Copyright (c) 2013 Aaron Moss
 9 |  * 
10 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
11 |  * of this software and associated documentation files (the "Software"), to deal
12 |  * in the Software without restriction, including without limitation the rights
13 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 |  * copies of the Software, and to permit persons to whom the Software is
15 |  * furnished to do so, subject to the following conditions:
16 |  * 
17 |  * The above copyright notice and this permission notice shall be included in
18 |  * all copies or substantial portions of the Software.
19 |  * 
20 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 |  * THE SOFTWARE.
27 |  */
28 | %}
29 | # G: the lookahead &(A 'c') checks for a^n b^n followed by 'c' without consuming it; then the a's are consumed and B checks b^n c^n up to end of input
30 | G = &(A 'c') 'a'+ B !.
31 | A = 'a' A 'b' | "ab"
32 | B = 'b' B 'c' | "bc"
33 | 
34 | {%
35 | #include <iostream>
36 | #include <sstream>
37 | 
38 | /**
39 |  * Test harness for anbncn grammar.
40 |  * @author Aaron Moss
41 |  */
42 | int main(int argc, char** argv) { // checks every line of stdin against rule G and prints the verdict
43 | 	using namespace std;
44 | 	
45 | 	string line;
46 | 	while ( getline(cin, line) ) {
47 | 		stringstream input(line);
48 | 		parser::state st(input);  // each line is parsed from scratch
49 | 		
50 | 		if ( ! anbncn::G(st) ) {
51 | 			const parser::error& failure = st.error();
52 | 			
53 | 			cout << "`" << line << "' DOESN'T MATCH  @" << failure.pos.col() << endl;
54 | 			for (const auto& message : failure.messages) {
55 | 				cout << "\t" << message << endl;
56 | 			}
57 | 			for (const auto& expected : failure.expected) {
58 | 				cout << "\tExpected " << expected << endl;
59 | 			}
60 | 		} else {
61 | 			cout << "`" << line << "' MATCHES" << endl;
62 | 		}
63 | 	}
64 | }
65 | %}
66 | 


--------------------------------------------------------------------------------
/grammars/calc.egg:
--------------------------------------------------------------------------------
 1 | # A simple calculator program.
 2 | # Respects order of operations.
 3 | #
 4 | # Author: Aaron Moss
 5 | 
 6 | {%
 7 | /*
 8 |  * Copyright (c) 2013 Aaron Moss
 9 |  * 
10 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
11 |  * of this software and associated documentation files (the "Software"), to deal
12 |  * in the Software without restriction, including without limitation the rights
13 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14 |  * copies of the Software, and to permit persons to whom the Software is
15 |  * furnished to do so, subject to the following conditions:
16 |  * 
17 |  * The above copyright notice and this permission notice shall be included in
18 |  * all copies or substantial portions of the Software.
19 |  * 
20 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
26 |  * THE SOFTWARE.
27 |  */
28 | 
29 | #include <cstdlib>
30 | %}
31 | # expr is a sum followed by end of input; sum and prod fold their operands left to right; elem is a parenthesized sum or a number. NOTE(review): DIVIDE uses int '/=' with no check for a zero divisor.
32 | expr : int = _ sum : psVal !.
33 | 
34 | sum : int = prod : psVal (
35 |             PLUS prod : i { psVal += i; }
36 |             | MINUS prod : i { psVal -= i; } )*
37 | prod : int = elem : psVal (
38 |              TIMES elem : i { psVal *= i; }
39 |              | DIVIDE elem : i { psVal /= i; } )*
40 | elem : int = OPEN sum : psVal CLOSE
41 |              | num : psVal
42 | 
43 | num : int = < [0-9]+ > : s { psVal = atoi(s.c_str()); } _
44 | 
45 | PLUS = '+' _
46 | MINUS = '-' _
47 | TIMES = '*' _
48 | DIVIDE = '/' _
49 | OPEN = '(' _
50 | CLOSE = ')' _
51 | 
52 | _ = (' ' | '\t')*
53 | 
54 | {%
55 | #include <iostream>
56 | #include <sstream>
57 | 
58 | /**
59 |  * Test harness for calculator grammar.
60 |  * @author Aaron Moss
61 |  */
62 | int main(int argc, char** argv) {
63 | 	using namespace std;
64 | 	
65 | 	string s;
66 | 	while ( getline(cin, s) ) {
67 | 		stringstream ss(s);
68 | 		parser::state ps(ss);
69 | 		int x;
70 | 		
71 | 		if ( calc::expr(ps, x) ) {
72 | 			cout << x << endl;
73 | 		} else {
74 | 			const parser::error& err = ps.error();
75 | 			
76 | 			cout << "SYNTAX ERROR @" << err.pos.col() << endl;
77 | 			for (auto msg : err.messages) {
78 | 				cout << "\t" << msg << endl;
79 | 			}
80 | 			for (auto exp : err.expected) {
81 | 				cout << "\tExpected " << exp << endl;
82 | 			}
83 | 		}
84 | 	}
85 | }
86 | %}
87 | 


--------------------------------------------------------------------------------
/grammars/tests/abc.in.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | abc
 3 | c
 4 | ac
 5 | aac
 6 | aaac
 7 | bbc
 8 | bc
 9 | x
10 | abcc
11 | abbc
12 | aabc
13 | 


--------------------------------------------------------------------------------
/grammars/tests/abc.out.txt:
--------------------------------------------------------------------------------
 1 | `' DOESN'T MATCH  @0
 2 | `abc' MATCHES
 3 | `c' MATCHES
 4 | `ac' MATCHES
 5 | `aac' MATCHES
 6 | `aaac' MATCHES
 7 | `bbc' DOESN'T MATCH  @0
 8 | `bc' DOESN'T MATCH  @0
 9 | `x' DOESN'T MATCH  @0
10 | `abcc' DOESN'T MATCH  @0
11 | `abbc' DOESN'T MATCH  @2
12 | `aabc' DOESN'T MATCH  @2
13 | 


--------------------------------------------------------------------------------
/grammars/tests/anbncn.in.txt:
--------------------------------------------------------------------------------
 1 | 
 2 | abc
 3 | aabbcc
 4 | aaabbbccc
 5 | abbcc
 6 | aabcc
 7 | aabbc
 8 | x
 9 | abcc
10 | abbc
11 | aabc
12 | 


--------------------------------------------------------------------------------
/grammars/tests/anbncn.out.txt:
--------------------------------------------------------------------------------
 1 | `' DOESN'T MATCH  @0
 2 | `abc' MATCHES
 3 | `aabbcc' MATCHES
 4 | `aaabbbccc' MATCHES
 5 | `abbcc' DOESN'T MATCH  @2
 6 | `aabcc' DOESN'T MATCH  @3
 7 | `aabbc' DOESN'T MATCH  @5
 8 | `x' DOESN'T MATCH  @0
 9 | `abcc' DOESN'T MATCH  @2
10 | `abbc' DOESN'T MATCH  @2
11 | `aabc' DOESN'T MATCH  @3
12 | 


--------------------------------------------------------------------------------
/grammars/tests/calc.in.txt:
--------------------------------------------------------------------------------
1 | 42
2 | 1+1
3 | 3-4
4 | 6*8
5 | 9/3
6 | 6*(1+3)/8
7 | 6 *  (	1 + 3 ) / 8  	 
8 | 6 * sin 90
9 | 


--------------------------------------------------------------------------------
/grammars/tests/calc.out.txt:
--------------------------------------------------------------------------------
1 | 42
2 | 2
3 | -1
4 | 48
5 | 3
6 | 3
7 | 3
8 | SYNTAX ERROR @4
9 | 


--------------------------------------------------------------------------------
/main.cpp:
--------------------------------------------------------------------------------
  1 | /*
  2 |  * Copyright (c) 2013 Aaron Moss
  3 |  * 
  4 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  5 |  * of this software and associated documentation files (the "Software"), to deal
  6 |  * in the Software without restriction, including without limitation the rights
  7 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
  8 |  * copies of the Software, and to permit persons to whom the Software is
  9 |  * furnished to do so, subject to the following conditions:
 10 |  * 
 11 |  * The above copyright notice and this permission notice shall be included in
 12 |  * all copies or substantial portions of the Software.
 13 |  * 
 14 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 15 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 16 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 17 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 18 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 19 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 20 |  * THE SOFTWARE.
 21 |  */
 22 | 
 23 | #include <algorithm>
 24 | #include <cctype>
 25 | #include <fstream>
 26 | #include <iostream>
 27 | #include <sstream>
 28 | #include <string>
 29 | 
 30 | #include "egg.hpp"
 31 | #include "parser.hpp"
 32 | #include "visitors/compiler.hpp"
 33 | #include "visitors/normalizer.hpp"
 34 | #include "visitors/printer.hpp"
 35 | 
 36 | /** Egg version; printed as "Egg version <VERSION>" in version mode */
 37 | static const char* VERSION = "0.3.2";
 38 | 
 39 | /** Egg usage string; printed after argv[0] in usage mode. NOTE(review): -n/--name is handled by args but is not listed here. */
 40 | static const char* USAGE = 
 41 | "[-c print|compile] [-i input_file] [-o output_file]\n\
 42 |  [--dbg] [--no-norm] [--no-memo] [--quiet] [--help] [--version] [--usage]";
 43 | 
 44 | /** Full Egg help string; printed verbatim in help mode */
 45 | static const char* HELP = 
 46 | "egg [command] [flags] [input-file [output-file]]\n\
 47 | \n\
 48 | Supported flags are\n\
 49 |  -i --input    input file (default stdin)\n\
 50 |  -o --output   output file (default stdout)\n\
 51 |  -c --command  command - either compile, print, help, usage, or version \n\
 52 |                (default compile)\n\
 53 |  -n --name     grammar name - if none given, takes the longest prefix of\n\
 54 |                the input or output file name (output preferred) which is a\n\
 55 |                valid Egg identifier (default empty)\n\
 56 |  -q --quiet    suppress warning output\n\
 57 |  --dbg         turn on debugging\n\
 58 |  --no-norm     turns off grammar normalization\n\
 59 |  --no-memo     turns off grammar memoization\n\
 60 |  --usage       print usage message\n\
 61 |  --help        print full help message\n\
 62 |  --version     print version string\n";
 63 | 
 64 | /// Command to run (chosen by the leading sub-command, -c/--command, or --usage/--help/--version)
 65 | enum egg_mode {
 66 | 	PRINT_MODE,    ///< Print grammar
 67 | 	COMPILE_MODE,  ///< Compile grammar (initial value set by the args constructor)
 68 | 	USAGE_MODE,    ///< Print usage
 69 | 	HELP_MODE,     ///< Print help
 70 | 	VERSION_MODE   ///< Print version
 71 | };
 72 | 
 73 | /// Type of output file, derived from the output file name's extension
 74 | enum file_type {
 75 | 	STREAM_TYPE,  ///< Output stream (unknown filetype); initial value when no output file has been parsed
 76 | 	CPP_HEADER,   ///< C++ header file (.hpp, .hxx, .hh, .h - compared case-insensitively)
 77 | 	CPP_SOURCE,   ///< C++ source file (.cpp, .cxx, .cc, .c - compared case-insensitively)
 78 | 	UNKNOWN_TYPE  ///< Unable to determine (no extension, or one not listed above)
 79 | };
 80 | 
 81 | class args { // parses the egg command line; owns the input/output file streams it opens and closes them on destruction
 82 | private:
 83 | 	bool eq(const char* lit, char* arg) { // true if arg is exactly the literal lit
 84 | 		return std::string(lit) == std::string(arg);
 85 | 	}
 86 | 
 87 | 	bool match(const char* shrt, const char* lng, char* arg) { // true if arg is exactly the short or the long spelling of a flag
 88 | 		std::string a(arg);
 89 | 		return std::string(shrt) == a || std::string(lng) == a;
 90 | 	}
 91 | 
 92 | 	std::string id_prefix(char* s) { // longest prefix of s of the form [A-Za-z_][A-Za-z0-9_]*, or "" if s does not start with a letter or '_'
 93 | 		int len = 0;
 94 | 		char c = s[len];
 95 | 		if ( ( c >= 'A' && c <= 'Z' )
 96 | 				|| ( c >= 'a' && c <= 'z' )
 97 | 				|| c == '_' ) {
 98 | 			c = s[++len];
 99 | 		} else {
100 | 			return std::string("");
101 | 		}
102 | 
103 | 		while ( ( c >= 'A' && c <= 'Z' )
104 | 				|| ( c >= 'a' && c <= 'z' )
105 | 				|| ( c >= '0' && c <= '9' )
106 | 				|| c == '_' ) {
107 | 			c = s[++len];  // the terminating '\0' fails the test above, so this never reads past the string
108 | 		}
109 | 
110 | 		return std::string(s, len);
111 | 	}
112 | 	
113 | 	file_type suffix_type(char* s) { // classifies a file name by the text after its last '.', ignoring case
114 | 		std::string t(s);
115 | 		std::string::size_type dot = t.rfind('.');
116 | 		
117 | 		if ( dot == std::string::npos ) return UNKNOWN_TYPE;
118 | 		
119 | 		std::string ext(t, dot+1);
120 | 		for (char& ch : ext) { ch = static_cast<char>(std::tolower(static_cast<unsigned char>(ch))); }  // tolower needs an unsigned char value; a negative char is undefined behaviour
121 | 		
122 | 		if ( ext == "hpp" || ext == "hxx" || ext == "hh" || ext == "h" ) return CPP_HEADER;
123 | 		else if ( ext == "cpp" || ext == "cxx" || ext == "cc" || ext == "c" ) return CPP_SOURCE;
124 | 		
125 | 		return UNKNOWN_TYPE;
126 | 	}
127 | 
128 | 	bool parse_mode(char* s) { // sets eMode from a command name; returns false (eMode untouched) if s is not a command
129 | 		if ( eq("print", s) ) {
130 | 			eMode = PRINT_MODE;
131 | 			return true;
132 | 		} else if ( eq("compile", s) ) {
133 | 			eMode = COMPILE_MODE;
134 | 			return true;
135 | 		} else if ( eq("help", s) ) {
136 | 			eMode = HELP_MODE;
137 | 			return true;
138 | 		} else if ( eq("usage", s) ) {
139 | 			eMode = USAGE_MODE;
140 | 			return true;
141 | 		} else if ( eq("version", s) ) {
142 | 			eMode = VERSION_MODE;
143 | 			return true;
144 | 		} else {
145 | 			return false;
146 | 		}
147 | 	}
148 | 
149 | 	void parse_input(char* s) { // opens s for reading; also derives the parser name from it unless a name or an output file was already given
150 | 		in = new std::ifstream(s);  // released in ~args(); whether the open succeeded is not checked here
151 | 		inName = s;
152 | 		if ( !nameFlag && out == nullptr ) {
153 | 			pName = id_prefix(s);
154 | 		}
155 | 	}
156 | 
157 | 	void parse_output(char* s) { // opens s for writing, records its file type, and derives the parser name from it unless one was given explicitly
158 | 		out = new std::ofstream(s);  // released in ~args()
159 | 		outName = s;
160 | 		outType = suffix_type(s);
161 | 		if ( !nameFlag ) {
162 | 			pName = id_prefix(s);
163 | 		}
164 | 	}
165 | 	
166 | 	void parse_name(char* s) { // explicit parser name; nameFlag stops later input/output files from overriding it
167 | 		pName = id_prefix(s);
168 | 		nameFlag = true;
169 | 	}
170 | 
171 | public:
172 | 	args(int argc, char** argv) // parses [command] [flags] [input-file [output-file]]; a flag missing its value silently ends parsing
173 | 		: in(nullptr), out(nullptr), 
174 | 		  inName(), outName(), outType(STREAM_TYPE), pName(), 
175 | 		  dbgFlag(false), nameFlag(false), normFlag(true), memoFlag(true), quietFlag(false),
176 | 		  eMode(COMPILE_MODE) {
177 | 		
178 | 		i = 1;
179 | 		if ( argc <= 1 ) return;
180 | 
181 | 		//parse optional sub-command
182 | 		if ( parse_mode(argv[i]) ) { ++i; }
183 | 		
184 | 		//parse explicit flags
185 | 		for (; i < argc; ++i) {
186 | 			if ( match("-i", "--input", argv[i]) ) {
187 | 				if ( i+1 >= argc ) return;
188 | 				parse_input(argv[++i]);
189 | 			} else if ( match("-o", "--output", argv[i]) ) {
190 | 				if ( i+1 >= argc ) return;
191 | 				parse_output(argv[++i]);
192 | 			} else if ( match("-c", "--command", argv[i]) ) {
193 | 				if ( i+1 >= argc ) return;
194 | 				parse_mode(argv[++i]);  // an unrecognized command name leaves the mode unchanged
195 | 			} else if ( match("-n", "--name", argv[i]) ) {
196 | 				if ( i+1 >= argc ) return;
197 | 				parse_name(argv[++i]);
198 | 			} else if ( eq("--dbg", argv[i]) ) {
199 | 				dbgFlag = true;
200 | 			} else if ( eq("--no-norm", argv[i]) ) {
201 | 				normFlag = false;
202 | 			} else if ( eq("--no-memo", argv[i]) ) {
203 | 				memoFlag = false;
204 | 			} else if ( match("-q", "--quiet", argv[i]) ) {  // short form is -q, as documented in HELP; "-i" belongs to --input
205 | 				quietFlag = true;
206 | 			} else if ( eq("--usage", argv[i]) ) {
207 | 				eMode = USAGE_MODE;
208 | 			} else if ( eq("--help", argv[i]) ) {
209 | 				eMode = HELP_MODE;
210 | 			} else if ( eq("--version", argv[i]) ) {
211 | 				eMode = VERSION_MODE;
212 | 			} else break;  // first argument that is not a flag: treat the rest as positional file names
213 | 		}
214 | 
215 | 		//parse optional positional input and output files
216 | 		if ( i < argc && in == nullptr )  parse_input(argv[i++]);
217 | 		if ( i < argc && out == nullptr ) parse_output(argv[i++]);
218 | 	}
219 | 
220 | 	~args() {
221 | 		if ( in != nullptr ) { in->close(); delete in; }
222 | 		if ( out != nullptr ) { out->close(); delete out; }
223 | 	}
224 | 
225 | 	std::istream& input() { if ( in ) return *in; else return std::cin; }  // falls back to stdin when no input file was given
226 | 	std::ostream& output() { if ( out ) return *out; else return std::cout; }  // falls back to stdout when no output file was given
227 | 	std::string inputFile() { return in ? inName : "<STDIN>"; }
228 | 	std::string outputFile() { return out ? outName : "<STDOUT>"; }
229 | 	file_type outputType() { return outType; }
230 | 	std::string name() { return pName; }
231 | 	bool dbg()  { return dbgFlag; }
232 | 	bool norm() { return normFlag; }
233 | 	bool memo() { return memoFlag; }
234 | 	bool quiet() { return quietFlag; }
235 | 	egg_mode mode() { return eMode; }
236 | 
237 | private:
238 | 	int i;				  ///< next unparsed value
239 | 	std::ifstream* in;	  ///< pointer to input stream (0 for stdin)
240 | 	std::ofstream* out;	  ///< pointer to output stream (0 for stdout)
241 | 	std::string inName;   ///< Name of the input file (empty if none)
242 | 	std::string outName;  ///< Name of the output file (empty if none)
243 | 	file_type outType;    ///< Type of output type (default STREAM_TYPE)
244 | 	std::string pName; 	  ///< the name of the parser (empty if none)
245 | 	bool dbgFlag;         ///< should egg print debugging information?
246 | 	bool nameFlag;		  ///< has the parser name been explicitly set?
247 | 	bool normFlag;        ///< should egg do grammar normalization?
248 | 	bool memoFlag;        ///< should the generated grammar do memoization?
249 | 	bool quietFlag;       ///< should warnings be suppressed?
250 | 	egg_mode eMode;		  ///< compiler mode to use
251 | };
252 | 
253 | /** Command line interface
254 |  *  egg [command] [flags] [input-file [output-file]]; exits 0 on success, 1 when the input grammar fails to parse
255 |  */
256 | int main(int argc, char** argv) {
257 | 
258 | 	args a(argc, argv);
259 | 	
260 | 	switch ( a.mode() ) {  // informational modes print and exit without reading any grammar
261 | 	case USAGE_MODE:
262 | 		std::cout << argv[0] << " " << USAGE << std::endl;
263 | 		return 0;
264 | 	case HELP_MODE:
265 | 		std::cout << HELP << std::endl;
266 | 		return 0;
267 | 	case VERSION_MODE:
268 | 		std::cout << "Egg version " << VERSION << std::endl;
269 | 		return 0;
270 | 	default: break;
271 | 	}
272 | 	
273 | 	parser::state ps(a.input());
274 | 	ast::grammar_ptr g;
275 | 	int status = 0;  // process exit status; set to 1 below when the grammar does not parse
276 | 	if ( egg::grammar(ps, g) ) {
277 | 		if ( a.dbg() ) { std::cout << "DONE PARSING" << std::endl; }
278 | 		if ( a.norm() ) {  // normalization runs unless --no-norm was given
279 | 			visitor::normalizer n;
280 | 			n.normalize(*g);
281 | 		}
282 | 
283 | 		switch ( a.mode() ) {
284 | 		case PRINT_MODE: {      // Pretty-print grammar
285 | 			visitor::printer p(a.output());
286 | 			p.print(*g);
287 | 			break;
288 | 		} case COMPILE_MODE: {  // Compile grammar
289 | 			visitor::compiler c(a.name(), a.output(), (a.outputType() != CPP_SOURCE));
290 | 			c.memo(a.memo());
291 | 			auto warnings = c.compile(*g);
292 | 			if ( ! a.quiet() ) for ( auto&& warning : warnings ) {
293 | 				std::cerr << "WARNING: " << warning << std::endl;
294 | 			}
295 | 			break;
296 | 		} default: break;
297 | 		}
298 | 		
299 | 	} else {
300 | 		const parser::error& err = ps.error();
301 | 		status = 1;  // let callers such as make or a shell script detect the failure
302 | 		std::cerr << "PARSE FAILURE @" << err.pos.line() << ":" << err.pos.col() << std::endl;
303 | 		for (auto msg : err.messages) {
304 | 			std::cerr << "\t" << msg << std::endl;
305 | 		}
306 | 		for (auto exp : err.expected) {
307 | 			std::cerr << "\tExpected " << exp << std::endl;
308 | 		}
309 | 	}
310 | 
311 | 	return status;
312 | } /* main() */
313 | 
314 | 


--------------------------------------------------------------------------------
/parser.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include <deque>
 26 | #include <functional>
 27 | #include <initializer_list>
 28 | #include <istream>
 29 | #include <set>
 30 | #include <sstream>
 31 | #include <stdexcept>
 32 | #include <string>
 33 | #include <typeinfo>
 34 | #include <unordered_map>
 35 | #include <utility>
 36 | 
 37 | /** Implements parser state for an Egg parser.
 38 |  *  
 39 |  *  @author Aaron Moss
 40 |  */
 41 | 
 42 | namespace parser {
 43 | 	
	using ind = unsigned long;  /**< unsigned index type */
 45 | 	
 46 | 	/** Human-readable position type */
 47 | 	struct posn {
 48 | 	friend class state;
 49 | 	private:
 50 | 		// constructor only available to state; discourages messing with posn
 51 | 		posn(ind index, ind line, ind col) : i(index), ln(line), cl(col) {}
 52 | 	public:
 53 | 		posn() : i(0), ln(0), cl(0) {}
 54 | 		
 55 | 		/** Checks by index if one position precedes another */
 56 | 		bool operator < (const posn& o) const { return i < o.i; }
 57 | 		
 58 | 		bool operator <= (const posn& o) const { return i <= o.i; }
 59 | 		bool operator > (const posn& o) const { return i > o.i; }
 60 | 		bool operator >= (const posn& o) const { return i >= o.i; }
 61 | 		bool operator == (const posn& o) const { return i == o.i; }
 62 | 		bool operator != (const posn& o) const { return i != o.i; }
 63 | 		
 64 | 		/** @return the number of positions by which another position precedes 
 65 | 		 *          this position; 0 for this before that.
 66 | 		 */
 67 | 		ind operator - (const posn& o) const {
 68 | 			return ( i < o.i ) ? 0 : i - o.i;
 69 | 		}
 70 | 		
 71 | 		/** @return the character index */
 72 | 		ind index() const { return i; }
 73 | 		/** @return the line in the file */
 74 | 		ind line() const { return ln; }
 75 | 		/** @return the column in the file */
 76 | 		ind col() const { return cl; }
 77 | 		
 78 | 	private:
 79 | 		ind i;   /**< input index */
 80 | 		ind ln;  /**< line number */
 81 | 		ind cl;  /**< column number */
 82 | 	};
 83 | 	
 84 | 	/** Represents a parsing error.
 85 | 	 *  Provides details about position and error. 
 86 | 	 */
 87 | 	struct error {
 88 | 		error(const posn& p) : pos(p) {}
 89 | 		error() : pos() {}
 90 | 		
 91 | 		/** Merges two errors.
 92 | 		 *  Uses Bryan Ford's heuristic of "furthest forward error information".
 93 | 		 */
 94 | 		error& operator |= (const error& o) {
 95 | 			if ( pos > o.pos /* || o.empty() */ ) return *this;
 96 | 			if ( pos < o.pos /* || empty() */ ) return *this = o;
 97 | 			
 98 | 			expected.insert(o.expected.begin(), o.expected.end());
 99 | 			messages.insert(o.messages.begin(), o.messages.end());
100 | 			return *this;
101 | 		}
102 | 		
103 | 		/** Adds an "expected" message */
104 | 		inline error& expect(const std::string& s) { expected./*emplace*/insert(s); return *this; }
105 | 		
106 | 		/** Adds a programmer-defined error message */
107 | 		inline error& message(const std::string& s) { messages./*emplace*/insert(s); return *this; }
108 | 		
109 | 		/** Tests both sets of messages for emptiness */
110 | 		inline bool empty() const { return expected.empty() && messages.empty(); }
111 | 		
112 | 		posn pos;                        /**< The position of the error */
113 | 		std::set<std::string> expected;  /**< Constructs expected here */
114 | 		std::set<std::string> messages;  /**< Error messages */
115 | 	}; // struct error
116 | 	
117 | 	/** Error thrown when a parser is asked for state it has forgotten. */
118 | 	struct forgotten_state_error : public std::range_error {
119 | 		
120 | 		/** Default constructor.
121 | 		 *  @param req		Requested index
122 | 		 *  @param avail	Minimum available index
123 | 		 */
124 | 		forgotten_state_error(const posn& req, const posn& avail) throw() 
125 | 			: std::range_error("Forgotten state error"), 
126 | 			req(req), avail(avail) {}
127 | 		
128 | 		/** Inherited from std::exception. */
129 | 		const char* what() const throw() {
130 | 			try {
131 | 				std::stringstream ss("Forgotten state error");
132 | 				ss << ": requested " << req.line() << ":" << req.col() 
133 | 				   << " < " << avail.line() << ":" << avail.col();
134 | 				return ss.str().c_str();
135 | 			} catch (std::exception const& e) {
136 | 				return "Forgotten state error";
137 | 			}
138 | 		}
139 | 		
140 | 		/** requested index */
141 | 		posn req;
142 | 		/** minimum available index */
143 | 		posn avail;
144 | 	}; /* struct forgotten_range_error */
145 | 	
146 | 	/** Memoization table entry */
147 | 	struct memo {
148 | 		/** Typesafe dynamic type */
149 | 		class any {
150 | 		private:
151 | 			/** Untyped container class */
152 | 			struct dyn {
153 | 				virtual ~dyn() {}
154 | 	
155 | 				/** Gets the type of the held object */
156 | 				virtual const std::type_info& type() const = 0;
157 | 	
158 | 				/** Copies the held object */
159 | 				virtual dyn* clone() const = 0;
160 | 			};  // struct dyn
161 | 
162 | 			/** Typed container class */
163 | 			template <typename T>
164 | 			struct of : public dyn {
165 | 				/** Value constructor */
166 | 				of (const T& v) : v(v) {}
167 | 	
168 | 				/** Gets the type of the held object */
169 | 				virtual const std::type_info& type() const { return typeid(T); }
170 | 	
171 | 				/** Returns a copy of the held object */
172 | 				virtual dyn* clone() const { return new of<T>(v); }
173 | 	
174 | 				/** The held value */	
175 | 				const T v;
176 | 			};  // struct of<T>
177 | 		
178 | 		public:
179 | 			/** Empty constructor */
180 | 			any() : p(nullptr) {}
181 | 
182 | 			/** Typed constructor */
183 | 			template <typename T>
184 | 			any(const T& t) : p(new of<T>(t)) {}
185 | 
186 | 			/** Copy constructor */
187 | 			any(const any& o) : p(o.p ? o.p->clone() : nullptr) {}
188 | 
189 | 			/** Assignment operator */
190 | 			any& operator = (const any& o) {
191 | 				if ( &o != this ) {
192 | 					delete p;
193 | 					p = o.p ? o.p->clone() : nullptr;
194 | 				}
195 | 				return *this;
196 | 			}
197 | 
198 | 			/** Typed assignment operator */
199 | 			template <typename T>
200 | 			any& operator = (const T& v) {
201 | 				delete p;
202 | 				p = new of<T>(v);
203 | 				return *this;
204 | 			}
205 | 
206 | 			/** Contained type */
207 | 			const std::type_info& type() const {
208 | 				return p ? p->type() : typeid(void);
209 | 			}
210 | 
211 | 			/** Bind value */
212 | 			template <typename T>
213 | 			void bind(T& v) {
214 | 				if ( type() == typeid(T) ) {
215 | 					v = static_cast<of<T>*>(p)->v;
216 | 				}
217 | 			}
218 | 
219 | 			~any() { delete p; }
220 | 
221 | 		private:
222 | 			dyn* p;
223 | 		};  // class memo::any
224 | 		
225 | 		/** Default constructor - sets up a failed match */
226 | 		memo() : success(false) {}
227 | 	
228 | 		/** Success constructor */
229 | 		template <typename T>
230 | 		memo(const posn& end, const T& result) : success(true), end(end), result(result) {}
231 | 	
232 | 		bool success;  ///< Did the parser match?
233 | 		posn end;      ///< Endpoint in case of a match
234 | 		any result;    ///< Result object (if any)
235 | 	};
236 | 	
237 | 	/** Parser state */
238 | 	class state {
239 | 	public:
240 | 		typedef char                              value_type;
241 | 		typedef std::basic_string<value_type>     string_type;
242 | 		typedef std::basic_istream<value_type>    stream_type;
243 | 		typedef std::deque<value_type>::iterator  iterator;
244 | 		typedef std::pair<iterator, iterator>     range_type;
245 | 	
246 | 	private:
247 | 		/** Read a single character into the parser.
248 | 		 *  @return was a character read?
249 | 		 */
250 | 		bool read() {
251 | 			int c = in.get();
252 | 			
253 | 			// Check EOF
254 | 			if ( c == std::char_traits<value_type>::eof() ) return false;
255 | 			
256 | 			// Check newline
257 | 			if ( c == '\n' ) lines.push_back(off.i + str.size() + 1);
258 | 			
259 | 			// Add to stored input
260 | 			str.push_back(c);
261 | 			return true;
262 | 		}
263 | 		
264 | 		/** Read more characters into the parser.
265 | 		 *  @param n		The number of characters to read
266 | 		 *  @return The number of characters read
267 | 		 */
268 | 		ind read(ind n) {
269 | 			value_type s[n];
270 | 			// Read into buffer
271 | 			in.read(s, n);
272 | 			// Count read characters
273 | 			ind r = in.gcount();
274 | 			// Track newlines
275 | 			ind i_max = off.i + str.size();
276 | 			for (ind i = 0; i < r; ++i) {
277 | 				if ( s[i] == '\n' ) { lines.push_back(i_max + i + 1); }
278 | 			}
279 | 			// Add to stored input
280 | 			str.insert(str.end(), s, s+r);
281 | 			return r;
282 | 		}
283 | 		
284 | 	public:
285 | 		/** Default constructor.
286 | 		 *  Initializes state at beginning of input stream.
287 | 		 *  @param in		The input stream to read from
288 | 		 */
289 | 		state(stream_type& in) : pos(), off(), str(), lines(), memo_table(), err(), in(in) {
290 | 			// first line starts at 0
291 | 			lines.push_back(0);
292 | 			// read first character
293 | 			read();
294 | 		}
295 | 		
296 | 		/** Reads at the cursor.
297 | 		 *  @return The character at the current position, or '\0' for end of 
298 | 		 *          stream.
299 | 		 */
300 | 		value_type operator() () const {
301 | 			ind i = pos.i - off.i;
302 | 			if ( i >= str.size() ) return '\0';
303 | 			return str[i];
304 | 		}
305 | 		
306 | 		/** Reads at the given position.
307 | 		 *  @param p    The position to read at (should have been previously seen)
308 | 		 *  @return the character at the given position, or '\0' for end of 
309 | 		 *          stream.
310 | 		 */
311 | 		value_type operator() (const posn& p) {
312 | 			if ( p < off ) throw forgotten_state_error(p, off);
313 | 			
314 | 			ind i = p.i - off.i;
315 | 			if ( i >= str.size() ) return '\0';
316 | 			return str[i];
317 | 		}
318 | 		
319 | 		/** @return the current position */
320 | 		struct posn posn() const { return pos; }
321 | 		
322 | 		/** @return the current offset in the stream */
323 | 		struct posn offset() const { return off; }
324 | 		
325 | 		/** Sets the cursor.
326 | 		 *  @param p    The position to set (should have previously been seen)
327 | 		 *  @throws forgotten_state_error on p < off (that is, moving to 
328 | 		 *  		position previously discarded)
329 | 		 */
330 | 		void set_posn(const struct posn& p) {
331 | 			// Fail on forgotten index
332 | 			if ( p < off ) throw forgotten_state_error(p, off);
333 | 			
334 | 			pos = p;
335 | 		}
336 | 		
337 | 		/** Advances position one step. 
338 | 		 *  Will not advance past end-of-stream.
339 | 		 */
340 | 		state& operator ++ () {
341 | 			ind i = pos.i - off.i;
342 | 			
343 | 			// ignore if already end of stream
344 | 			if ( i >= str.size() ) return *this;
345 | 			
346 | 			// update index
347 | 			++pos.i;
348 | 			
349 | 			// read more input if neccessary, terminating on end-of-stream
350 | 			if ( ++i == str.size() && !read() ) { ++pos.cl; return *this; }
351 | 			
352 | 			// update row and column
353 | 			ind j = pos.ln - off.ln;
354 | 			if ( j == lines.size() - 1 || pos.i < lines[j+1] ) {
355 | 				++pos.cl;
356 | 			} else {
357 | 				++pos.ln;
358 | 				pos.cl = 0;
359 | 			}
360 | 			
361 | 			return *this;
362 | 		}
363 | 		
364 | 		/** Advances position n steps.
365 | 		 *  Will not advance past end-of-stream
366 | 		 */
367 | 		state& operator += (ind n) {
368 | 			ind i = pos.i - off.i;
369 | 			
370 | 			// check if we need to read more input
371 | 			if ( i + n >= str.size() ) {
372 | 				// ignore if already end of stream
373 | 				if ( i >= str.size() ) return *this;
374 | 				
375 | 				// read extra
376 | 				ind nn = i + n + 1 - str.size();
377 | 				ind r = read(nn);
378 | 				
379 | 				// Check read to end of stream, update n to be there
380 | 				if ( r < nn ) n = str.size() - i;
381 | 			}
382 | 			
383 | 			// update position
384 | 			pos.i += n;
385 | 			for (ind j = pos.ln - off.ln + 1; j < lines.size(); ++j) {
386 | 				if ( pos.i >= lines[j] ) { ++pos.ln; } else break;
387 | 			}
388 | 			pos.cl = pos.i - lines[pos.ln - off.ln];
389 | 			
390 | 			return *this;
391 | 		}
392 | 				
393 | 		/** Range operator.
394 | 		 *  Returns a pair of iterators, begin and end, containing up to the 
395 | 		 *  given number of elements, starting at the given position. Returned 
396 | 		 *  begin and end iterators may be invalidated by calls to any 
397 | 		 *  non-const method of this class.
398 | 		 *  @param p		The beginning of the range
399 | 		 *  @param n		The maximum number of elements in the range
400 | 		 *  @throws forgotten_state_error on p < off (that is, asking for input 
401 | 		 *  		previously discarded)
402 | 		 */
403 | 		range_type range(const struct posn& p, ind n) {
404 | 			// Fail on forgotten index
405 | 			if ( p < off ) throw forgotten_state_error(p, off);
406 | 			
407 | 			// Get index into stored input
408 | 			ind ib = p.i - off.i;
409 | 			ind ie = ib + n;
410 | 			
411 | 			// Expand stored input if needed
412 | 			if ( ie > str.size() ) {
413 | 				ind nn = ie - str.size();
414 | 				read(nn);
415 | 			}
416 | 			
417 | 			// Get iterators, adjusting for the end of the input
418 | 			iterator bIter, eIter;
419 | 			
420 | 			if ( ie >= str.size() ) {
421 | 				eIter = str.end();
422 | 				if ( ib >= str.size() ) {
423 | 					bIter = str.end();
424 | 				} else {
425 | 					bIter = str.begin() + ib;
426 | 				}
427 | 			} else {
428 | 				bIter = str.begin() + ib;
429 | 				eIter = str.begin() + ie;
430 | 			}
431 | 			
432 | 			return range_type(bIter, eIter);
433 | 		}
434 | 		
435 | 		/** Substring operator.
436 | 		 *  Convenience for the string formed by the characters in range(p, n).
437 | 		 *  @param p		The beginning of the range
438 | 		 *  @param n		The maximum number of elements in the range
439 | 		 *  @throws forgotten_state_error on i < off (that is, asking for input 
440 | 		 *  		previously discarded)
441 | 		 */
442 | 		string_type string(const struct posn& p, ind n) {
443 | 			range_type iters = range(p, n);
444 | 			return string_type(iters.first, iters.second);
445 | 		}
446 | 		
447 | 		/** Gets memoization table entry at the current position.
448 | 		 *  @param id    ID of the type to get the memoization entry for
449 | 		 *  @param m     Output parameter for memoization entry, if found
450 | 		 *  @return Was there a memoization entry?
451 | 		 */
452 | 		bool memo(ind id, struct memo& m) {
453 | 			// Get table iterator
454 | 			ind i = pos.i - off.i;
455 | 			if ( i >= memo_table.size() ) return false;
456 | 			auto& tab = memo_table[i];
457 | 			auto it = tab.find(id);
458 | 			
459 | 			// Break if nothing set
460 | 			if ( it == tab.end() ) return false;
461 | 			
462 | 			// Set output parameter
463 | 			m = it->second;
464 | 			return true;
465 | 		}
466 | 		
467 | 		/** Sets memoization table entry.
468 | 		 *  @param p     Position to set the memo table entry for; will silently ignore if position 
469 | 		 *               has been forgotten
470 | 		 *  @param id    ID of the type to set the memoization entry for
471 | 		 *  @param m     Memoization entry to set
472 | 		 *  @return Was a memoization table entry set?
473 | 		 */
474 | 		bool set_memo(const struct posn& p, ind id, const struct memo& m) {
475 | 			// ignore forgotten position
476 | 			if ( p < off ) return false;
477 | 			
478 | 			// ensure table initialized
479 | 			ind i = p.i - off.i;
480 | 			for (ind ii = memo_table.size(); ii <= i; ++ii) memo_table.emplace_back();
481 | 			
482 | 			// set table entry
483 | 			memo_table[i][id] = m;
484 | 			return true;
485 | 		}
486 | 		
487 | 		/** Get the parser's internal error object */
488 | 		const struct error& error() const { return err; }
489 | 		
490 | 		/** Adds an "expected" message at the current position */
491 | 		void expect(const std::string& s) {
492 | 			struct error e; e.pos = pos; e.expect(s);
493 | 			err |= e;
494 | 		}
495 | 		
496 | 		/** Adds a programmer-defined error message at the current position */
497 | 		void message(const std::string& s) {
498 | 			struct error e; e.pos = pos; e.message(s);
499 | 			err |= e;
500 | 		}
501 | 		
502 | 		/** Adds an unexplained error at the current position */
503 | 		void fail() {
504 | 			struct error e; e.pos = pos;
505 | 			err |= e;
506 | 		}
507 | 		
508 | 		/** Attempts to match a character at the current position */
509 | 		bool matches(value_type c) {
510 | 			if ( (*this)() != c ) return false;
511 | 			++(*this);
512 | 			return true;
513 | 		}
514 | 		
515 | 		/** Attempts to match a string at the current position */
516 | 		bool matches(const string_type& s) {
517 | 			if ( string(pos, s.size()) != s ) return false;
518 | 			(*this) += s.size();
519 | 			return true;
520 | 		}
521 | 		
522 | 		/** Attempts to match any character at the current position
523 | 		 *  @param psVal    The character matched, if any
524 | 		 */
525 | 		bool matches_any(value_type& psVal) {
526 | 			value_type c = (*this)();
527 | 			if ( c == '\0' ) return false;
528 | 			psVal = c;
529 | 			++(*this);
530 | 			return true;
531 | 		}
532 | 		
533 | 		/** Attempts to match any character at the current position */
534 | 		bool matches_any() {
535 | 			if ( (*this)() == '\0' ) return false;
536 | 			++(*this);
537 | 			return true;
538 | 		}
539 | 		
540 | 		/** Attempts to match a character in the given range at the current 
541 | 		 *  position.
542 | 		 *  @param s        The start of the range
543 | 		 *  @param e        The end of the range
544 | 		 *  @param psVal    The character matched, if any
545 | 		 */
546 | 		bool matches_in(value_type s, value_type e, value_type& psVal) {
547 | 			value_type c = (*this)();
548 | 			if ( c < s || c > e ) return false;
549 | 			psVal = c;
550 | 			++(*this);
551 | 			return true;
552 | 		}
553 | 		
554 | 		/** Attempts to match a character in the given range at the current 
555 | 		 *  position.
556 | 		 *  @param s        The start of the range
557 | 		 *  @param e        The end of the range
558 | 		 */
559 | 		bool matches_in(value_type s, value_type e) {
560 | 			value_type c = (*this)();
561 | 			if ( c < s || c > e ) return false;
562 | 			++(*this);
563 | 			return true;
564 | 		}
565 | 	private:
566 | 		/** Current parsing location */
567 | 		struct posn pos;
568 | 		/** Offset of start of str from the beginning of the stream */
569 | 		struct posn off;
570 | 		/** Characters currently in use by the parser */
571 | 		std::deque<value_type> str;
572 | 		/** Beginning indices of each line, starting from off.line */
573 | 		std::deque<ind> lines;
574 | 		/** Memoization tables for each stored input index */
575 | 		std::deque<std::unordered_map<ind, struct memo>> memo_table;
576 | 		/** Set of most recent parsing errors */
577 | 		struct error err;
578 | 		/** Input stream to read characters from */
579 | 		stream_type& in;
580 | 	}; /* class state */
581 | 	
	/** Parser combinator type: a callable that is given the parser state and 
	 *  returns a bool (the combinators below return true for a match) */
	using combinator = std::function<bool(state&)>;
	/** List of parser combinators.
	 *  Note that a std::initializer_list does not own its elements; copying 
	 *  the list does not copy the combinators it refers to. */
	using combinator_list = std::initializer_list<combinator>;
	/** Typed nonterminal type: pointer to a function that takes the parser 
	 *  state and a T by reference, and returns a bool */
	template <typename T>
	using nonterminal = bool (*)(state&,T&);
589 | 	
590 | 	/** Character literal parser */
591 | 	combinator literal(state::value_type c) {
592 | //		return [c](state& ps) { return ps.matches(c); };
593 | 		return [c](state& ps) {
594 | 			if ( ps.matches(c) ) { return true; }
595 | 			
596 | 			ps.fail();
597 | 			return false;
598 | 		};
599 | 	}
600 | 	
601 | 	/** Character literal parser 
602 | 	 *  @param psVal    Will be bound to character matched
603 | 	 */
604 | 	combinator literal(state::value_type c, state::value_type& psVal) {
605 | 		return [c,&psVal](state& ps) {
606 | 			if ( ps.matches(c) ) { psVal = c; return true; }
607 | //			else return false;
608 | 			
609 | 			ps.fail();
610 | 			return false;
611 | 		};
612 | 	}
613 | 	
614 | 	/** String literal parser */
615 | 	combinator literal(const state::string_type& s) {
616 | //		return [&s](state& ps) { return ps.matches(s); };
617 | 		return [&s](state& ps) {
618 | 			if ( ps.matches(s) ) { return true; }
619 | 			
620 | 			ps.fail();
621 | 			return false;
622 | 		};
623 | 	}
624 | 	
625 | 	/** Any character parser */
626 | 	combinator any() {
627 | //		return [](state& ps) { return ps.matches_any(); };
628 | 		return [](state& ps) {
629 | 			if ( ps.matches_any() ) { return true; }
630 | 			ps.fail();
631 | 			return false;
632 | 		};
633 | 	}
634 | 	
635 | 	/** Any character parser
636 | 	 *  @param psVal    Will be bound to the character matched
637 | 	 */
638 | 	combinator any(state::value_type& psVal) {
639 | //		return [&psVal](state& ps) { return ps.matches_any(psVal); };
640 | 		return [&psVal](state& ps) {
641 | 			if ( ps.matches_any(psVal) ) { return true; }
642 | 			
643 | 			ps.fail();
644 | 			return false;
645 | 		};
646 | 	}
647 | 	
648 | 	/** Character range parser parser */
649 | 	combinator between(state::value_type s, state::value_type e) {
650 | //		return [s,e](state& ps) { return ps.matches_in(s, e); };
651 | 		return [s,e](state& ps) {
652 | 			if ( ps.matches_in(s, e) ) { return true; }
653 | 			
654 | 			ps.fail();
655 | 			return false;
656 | 		};
657 | 	}
658 | 	
659 | 	/** Character range parser
660 | 	 *  @param psVal    Will be bound to the character matched
661 | 	 */
662 | 	combinator between(state::value_type s, state::value_type e, state::value_type& psVal) {
663 | //		return [s,e,&psVal](state& ps) { return ps.matches_in(s, e, psVal); };
664 | 		return [s,e,&psVal](state& ps) {
665 | 			if ( ps.matches_in(s, e, psVal) ) { return true; }
666 | 			
667 | 			ps.fail();
668 | 			return false;
669 | 		};
670 | 	}
671 | 	
672 | 	/** Matches all or none of a sequence of parsers */
673 | 	combinator sequence(combinator_list fs) {
674 | 		return [fs](state& ps) {
675 | 			posn psStart = ps.posn();
676 | 			for (auto f : fs) {
677 | 				if ( ! f(ps) ) { ps.set_posn(psStart); return false; }
678 | 			}
679 | 			return true;
680 | 		};
681 | 	}
682 | 	
683 | 	/** Matches one of a set of alternate parsers */
684 | 	combinator choice(combinator_list fs) {
685 | 		return [fs](state& ps) {
686 | 			for (auto f : fs) {
687 | 				if ( f(ps) ) return true;
688 | 			}
689 | 			return false;
690 | 		};
691 | 	}
692 | 	
693 | 	/** Matches a parser any number of times */
694 | 	combinator many(const combinator& f) {
695 | 		return [&f](state& ps) {
696 | 			while ( f(ps) )
697 | 				;
698 | 			return true;
699 | 		};
700 | 	}
701 | 	
702 | 	/** Matches a parser some positive number of times */
703 | 	combinator some(const combinator& f) {
704 | 		return [&f](state& ps) {
705 | 			if ( ! f(ps) ) return false;
706 | 			while ( f(ps) )
707 | 				;
708 | 			return true;
709 | 		};
710 | 	}
711 | 	
712 | 	/** Optionally matches a parser */
713 | 	combinator option(const combinator& f) {
714 | 		return [&f](state& ps) {
715 | 			f(ps);
716 | 			return true;
717 | 		};
718 | 	}
719 | 	
720 | 	/** Looks ahead to match a parser without consuming input */
721 | 	combinator look(const combinator& f) {
722 | 		return [&f](state& ps) {
723 | 			posn psStart = ps.posn();
724 | 			if ( f(ps) ) { ps.set_posn(psStart); return true; }
725 | 			return false;
726 | 		};
727 | 	}
728 | 	
729 | 	/** Looks ahead to not match a parser without consuming input */
730 | 	combinator look_not(const combinator& f) {
731 | 		return [&f](state& ps) {
732 | 			posn psStart = ps.posn();
733 | 			if ( f(ps) ) { ps.set_posn(psStart); return false; }
734 | 			return true;
735 | 		};
736 | 	}
737 | 	
738 | 	/** Binds a variable to a non-terminal */
739 | 	template <typename T>
740 | 	combinator bind(T& psVal, nonterminal<T> f) {
741 | 		return [&psVal,f](state& ps) { return f(ps, psVal); };
742 | 	}
743 | 	
744 | 	/** Binds a throwaway variable to a non-terminal */
745 | 	template <typename T>
746 | 	combinator unbind(nonterminal<T> f) {
747 | 		return [f](state& ps) { T _; return f(ps, _); };
748 | 	}
749 | 	
750 | 	/** Memoizes a combinator with the given memoization ID */
751 | 	combinator memoize(ind id, const combinator& f) {
752 | 		return [id,&f](state& ps) {
753 | 			memo m;
754 | 			if ( ps.memo(id, m) ) {
755 | 				if ( m.success ) ps.set_posn(m.end);
756 | 			} else {
757 | 				posn psStart = ps.posn();
758 | 				m.success = f(ps);
759 | 				m.end = ps.posn();
760 | 				ps.set_memo(psStart, id, m);
761 | 			}
762 | 			return m.success;
763 | 		};
764 | 	}
765 | 	
766 | 	/** Memoizes and binds a combinator with the given memoization ID */
767 | 	template <typename T>
768 | 	combinator memoize(ind id, T& psVal, const combinator& f) {
769 | 		return [id,&psVal,f](state& ps) {
770 | 			memo m;
771 | 			if ( ps.memo(id, m) ) {
772 | 				if ( m.success ) {
773 | 					m.result.bind(psVal);
774 | 					ps.set_posn(m.end);
775 | 				}
776 | 			} else {
777 | 				posn psStart = ps.posn();
778 | 				m.success = f(ps);
779 | 				m.end = ps.posn();
780 | 				if ( m.success ) m.result = psVal;
781 | 				ps.set_memo(psStart, id, m);
782 | 			}
783 | 			return m.success;
784 | 		};
785 | 	}
786 | 	
787 | 	namespace {
788 | 		/** Helper function for memoizing repetition */
789 | 		memo many_memoized(ind id, const combinator& f, state& ps) {
790 | 			memo m;
791 | 			if ( ps.memo(id, m) ) {
792 | 				if ( m.end > ps.posn() ) ps.set_posn(m.end);
793 | 			} else {
794 | 				posn psStart = ps.posn();
795 | 				if ( f(ps) ) {
796 | 					m = many_memoized(id, f, ps);
797 | 				}
798 | 				m.success = true;
799 | 				m.end = ps.posn();
800 | 				ps.set_memo(psStart, id, m);
801 | 			}
802 | 			return m;
803 | 		}
804 | 	} /* anonymous namespace */
805 | 	
806 | 	/** Memoizes a many-matcher  */
807 | 	combinator memoize_many(ind id, const combinator& f) {
808 | 		return [id,&f](state& ps) {
809 | 			many_memoized(id, f, ps);
810 | 			return true;
811 | 		};
812 | 	}
813 | 	
814 | 	/** Memoizes a some-matcher */
815 | 	combinator memoize_some(ind id, const combinator& f) {
816 | 		return [id,&f](state& ps) {
817 | 			posn psStart = ps.posn();
818 | 			many_memoized(id, f, ps);
819 | 			return ( ps.posn() > psStart );
820 | 		};
821 | 	}
822 | 	
823 | 	/** Captures a string */
824 | 	combinator capture(std::string& s, const combinator& f) {
825 | 		return [&s,&f](state& ps) {
826 | 			posn psStart = ps.posn();
827 | 			if ( ! f(ps) ) return false;
828 | 			s = ps.string(psStart, ps.posn() - psStart);
829 | 			return true;
830 | 		};
831 | 	}
832 | 	
833 | 	/** Empty parser; always matches */
834 | 	combinator empty() {
835 | 		return [](state&) { return true; };
836 | 	}
837 | 	
838 | 	/** Failure parser; inserts message */
839 | 	combinator fail(const std::string& s) {
840 | 		return [&s](state& ps) { ps.message(s); return false; };
841 | 	}
842 | 	
843 | 	/** Names a parser for better error messages */
844 | 	combinator named(const std::string& s, const combinator& f) {
845 | 		return [&s,&f](state& ps) {
846 | 			if ( f(ps) ) return true;
847 | 			
848 | 			ps.expect(s);
849 | 			return false;
850 | 		};
851 | 	}
852 | 	
853 | } /* namespace parser */
854 | 
855 | 


--------------------------------------------------------------------------------
/utils/strings.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include <sstream>
 26 | #include <string>
 27 | 
 28 | namespace strings {
 29 | 	using std::string;
 30 | 	using std::stringstream;
 31 | 
 32 | 	/** Returns a string representing the given character with all special 
 33 | 	 *  characters '\n', '\r', '\t', '\\', '\'', and '\"' backslash-escaped. */
 34 | 	static string escape(const char c) {
 35 | 		switch ( c ) {
 36 | 		case '\n': return "\\n";
 37 | 		case '\r': return "\\r";
 38 | 		case '\t': return "\\t";
 39 | 		case '\\': return "\\\\";
 40 | 		case '\'': return "\\\'";
 41 | 		case '\"': return "\\\"";
 42 | 		default:   return string(1, c);
 43 | 		}
 44 | 	}
 45 | 	
 46 | 	/** Returns escape(c) surrounded by single quotes */
 47 | 	static string quoted_escape(const char c) {
 48 | 		return "\'" + escape(c) + "\'";
 49 | 	}
 50 | 
 51 | 	/** Returns a string representing the given string with all special 
 52 | 	 *  characters '\n', '\r', '\t', '\\', '\'', and '\"' backslash-escaped. */
 53 | 	static string escape(const string& s) {
 54 | 		stringstream ss;
 55 | 		for (auto iter = s.begin(); iter != s.end(); ++iter) {
 56 | 			ss << escape(*iter);
 57 | 		}
 58 | 		return ss.str();
 59 | 	}
 60 | 	
 61 | 	/** Returns escape(c) surrounded by double quotes */
 62 | 	static string quoted_escape(const string& s) {
 63 | 		return "\"" + escape(s) + "\"";
 64 | 	}
 65 | 
 66 | 	/** Converts one of the characters 'n', 'r', 't' to the escaped character 
 67 | 	 *  '\n', etc. Non-escaped characters will be returned as is. */
 68 | 	static char unescaped_char(const char c) {
 69 | 		switch ( c ) {
 70 | 		case 'n': return '\n';
 71 | 		case 'r': return '\r';
 72 | 		case 't': return '\t';
 73 | 		default: return c;
 74 | 		}
 75 | 	}
 76 | 
 77 | 	/** Converts escape sequences in a string to their character values. */
 78 | 	static string unescape(const string& s) {
 79 | 		stringstream ss;
 80 | 		for (auto it = s.begin(); it != s.end(); ++it) {
 81 | 			char c = *it;
 82 | 			if ( c == '\\' ) {
 83 | 				++it;
 84 | 				if ( it == s.end() ) break;
 85 | 				c = *it;
 86 | 				ss << unescaped_char(c);
 87 | 			} else {
 88 | 				ss << c;
 89 | 			}
 90 | 		}
 91 | 		return ss.str();
 92 | 	}
 93 | 	
 94 | 	/** Converts escape sequences in an Egg error string to their character values. */
 95 | 	static string unescape_error(const string& s) {
 96 | 		stringstream ss;
 97 | 		for (auto it = s.begin(); it != s.end(); ++it) {
 98 | 			char c = *it;
 99 | 			if ( c == '\\' ) {
100 | 				++it;
101 | 				if ( it == s.end() ) break;
102 | 				ss << *it;
103 | 			} else {
104 | 				ss << c;
105 | 			}
106 | 		}
107 | 		return ss.str();
108 | 	}
109 | 
110 | 	/** Replaces all sequences of newlines with spaces. */
111 | 	static string single_line(const string& s) {
112 | 		stringstream ss;
113 | 		bool hadLineBreak = false;
114 | 		for (auto iter = s.begin(); iter != s.end(); ++iter) {
115 | 			char c = *iter;
116 | 			if ( c == '\n' || c == '\r' ) { 
117 | 				if ( !hadLineBreak ) { ss << ' '; }
118 | 				hadLineBreak = true;
119 | 			} else {
120 | 				ss << c;
121 | 				hadLineBreak = false;
122 | 			}
123 | 		}
124 | 		return ss.str();
125 | 	}
126 | }; /* namespace strings */
127 | 
128 | 


--------------------------------------------------------------------------------
/visitors/compiler.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include <iostream>
 26 | #include <map>
 27 | #include <string>
 28 | #include <unordered_map>
 29 | #include <utility>
 30 | #include <vector>
 31 | 
 32 | #include "../ast.hpp"
 33 | #include "../utils/strings.hpp"
 34 | 
 35 | namespace visitor {
 36 | 
 37 | 	/** Gets a list of variables declared in a grammar rule. */
 38 | 	class variable_list : ast::tree_visitor {
 39 | 	public:
 40 | 		variable_list() {}
 41 | 
 42 | 		variable_list(const ast::grammar& g) {
 43 | 			for (auto it = g.rs.begin(); it != g.rs.end(); ++it) {
 44 | 				ast::grammar_rule& r = **it;
 45 | 				types.insert(std::make_pair(r.name, r.type));
 46 | 			}
 47 | 		}
 48 | 
 49 | 		void visit(ast::range_matcher& m) {
 50 | 			if ( ! m.var.empty() ) {
 51 | 				vars.insert(std::make_pair(m.var, "char"));
 52 | 			}
 53 | 		}
 54 | 
 55 | 		void visit(ast::rule_matcher& m) {
 56 | 			if ( ! m.var.empty() ) {
 57 | 				vars.insert(std::make_pair(m.var, types[m.rule]));
 58 | 			}
 59 | 		}
 60 | 
 61 | 		void visit(ast::any_matcher& m) {
 62 | 			if ( ! m.var.empty() ) {
 63 | 				vars.insert(std::make_pair(m.var, "char"));
 64 | 			}
 65 | 		}
 66 | 		
 67 | 		void visit(ast::capt_matcher& m) {
 68 | 			vars.insert(std::make_pair(m.var, "std::string"));
 69 | 			m.m->accept(this);
 70 | 		}
 71 | 		
 72 | 		std::map<std::string, std::string> list(ast::matcher_ptr& m) {
 73 | 			vars.clear();
 74 | 			m->accept(this);
 75 | 			return vars;
 76 | 		}
 77 | 		
 78 | 		std::map<std::string, std::string> list(ast::grammar_rule& r) {
 79 | 			vars.clear();
 80 | 			r.m->accept(this);
 81 | 			return vars;
 82 | 		}
 83 | 
 84 | 		/** checks if rule exists */
 85 | 		bool rule_exists(const std::string& rule) const {
 86 | 			return types.count(rule);
 87 | 		}
 88 | 		
 89 | 		/** assuming rule exists, checks if it has a type */
 90 | 		bool is_typed(const std::string& rule) const {
 91 | 			return !types.at(rule).empty();
 92 | 		}
 93 | 		
 94 | 	private:
 95 | 		/** map of grammar rule names to types */
 96 | 		std::unordered_map<std::string, std::string> types;
 97 | 		/** map of variable names to types */
 98 | 		std::map<std::string, std::string> vars;
 99 | 	}; /* class variable_list */
100 | 	
101 | 	/** AST visitor with function-like interface that checks whether an expression is free of 
102 | 	 *  variable bindings or semantic actions. Errors are acceptable, as the first pass will 
103 | 	 *  produce the same errors as later passes. */
104 | 	class is_lexical : ast::tree_visitor {
105 | 	public:
106 | 		/** Constructor; starts traversal */
107 | 		is_lexical(ast::matcher* m) : lexical(true) { m->accept(this); }
108 | 		is_lexical(ast::matcher_ptr m) : lexical(true) { m->accept(this); }
109 | 		
110 | 		operator bool () { return lexical; }
111 | 		
112 | 		void visit(ast::range_matcher& m) { lexical = m.var.empty(); }
113 | 		
114 | 		void visit(ast::rule_matcher& m) { lexical = m.var.empty(); }
115 | 		
116 | 		void visit(ast::any_matcher& m) { lexical = m.var.empty(); }
117 | 		
118 | 		void visit(ast::action_matcher&) { lexical = false; }
119 | 		
120 | 		void visit(ast::seq_matcher& m) {
121 | 			for (auto it = m.ms.begin(); lexical && it != m.ms.end(); ++it) {
122 | 				(*it)->accept(this);
123 | 			}
124 | 		}
125 | 		
126 | 		void visit(ast::alt_matcher& m) {
127 | 			for (auto it = m.ms.begin(); lexical && it != m.ms.end(); ++it) {
128 | 				(*it)->accept(this);
129 | 			}
130 | 		}
131 | 		
132 | 		void visit(ast::capt_matcher& m) { lexical = m.var.empty(); }
133 | 	private:
134 | 		bool lexical;  ///< Is the given expression lacking in semantic elements?
135 | 	}; /* class is_lexical */
136 | 	
137 | 	/** Code generator for Egg matcher ASTs */
138 | 	class compiler : ast::visitor {
139 | 	public:
140 | 		using warning_list = std::vector<std::string>;
141 | 		
142 | 		compiler(std::string name, std::ostream& out = std::cout, bool do_guard = true) 
143 | 			: name(name), out(out), tabs(2), do_guard(do_guard), do_memo(true), max_memo_id(0) {}
144 | 		
145 | 		compiler& memo(bool b = true) { do_memo = b; return *this; }
146 | 		compiler& no_memo() { do_memo = false; return *this; }
147 | 
148 | 		void visit(ast::char_matcher& m) {
149 | 			out << "parser::literal(\'" << strings::escape(m.c) << "\')";
150 | 		}
151 | 
152 | 		void visit(ast::str_matcher& m) {
153 | 			out << "parser::literal(\"" << strings::escape(m.s) << "\")";
154 | 		}
155 | 		
156 | 		void visit(ast::char_range& r, const std::string& var) {
157 | 			if ( r.single() ) {
158 | 				out << "parser::literal(\'" << strings::escape(r.to) << "\'";
159 | 			} else {
160 | 				out << "parser::between(\'" << strings::escape(r.from) 
161 | 				    << "\', \'" << strings::escape(r.to) << "\'";
162 | 			}
163 | 			
164 | 			if ( ! var.empty() ) {
165 | 				out << ", " << var;
166 | 			}
167 | 			
168 | 			out << ")";
169 | 		}
170 | 		
171 | 		void visit(ast::range_matcher& m) {
172 | 			if ( m.rs.empty() ) {
173 | 				out << "parser::empty()";
174 | 			}
175 | 			
176 | 			if ( m.rs.size() == 1 ) {
177 | 				visit(m.rs.front(), m.var);
178 | 				return;
179 | 			}
180 | 			
181 | 			std::string indent(++tabs, '\t');
182 | 
183 | 			//chain matcher ranges
184 | 			out << std::endl 
185 | 				<< indent << "parser::choice({\n"
186 | 				<< indent << "\t"
187 | 				;
188 | 				
189 | 			indent += '\t';
190 | 			++tabs;
191 | 			
192 | 			auto it = m.rs.begin();
193 | 			visit(*it, m.var);
194 | 			
195 | 			while ( ++it != m.rs.end() ) {
196 | 				out << "," << std::endl
197 | 					<< indent 
198 | 					;
199 | 				visit(*it, m.var);
200 | 			}
201 | 			
202 | 			out << "})";
203 | 			
204 | 			tabs -= 2;
205 | 		}
206 | 
207 | 		void visit(ast::rule_matcher& m) {
208 | 			if ( vars.rule_exists(m.rule) ) {
209 | 				if ( vars.is_typed(m.rule) ) { // syntactic rule
210 | 					if ( m.var.empty() ) {  // unbound
211 | 						out << "parser::unbind(" << m.rule << ")";
212 | 					} else {  // bound
213 | 						out << "parser::bind(" << m.var << ", " << m.rule << ")";
214 | 					}
215 | 					return;
216 | 				}
217 | 			} else {
218 | 				warnings.emplace_back("Rule \"" + m.rule + "\" is not defined");
219 | 			}
220 | 			
221 | 			// otherwise lexical rule
222 | 			out << m.rule;
223 | 		}
224 | 
225 | 		void visit(ast::any_matcher& m) {
226 | 			out << "parser::any(" << m.var << ")";
227 | 		}
228 | 
229 | 		void visit(ast::empty_matcher& m) {
230 | 			out << "parser::empty()";
231 | 		}
232 | 
233 | 		void visit(ast::action_matcher& m) {
234 | 			//runs action code with all variables bound, then returns true
235 | 			out << "[&](parser::state& ps) {" << m.a << " return true; }";
236 | 		}
237 | 
238 | 		void visit(ast::opt_matcher& m) {
239 | 			out << "parser::option(";
240 | 			m.m->accept(this);
241 | 			out << ")";
242 | 		}
243 | 
244 | 		void visit(ast::many_matcher& m) {
245 | 			if ( do_memo && is_lexical(m.m) ) {
246 | 				out << "parser::memoize_many(" << ++max_memo_id << ", ";
247 | 			} else {
248 | 				out << "parser::many(";
249 | 			}
250 | 			m.m->accept(this);
251 | 			out << ")";
252 | 		}
253 | 
254 | 		void visit(ast::some_matcher& m) {
255 | 			if ( do_memo && is_lexical(m.m) ) {
256 | 				out << "parser::memoize_some(" << ++max_memo_id << ", ";
257 | 			} else {
258 | 				out << "parser::some(";
259 | 			}
260 | 			m.m->accept(this);
261 | 			out << ")";
262 | 		}
263 | 		
264 | 		void visit(ast::seq_matcher& m) {
265 | 			// empty sequence bad form, but always matches
266 | 			if ( m.ms.empty() ) {
267 | 				out << "parser::empty()";
268 | 				return;
269 | 			}
270 | 			
271 | 			// singleton sequence also bad form, equivalent to the single matcher
272 | 			if ( m.ms.size() == 1 ) {
273 | 				m.ms.front()->accept(this);
274 | 				return;
275 | 			}
276 | 
277 | 			std::string indent(++tabs, '\t');
278 | 			
279 | 			out << std::endl
280 | 				<< indent << "parser::sequence({\n"
281 | 				<< indent << "\t"
282 | 				;
283 | 			
284 | 			indent += '\t';
285 | 			++tabs;
286 | 
287 | 			auto it = m.ms.begin();
288 | 			(*it)->accept(this);
289 | 			while ( ++it != m.ms.end() ) {
290 | 				out << "," << std::endl
291 | 					<< indent 
292 | 					;
293 | 				(*it)->accept(this);
294 | 			}
295 | 
296 | 			out << "})";
297 | 
298 | 			tabs -= 2;
299 | 		}
300 | 		
301 | 		void visit(ast::alt_matcher& m) {
302 | 			// empty alternation bad form, but always matches
303 | 			if ( m.ms.empty() ) {
304 | 				out << "parser::empty()";
305 | 				return;
306 | 			}
307 | 			
308 | 			// singleton alternation also bad form, equivalent to the single matcher
309 | 			if ( m.ms.size() == 1 ) {
310 | 				m.ms.front()->accept(this);
311 | 				return;
312 | 			}
313 | 
314 | 			std::string indent(++tabs, '\t');
315 | 			
316 | 			out << std::endl
317 | 				<< indent << "parser::choice({\n"
318 | 				<< indent << "\t"
319 | 				;
320 | 			
321 | 			indent += '\t';
322 | 			++tabs;
323 | 
324 | 			auto it = m.ms.begin();
325 | 			(*it)->accept(this);
326 | 			while ( ++it != m.ms.end() ) {
327 | 				out << "," << std::endl
328 | 					<< indent 
329 | 					;
330 | 				(*it)->accept(this);
331 | 			}
332 | 
333 | 			out << "})";
334 | 
335 | 			tabs -= 2;
336 | 		}
337 | 
338 | 		void visit(ast::look_matcher& m) {
339 | 			out << "parser::look(";
340 | 			m.m->accept(this);
341 | 			out << ")";
342 | 		}
343 | 
344 | 		void visit(ast::not_matcher& m) {
345 | 			out << "parser::look_not(";
346 | 			m.m->accept(this);
347 | 			out << ")";
348 | 		}
349 | 
350 | 		void visit(ast::capt_matcher& m) {
351 | 			out << "parser::capture(" << m.var << ", ";
352 | 			m.m->accept(this);
353 | 			out << ")";
354 | 		}
355 | 		
356 | 		void visit(ast::named_matcher& m) {
357 | 			out << "parser::named(\"" << strings::escape(m.error) << "\", ";
358 | 			m.m->accept(this);
359 | 			out << ")";
360 | 		}
361 | 		
362 | 		void visit(ast::fail_matcher& m) {
363 | 			out << "parser::fail(\"" << strings::escape(m.error) << "\")";
364 | 		}
365 | 
366 | 		void compile(ast::grammar_rule& r) {
367 | 			bool typed = ! r.type.empty();
368 | 			bool has_error = ! r.error.empty();
369 | 			bool memoized = do_memo && r.memo;
370 | 
371 | 			//print prototype
372 | 			out << "\tbool " << r.name << "(parser::state& ps";
373 | 			if ( typed ) out << ", " << r.type << "& psVal";
374 | 			out << ") {" << std::endl;
375 | 			
376 | 			//setup bound variables
377 | 			std::map<std::string, std::string> vs = vars.list(r);
378 | 			//skip parser variables
379 | 			vs.erase("ps");
380 | 			vs.erase("psVal");
381 | 			for (auto it = vs.begin(); it != vs.end(); ++it) {
382 | 				// add variable binding
383 | 				out << "\t\t" << it->second << " " << it->first << ";" << std::endl;
384 | 			}
385 | 			if ( ! vs.empty() ) out << std::endl;
386 | 
387 | 			//apply matcher
388 | 			out << "\t\treturn ";
389 | 			if ( memoized ) {
390 | 				out << "parser::memoize(" << ++max_memo_id << ", ";
391 | 				if ( typed ) out << "psVal, ";
392 | 			}
393 | 			if ( has_error ) {
394 | 				out << "parser::named(\"" << strings::escape(r.error) << "\", ";
395 | 			}
396 | 			r.m->accept(this);
397 | 			if ( has_error ) { out << ")"; }
398 | 			if ( memoized ) { out << ")"; }
399 | 			out << "(ps);";
400 | 
401 | 			//close out method
402 | 			out << "\n"
403 | 				<< "\t}" << std::endl
404 | 				<< std::endl
405 | 				;
406 | 		}
407 | 
408 | 		/** Compiles a grammar to the output file. */
409 | 		warning_list compile(ast::grammar& g) {
410 | 			warnings = {};
411 | 
412 | 			//print pre-amble
413 | 			if ( do_guard ) { out << "#pragma once\n" << std::endl; }
414 | 			out << "/* THE FOLLOWING HAS BEEN AUTOMATICALLY GENERATED BY THE EGG PARSER GENERATOR." << std::endl
415 | 				<< " * DO NOT EDIT. */" << std::endl
416 | 				<< std::endl
417 | 				;
418 | 
419 | 			//print pre-code
420 | 			if ( ! g.pre.empty() ) {
421 | 				out << "// {%" << std::endl
422 | 					<< g.pre << std::endl
423 | 					<< "// %}" << std::endl
424 | 					<< std::endl
425 | 					;
426 | 			}
427 | 
428 | 			//get needed includes
429 | 			out << "#include <string>" << std::endl
430 | 				<< "#include \"parser.hpp\"" << std::endl
431 | 				<< std::endl
432 | 				;
433 | 
434 | 			//setup parser namespace
435 | 			out << "namespace " << name << " {" << std::endl
436 | 				<< std::endl
437 | 				;
438 | 
439 | 			//pre-declare matchers
440 | 			for (auto it = g.rs.begin(); it != g.rs.end(); ++it) {
441 | 				ast::grammar_rule& r = **it;
442 | 				out << "\tbool " << r.name << "(parser::state&";
443 | 				if ( ! r.type.empty() ) {
444 | 					out << ", " << r.type << "&";
445 | 				}
446 | 				out << ");" << std::endl;
447 | 			}
448 | 			out << std::endl;
449 | 
450 | 			//set up lists of variable types and memoization IDs
451 | 			vars = variable_list(g);
452 | 			
453 | 			//generate matching functions
454 | 			for (auto it = g.rs.begin(); it != g.rs.end(); ++it) {
455 | 				ast::grammar_rule& r = **it;
456 | 				compile(r);
457 | 			}
458 | 
459 | 			//close parser namespace
460 | 			out << "} // namespace " << name << std::endl
461 | 				<< std::endl
462 | 				;
463 | 			
464 | 			//print post-code
465 | 			if ( ! g.post.empty() ) {
466 | 				out << "// {%" << std::endl
467 | 					<< g.post << std::endl
468 | 					<< "// %}" << std::endl
469 | 					<< std::endl
470 | 					;
471 | 			}
472 | 
473 | 			return std::move(warnings);
474 | 		}
475 | 		
476 | 	private:
477 | 		std::string name;	        ///< Name of the grammar
478 | 		std::ostream& out;	        ///< Output stream to print to
479 | 		variable_list vars;	        ///< Holds grammar rule types
480 | 		warning_list warnings;      ///< Holds warnings
481 | 		bool do_guard;              ///< Add include guard to generated file?
482 | 		bool do_memo;               /**< if true, memoize if grammar says, otherwise no 
483 | 		                             *   memoization [default true] */
484 | 		unsigned long max_memo_id;  ///< Largest currently used memoization ID
485 | 		int tabs;			        ///< Number of tabs for printer
486 | 	}; /* class compiler */
487 | 	
488 | } /* namespace visitor */
489 | 
490 | 


--------------------------------------------------------------------------------
/visitors/normalizer.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include "../ast.hpp"
 26 | 
 27 | namespace visitor {
 28 | 
 29 | 	/** Normalizes an Egg AST */
 30 | 	class normalizer : ast::visitor {
 31 | 	public:
 32 | 		void visit(ast::char_matcher& m) {
 33 | 			rVal = ast::as_ptr<ast::matcher>(
 34 | 					ast::make_ptr<ast::char_matcher>(m));
 35 | 		}
 36 | 
 37 | 		void visit(ast::str_matcher& m) {
 38 | 			rVal = ast::as_ptr<ast::matcher>(
 39 | 					ast::make_ptr<ast::str_matcher>(m));
 40 | 		}
 41 | 
 42 | 		void visit(ast::range_matcher& m) {
 43 | 			rVal = ast::as_ptr<ast::matcher>(
 44 | 					ast::make_ptr<ast::range_matcher>(m));
 45 | 		}
 46 | 
 47 | 		void visit(ast::rule_matcher& m) {
 48 | 			rVal = ast::as_ptr<ast::matcher>(
 49 | 					ast::make_ptr<ast::rule_matcher>(m));
 50 | 		}
 51 | 
 52 | 		void visit(ast::any_matcher& m) {
 53 | 			rVal = ast::make_ptr<ast::any_matcher>(m);
 54 | 		}
 55 | 		
 56 | 		void visit(ast::empty_matcher& m) {
 57 | 			rVal = ast::make_ptr<ast::empty_matcher>();
 58 | 		}
 59 | 
 60 | 		void visit(ast::action_matcher& m) {
 61 | 			rVal = ast::as_ptr<ast::matcher>(
 62 | 					ast::make_ptr<ast::action_matcher>(m));
 63 | 		}
 64 | 		
 65 | 		void visit(ast::opt_matcher& m) {
 66 | 			m.m->accept(this);
 67 | 			m.m = rVal;
 68 | 			rVal = ast::as_ptr<ast::matcher>(
 69 | 					ast::make_ptr<ast::opt_matcher>(m));
 70 | 		}
 71 | 
 72 | 		void visit(ast::many_matcher& m) {
 73 | 			m.m->accept(this);
 74 | 			m.m = rVal;
 75 | 			rVal = ast::as_ptr<ast::matcher>(
 76 | 					ast::make_ptr<ast::many_matcher>(m));
 77 | 		}
 78 | 
 79 | 		void visit(ast::some_matcher& m) {
 80 | 			m.m->accept(this);
 81 | 			m.m = rVal;
 82 | 			rVal = ast::as_ptr<ast::matcher>(
 83 | 					ast::make_ptr<ast::some_matcher>(m));
 84 | 		}
 85 | 
 86 | 		void visit(ast::seq_matcher& m) {
 87 | 			switch( m.ms.size() ) {
 88 | 			case 0:
 89 | 				rVal = ast::make_ptr<ast::empty_matcher>();
 90 | 				break;
 91 | 			case 1:
 92 | 				m.ms.front()->accept(this);
 93 | 				// rVal = rVal;
 94 | 				break;
 95 | 			default:
 96 | 				ast::seq_matcher_ptr p = ast::make_ptr<ast::seq_matcher>();
 97 | 				for (auto it = m.ms.begin(); it != m.ms.end(); ++it) {
 98 | 					(*it)->accept(this);
 99 | 					*p += rVal;
100 | 				}
101 | 				rVal = ast::as_ptr<ast::matcher>(p);
102 | 				break;
103 | 			}
104 | 		}
105 | 		
106 | 		void visit(ast::alt_matcher& m) {
107 | 			switch( m.ms.size() ) {
108 | 			case 0:
109 | 				rVal = ast::make_ptr<ast::empty_matcher>();
110 | 				break;
111 | 			case 1:
112 | 				m.ms.front()->accept(this);
113 | 				// rVal = rVal;
114 | 				break;
115 | 			default:
116 | 				ast::alt_matcher_ptr p = ast::make_ptr<ast::alt_matcher>();
117 | 				for (auto it = m.ms.begin(); it != m.ms.end(); ++it) {
118 | 					(*it)->accept(this);
119 | 					*p += rVal;
120 | 				}
121 | 				rVal = ast::as_ptr<ast::matcher>(p);
122 | 				break;
123 | 			}
124 | 		}
125 | 
126 | 		void visit(ast::look_matcher& m) {
127 | 			m.m->accept(this);
128 | 			m.m = rVal;
129 | 			rVal = ast::as_ptr<ast::matcher>(
130 | 					ast::make_ptr<ast::look_matcher>(m));
131 | 		}
132 | 
133 | 		void visit(ast::not_matcher& m) {
134 | 			m.m->accept(this);
135 | 			m.m = rVal;
136 | 			rVal = ast::as_ptr<ast::matcher>(
137 | 					ast::make_ptr<ast::not_matcher>(m));
138 | 		}
139 | 
140 | 		void visit(ast::capt_matcher& m) {
141 | 			m.m->accept(this);
142 | 			m.m = rVal;
143 | 			rVal = ast::as_ptr<ast::matcher>(
144 | 					ast::make_ptr<ast::capt_matcher>(m));
145 | 		}
146 | 		
147 | 		void visit(ast::named_matcher& m) {
148 | 			m.m->accept(this);
149 | 			m.m = rVal;
150 | 			rVal = ast::as_ptr<ast::matcher>(
151 | 					ast::make_ptr<ast::named_matcher>(m));
152 | 		}
153 | 		
154 | 		void visit(ast::fail_matcher& m) {
155 | 			rVal = ast::as_ptr<ast::matcher>(
156 | 					ast::make_ptr<ast::fail_matcher>(m));
157 | 		}
158 | 
159 | 		ast::grammar_rule& normalize(ast::grammar_rule& r) {
160 | 			r.m->accept(this);
161 | 			r.m = rVal;
162 | 			return r;
163 | 		}
164 | 
165 | 		ast::grammar& normalize(ast::grammar& g) {
166 | 			for (auto it = g.rs.begin(); it != g.rs.end(); ++it) {
167 | 				ast::grammar_rule_ptr& r = *it;
168 | 				*r = normalize(*r);
169 | 			}
170 | 			return g;
171 | 		}
172 | 	
173 | 	private:
174 | 		/** The matcher to return for the current visit */
175 | 		ast::matcher_ptr rVal;
176 | 	}; /* class visitor */
177 | 	
178 | } /* namespace visitor */
179 | 
180 | 


--------------------------------------------------------------------------------
/visitors/printer.hpp:
--------------------------------------------------------------------------------
  1 | #pragma once
  2 | 
  3 | /*
  4 |  * Copyright (c) 2013 Aaron Moss
  5 |  * 
  6 |  * Permission is hereby granted, free of charge, to any person obtaining a copy
  7 |  * of this software and associated documentation files (the "Software"), to deal
  8 |  * in the Software without restriction, including without limitation the rights
  9 |  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 10 |  * copies of the Software, and to permit persons to whom the Software is
 11 |  * furnished to do so, subject to the following conditions:
 12 |  * 
 13 |  * The above copyright notice and this permission notice shall be included in
 14 |  * all copies or substantial portions of the Software.
 15 |  * 
 16 |  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 17 |  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 18 |  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 19 |  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 20 |  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 21 |  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
 22 |  * THE SOFTWARE.
 23 |  */
 24 | 
 25 | #include <iostream>
 26 | #include <string>
 27 | 
 28 | #include "../ast.hpp"
 29 | #include "../utils/strings.hpp"
 30 | 
 31 | namespace visitor {
 32 | 
 33 | 	/** Pretty-printer for Egg matcher ASTs. */
 34 | 	class printer : ast::visitor {
 35 | 	public:
 36 | 		printer(std::ostream& out = std::cout, int tabs = 0) 
 37 | 			: out(out), tabs(tabs) {}
 38 | 
 39 | 		void visit(ast::char_matcher& m) {
 40 | 			out << "\'" << strings::escape(m.c) << "\'";
 41 | 		}
 42 | 		
 43 | 		void visit(ast::str_matcher& m) {
 44 | 			out << "\"" << strings::escape(m.s) << "\"";
 45 | 		}
 46 | 
 47 | 		void visit(ast::range_matcher& m) {
 48 | 			out << "[";
 49 | 
 50 | 			for (auto iter = m.rs.begin(); iter != m.rs.end(); ++iter) {
 51 | 				ast::char_range& r = *iter;
 52 | 				out << strings::escape(r.from);
 53 | 				if ( r.from != r.to ) {
 54 | 					out << "-" << strings::escape(r.to);
 55 | 				}
 56 | 			}
 57 | 			
 58 | 			out << "]";
 59 | 			
 60 | 			if ( ! m.var.empty() ) {
 61 | 				out << " : " << m.var;
 62 | 			}
 63 | 		}
 64 | 		
 65 | 		void visit(ast::rule_matcher& m) {
 66 | 			out << m.rule;
 67 | 			if ( ! m.var.empty() ) {
 68 | 				out << " : " << m.var;
 69 | 			}
 70 | 		}
 71 | 
 72 | 		void visit(ast::any_matcher& m) {
 73 | 			out << ".";
 74 | 			
 75 | 			if ( ! m.var.empty() ) {
 76 | 				out << " : " << m.var;
 77 | 			}
 78 | 		}
 79 | 
 80 | 		void visit(ast::empty_matcher& m) {
 81 | 			out << ";";
 82 | 		}
 83 | 
 84 | 		void visit(ast::action_matcher& m) {
 85 | 			out << "{" << strings::single_line(m.a) << "}";
 86 | 		}
 87 | 
 88 | 		void visit(ast::opt_matcher& m) {
 89 | 			m.m->accept(this);
 90 | 			out << "?";
 91 | 		}
 92 | 
 93 | 		void visit(ast::many_matcher& m) {
 94 | 			m.m->accept(this);
 95 | 			out << "*";
 96 | 		}
 97 | 
 98 | 		void visit(ast::some_matcher& m) {
 99 | 			m.m->accept(this);
100 | 			out << "+";
101 | 		}
102 | 
103 | 		void visit(ast::seq_matcher& m) {
104 | 			if ( m.ms.size() != 1 ) { out << "( "; }
105 | 			if ( ! m.ms.empty() ) {
106 | 				std::string indent((4 * ++tabs), ' ');
107 | 				
108 | 				auto iter = m.ms.begin();
109 | 				(*iter)->accept(this);
110 | 				while ( ++iter != m.ms.end() ) {
111 | 					out << std::endl << indent;
112 | 					(*iter)->accept(this);
113 | 				}
114 | 				
115 | 				--tabs;
116 | 			}
117 | 			if ( m.ms.size() != 1 ) { out << " )"; }
118 | 		}
119 | 
120 | 		void visit(ast::alt_matcher& m) {
121 | 			if ( m.ms.size() != 1 ) { out << "( "; }
122 | 			if ( ! m.ms.empty() ) {
123 | 				std::string indent((4 * ++tabs), ' ');
124 | 
125 | 				auto iter = m.ms.begin();
126 | 				(*iter)->accept(this);
127 | 				while ( ++iter != m.ms.end() ) {
128 | 					out << "\n" << indent << "| ";
129 | 					(*iter)->accept(this);
130 | 				}
131 | 				
132 | 				--tabs;
133 | 			}
134 | 			if ( m.ms.size() != 1 ) { out << " )"; }
135 | 		}
136 | 
137 | 		void visit(ast::look_matcher& m) {
138 | 			out << "&";
139 | 			m.m->accept(this);
140 | 		}
141 | 
142 | 		void visit(ast::not_matcher& m) {
143 | 			out << "!";
144 | 			m.m->accept(this);
145 | 		}
146 | 		
147 | 		void visit(ast::capt_matcher& m) {
148 | 			out << "< ";
149 | 			m.m->accept(this);
150 | 			out << " > : " << m.var;
151 | 		}
152 | 		
153 | 		void visit(ast::named_matcher& m) {
154 | 			m.m->accept(this);
155 | 			out << "@`" << strings::unescape_error(m.error) << "`";
156 | 		}
157 | 		
158 | 		void visit(ast::fail_matcher& m) {
159 | 			out << "~`" << strings::unescape_error(m.error) << "`";
160 | 		}
161 | 
162 | 		void print(ast::grammar_rule& r) {
163 | 			out << r.name;
164 | 			if ( ! r.type.empty() ) {
165 | 				out << " : " << r.type;
166 | 			}
167 | 			out << " = ";
168 | 			r.m->accept(this);
169 | 			out << std::endl;
170 | 		}
171 | 
172 | 		void print(ast::grammar& g) {
173 | 			if ( ! g.pre.empty() ) {
174 | 				out << "{%" << g.pre << "%}" << std::endl;
175 | 			}
176 | 
177 | 			out << std::endl;
178 | 			for (auto iter = g.rs.begin(); iter != g.rs.end(); ++iter) {
179 | 				ast::grammar_rule_ptr& r = *iter;
180 | 				print(*r);
181 | 			}
182 | 			out << std::endl;
183 | 
184 | 			if ( ! g.post.empty() ) {
185 | 				out << "{%" << g.post << "%}" <<std::endl << std::endl;
186 | 			}
187 | 		}
188 | 		
189 | 	private:
190 | 		std::ostream& out;	/**< output stream */
191 | 		int tabs;			/**< current number of tab stops */
192 | 	};
193 | } /* namespace visitor */
194 | 
195 | 


--------------------------------------------------------------------------------