├── .gitignore
├── .travis.yml
├── LICENSE
├── README.md
├── bin
    └── .gitignore
├── build-doc.hxml
├── build-each.hxml
├── build-interp.hxml
├── build-js.hxml
├── build.hxml
├── haxelib.json
├── hxparse.hxproj
├── src
    ├── byte
    │   └── ByteData.hx
    └── hxparse
    │   ├── LexEngine.hx
    │   ├── Lexer.hx
    │   ├── LexerTokenSource.hx
    │   ├── NoMatch.hx
    │   ├── Parser.hx
    │   ├── ParserBuilder.hx
    │   ├── ParserBuilderImpl.macro.hx
    │   ├── ParserError.hx
    │   ├── Position.hx
    │   ├── RuleBuilder.hx
    │   ├── Ruleset.hx
    │   ├── State.hx
    │   ├── TokenSource.hx
    │   ├── Unexpected.hx
    │   ├── UnexpectedChar.hx
    │   ├── Utils.hx
    │   └── debug
    │       └── LexerGraph.hx
└── test
    ├── ArithmeticParser.hx
    ├── JSONParser.hx
    ├── PrintfParser.hx
    ├── Test.hx
    └── UnicodeTestLexer.hx


/.gitignore:
--------------------------------------------------------------------------------
1 | 
2 | /dump
3 | .vscode/
4 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: haxe
2 | 
3 | before_install:
4 |   - sudo apt-get update
5 |   - sudo apt-get install mono-devel
6 | 
7 | hxml:
8 |   - build.hxml
9 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2021 Simon Krajewski
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | hxparse
 2 | =======
 3 | 
 4 | [![TravisCI Build Status](https://api.travis-ci.org/Simn/hxparse.svg?branch=development)](https://travis-ci.org/Simn/hxparse)
 5 | 
 6 | This library provides tools for creating lexers and parsers in Haxe.
 7 | 
 8 | ### Installation
 9 | 
10 | Install the library via [haxelib](http://lib.haxe.org/p/hxparse):
11 | ``` 
12 | haxelib install hxparse 
13 | ```
14 | 
15 | ### Usage
16 | 
17 | - Writing a Lexer: https://github.com/Simn/hxparse/wiki/Writing-a-Lexer
18 | - Writing a Parser: https://github.com/Simn/hxparse/wiki/Writing-a-Parser
19 | - API: http://simn.github.io/hxparse/hxparse/index.html
20 | 


--------------------------------------------------------------------------------
/bin/.gitignore:
--------------------------------------------------------------------------------
1 | *
2 | !.gitignore


--------------------------------------------------------------------------------
/build-doc.hxml:
--------------------------------------------------------------------------------
1 | build-each.hxml
2 | 
3 | -dce std
4 | -neko neko.n
5 | --no-output
6 | -xml bin/hxparse.xml


--------------------------------------------------------------------------------
/build-each.hxml:
--------------------------------------------------------------------------------
1 | -cp src
2 | -cp test
3 | -main Test
4 | -dce full
5 | -lib unifill


--------------------------------------------------------------------------------
/build-interp.hxml:
--------------------------------------------------------------------------------
1 | build-each.hxml
2 | --interp
3 | --times


--------------------------------------------------------------------------------
/build-js.hxml:
--------------------------------------------------------------------------------
1 | build-each.hxml
2 | -js bin/hxparse.js


--------------------------------------------------------------------------------
/build.hxml:
--------------------------------------------------------------------------------
 1 | build-each.hxml
 2 | -lib unifill
 3 | --each
 4 | 
 5 | --next
 6 | -D dump=pretty
 7 | -neko bin/hxparse.n
 8 | 
 9 | --next
10 | -swf bin/hxparse.swf
11 | 
12 | --next
13 | -swf-version 8
14 | -swf bin/hxparse8.swf
15 | 
16 | --next
17 | -js bin/hxparse.js
18 | 
19 | --next
20 | -php bin/php
21 | 
22 | --next
23 | -cpp bin/cpp
24 | 
25 | #--next
26 | #-java bin/java
27 | 
28 | --next
29 | -cs bin/cs
30 | 
31 | --next
32 | -python bin/hxparse.py


--------------------------------------------------------------------------------
/haxelib.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "hxparse",
 3 |   "url": "https://github.com/Simn/hxparse",
 4 |   "license": "MIT",
 5 |   "classPath": "src",
 6 |   "description": "This library provides tools for creating lexers and parsers in haxe.",
 7 |   "version": "4.3.0",
 8 |   "releasenote": "update",
 9 |   "contributors": ["Simn"]
10 | }
11 | 


--------------------------------------------------------------------------------
/hxparse.hxproj:
--------------------------------------------------------------------------------
 1 | ﻿<?xml version="1.0" encoding="utf-8"?>
 2 | <project version="2">
 3 |   <!-- Output SWF options -->
 4 |   <output>
 5 |     <movie outputType="CustomBuild" />
 6 |     <movie input="" />
 7 |     <movie path="" />
 8 |     <movie fps="30" />
 9 |     <movie width="800" />
10 |     <movie height="600" />
11 |     <movie version="9" />
12 |     <movie minorVersion="0" />
13 |     <movie platform="Flash Player" />
14 |     <movie background="#FFFFFF" />
15 |   </output>
16 |   <!-- Other classes to be compiled into your SWF -->
17 |   <classpaths>
18 |     <class path="src" />
19 |     <class path="test" />
20 |   </classpaths>
21 |   <!-- Build options -->
22 |   <build>
23 |     <option directives="" />
24 |     <option flashStrict="False" />
25 |     <option noInlineOnDebug="False" />
26 |     <option mainClass="" />
27 |     <option enabledebug="False" />
28 |     <option additional="" />
29 |   </build>
30 |   <!-- haxelib libraries -->
31 |   <haxelib>
32 |     <library name="unifill" />
33 |   </haxelib>
34 |   <!-- Class files to compile (other referenced classes will automatically be included) -->
35 |   <compileTargets>
36 |     <!-- example: <compile path="..." /> -->
37 |   </compileTargets>
38 |   <!-- Assets to embed into the output SWF -->
39 |   <library>
40 |     <!-- example: <asset path="..." id="..." update="..." glyphs="..." mode="..." place="..." sharepoint="..." /> -->
41 |   </library>
42 |   <!-- Paths to exclude from the Project Explorer tree -->
43 |   <hiddenPaths>
44 |     <hidden path="obj" />
45 |   </hiddenPaths>
46 |   <!-- Executed before build -->
47 |   <preBuildCommand>"$(CompilerPath)/haxe" build.hxml</preBuildCommand>
48 |   <!-- Executed after build -->
49 |   <postBuildCommand alwaysRun="False" />
50 |   <!-- Other project options -->
51 |   <options>
52 |     <option showHiddenPaths="False" />
53 |     <option testMovie="Custom" />
54 |     <option testMovieCommand="" />
55 |   </options>
56 |   <!-- Plugin storage -->
57 |   <storage />
58 | </project>


--------------------------------------------------------------------------------
/src/byte/ByteData.hx:
--------------------------------------------------------------------------------
 1 | package byte;
 2 | 
 3 | abstract ByteData(haxe.io.Bytes) {
 4 | 	/** Number of bytes in the wrapped buffer. **/
 5 | 	public var length(get,never):Int;
 6 | 	inline function get_length() { return this.length; }
 7 | 	/** Returns the byte at index `i` of the wrapped buffer. **/
 8 | 	inline public function readByte(i:Int) { return this.get(i); }
 9 | 	/** Wraps `data` directly; the bytes are not copied. **/
10 | 	inline function new(data) { this = data; }
11 | 
12 | 	/** Builds a ByteData from the result of `haxe.io.Bytes.ofString(s)`. **/
13 | 	inline static public function ofString(s:String):ByteData {
14 | 		var encoded = haxe.io.Bytes.ofString(s);
15 | 		return new ByteData(encoded);
16 | 	}
17 | 	/** Wraps an existing `haxe.io.Bytes` instance. **/
18 | 	inline static public function ofBytes(b:haxe.io.Bytes):ByteData { return new ByteData(b); }
19 | 
20 | 	/** Returns `this.getString(pos, len)`: `len` bytes starting at `pos`, as a String. **/
21 | 	inline public function readString(pos:Int, len:Int) {
22 | 		var text = this.getString(pos, len);
23 | 		return text;
24 | 	}
25 | }
26 | 


--------------------------------------------------------------------------------
/src/hxparse/LexEngine.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | /**
  4 | 	LexEngine handles pattern parsing and state transformation.
  5 | 
  6 | 	This class is used by the `Lexer` and rarely has to be interacted with
  7 | 	directly.
  8 | 
  9 | 	The static `parse` method transforms a single `String` to a `Pattern`.
 10 | 	Multiple patterns can then be passed to the constructor to generate the
 11 | 	state machine, which is obtainable from the `firstState` method.
 12 | **/
 13 | class LexEngine {
 14 | 
 15 | 	var uid : Int;
 16 | 	var nodes : Array<Node>;
 17 | 	var finals : Array<Node>;
 18 | 	var states : Array<State>;
 19 | 	var hstates : Map<String,State>;
 20 | 
 21 | 	/**
 22 | 		Creates a new LexEngine from `patterns`.
 23 | 
 24 | 		Each LexEngine maintains a state machine, whose initial state can be
 25 | 		obtained from the `firstState` method. After this, `this` LexEngine can
 26 | 		be discarded.
 27 | 
 28 | 		If `patterns` is null, the result is unspecified.
 29 | 	**/
 30 | 	public function new( patterns : Array<Pattern> ) {
 31 | 		nodes = [];
 32 | 		finals = [];
 33 | 		states = [];
 34 | 		hstates = new Map();
 35 | 		uid = 0;
 36 | 		var pid = 0;
 37 | 		for ( p in patterns ) {
 38 | 			var id = pid++;
 39 | 			var f = node(id);
 40 | 			var n = initNode(p, f,id);
 41 | 			nodes.push(n);
 42 | 			finals.push(f);
 43 | 		}
 44 | 		makeState(addNodes([], nodes));
 45 | 	}
 46 | 
 47 | 	/**
 48 | 		Returns the entry state of the state machine generated by `this`
 49 | 		LexEngine.
 50 | 	**/
 51 | 	public function firstState() {
 52 | 		return states[0];
 53 | 	}
 54 | 	// Returns the State for the node set `nodes`, creating it (and, recursively, every state reachable from it) on first request.
 55 | 	function makeState( nodes : Array<Node> ) {
 56 | 		var buf = new StringBuf(); // cache key: the node ids, each followed by "-"
 57 | 		for( n in nodes ) {
 58 | 			buf.add(n.id);
 59 | 			buf.addChar("-".code);
 60 | 		}
 61 | 		var key = buf.toString();
 62 | 		var s = hstates.get(key);
 63 | 		if( s != null )
 64 | 			return s;
 65 | 		// Register the new state before recursing, so a transition leading back to this node set finds it in `hstates`.
 66 | 		s = new State();
 67 | 		states.push(s);
 68 | 		hstates.set(key, s);
 69 | 
 70 | 		var trans = getTransitions(nodes);
 71 | 
 72 | 		for ( t in trans ) {
 73 | 			var target = makeState(t.n);
 74 | 			for (chr in t.chars) {
 75 | 				for (i in chr.min...(chr.max + 1)) { // range bounds are inclusive
 76 | 					s.trans.set(i, target);
 77 | 				}
 78 | 			}
 79 | 		}
 80 | 		// `finals` is filled in pattern order by the constructor, so the first final node found in `nodes` wins.
 81 | 		function setFinal() {
 82 | 			for( f in finals )
 83 | 				for( n in nodes )
 84 | 					if( n == f ) {
 85 | 						s.finalId = n.pid;
 86 | 						return;
 87 | 					}
 88 | 		}
 89 | 		if (s.finalId == -1) // NOTE(review): -1 presumably means "no final id assigned yet"; State is defined elsewhere, confirm
 90 | 			setFinal();
 91 | 		return s;
 92 | 	}
 93 | 
 94 | 	function getTransitions( nodes : Array<Node> ) {
 95 | 		var tl = [];
 96 | 		for( n in nodes )
 97 | 			for( t in n.trans )
 98 | 				tl.push(t);
 99 | 
100 | 		// Merge transition with the same target
101 | 		tl.sort(function(t1, t2) return t1.n.id - t2.n.id);
102 | 		var t0 = tl[0];
103 | 		for( i in 1...tl.length ) {
104 | 			var t1 = tl[i];
105 | 			if( t0.n == t1.n ) {
106 | 				tl[i - 1] = null;
107 | 				t1 = { chars : cunion(t0.chars, t1.chars), n : t1.n };
108 | 				tl[i] = t1;
109 | 			}
110 | 			t0 = t1;
111 | 		}
112 | 		while( tl.remove(null) ) {
113 | 		}
114 | 
115 | 		// Split char sets to make them disjoint
116 | 		var allChars = EMPTY;
117 | 		var allStates = new List<{ chars : Charset, n : Array<Node> }>();
118 | 		for( t in tl ) {
119 | 			var states = new List();
120 | 			states.push( { chars : cdiff(t.chars, allChars), n : [t.n] } );
121 | 			for( s in allStates ) {
122 | 				var nodes = s.n.copy();
123 | 				nodes.push(t.n);
124 | 				states.push( { chars : cinter(s.chars,t.chars), n : nodes } );
125 | 				states.push( { chars : cdiff(s.chars, t.chars), n : s.n } );
126 | 			}
127 | 			for( s in states )
128 | 				if( s.chars.length == 0 )
129 | 					states.remove(s);
130 | 			allChars = cunion(allChars, t.chars);
131 | 			allStates = states;
132 | 		}
133 | 
134 | 		// Epsilon closure of targets
135 | 		var states = [];
136 | 		for( s in allStates )
137 | 			states.push({ chars : s.chars, n : addNodes([], s.n) });
138 | 
139 | 		// Canonical ordering
140 | 		states.sort(function(s1, s2) {
141 | 			var a = s1.chars.length;
142 | 			var b = s2.chars.length;
143 | 			for( i in 0...(a < b?a:b) ) {
144 | 				var a = s1.chars[i];
145 | 				var b = s2.chars[i];
146 | 				if( a.min != b.min )
147 | 					return b.min - a.min;
148 | 				if( a.max != b.max )
149 | 					return b.max - a.max;
150 | 			}
151 | 			if( a < b )
152 | 				return b - a;
153 | 			return 0;
154 | 		});
155 | 		return states;
156 | 	}
157 | 
158 | 	function addNode( nodes : Array<Node>, n : Node ) {
159 | 		// Skip nodes that were already collected; this also stops epsilon cycles.
160 | 		if( nodes.indexOf(n) != -1 ) return;
161 | 		nodes.push(n);
162 | 		// Follow epsilon edges so every node reachable without input is included.
163 | 		for( reachable in n.epsilon ) addNode(nodes, reachable);
164 | 	}
165 | 
166 | 	function addNodes( nodes : Array<Node>, add : Array<Node> ) {
167 | 		for( n in add  )
168 | 			addNode(nodes, n);
169 | 		return nodes;
170 | 	}
171 | 
172 | 	inline function node(pid) {
173 | 		return new Node(uid++, pid);
174 | 	}
175 | 	// Builds the node graph for pattern `p` that exits into `finalId` and returns its entry node; new nodes are tagged with `pid`.
176 | 	function initNode( p : Pattern, finalId : Node, pid : Int ) {
177 | 		return switch( p ) {
178 | 		case Empty: // consumes no input: the entry is the exit itself
179 | 			finalId;
180 | 		case Match(c): // n --c--> finalId
181 | 			var n = node(pid);
182 | 			n.trans.push({ chars : c, n : finalId });
183 | 			n;
184 | 		case Star(p): // entry is the loop node `n`, so zero iterations are possible
185 | 			var n = node(pid);
186 | 			var an = initNode(p,n,pid);
187 | 			n.epsilon.push(an);
188 | 			n.epsilon.push(finalId);
189 | 			n;
190 | 		case Plus(p): // same wiring as Star, but entry is the body `an`, forcing one iteration
191 | 			var n = node(pid);
192 | 			var an = initNode(p,n,pid);
193 | 			n.epsilon.push(an);
194 | 			n.epsilon.push(finalId);
195 | 			an;
196 | 		case Next(a,b): // b is built first so that a's exit is b's entry
197 | 			initNode(a, initNode(b, finalId,pid),pid);
198 | 		case Choice(a,b): // epsilon-branch into both alternatives, which share the exit
199 | 			var n = node(pid);
200 | 			n.epsilon.push(initNode(a,finalId,pid));
201 | 			n.epsilon.push(initNode(b,finalId,pid));
202 | 			n;
203 | 		case Group(p): // grouping adds no nodes of its own
204 | 			initNode(p, finalId, pid);
205 | 		}
206 | 	}
207 | 
208 | 	// ----------------------- PATTERN PARSING ---------------------------
209 | 
210 | 	static inline var MAX_CODE = 255;
211 | 	static var EMPTY:Charset = [];
212 | 	static var ALL_CHARS = [ new CharRange( 0, MAX_CODE ) ];
213 | 
214 | 	static inline function single( c : Int ) : Charset {
215 | 		return [ { min : c, max : c } ];
216 | 	}
217 | 
218 | 	/**
219 | 		Parses the `pattern` `String` and returns an instance of `Pattern`.
220 | 
221 | 		If `pattern` is not a valid pattern string, an exception of `String` is
222 | 		thrown.
223 | 
224 | 		The following meta characters are supported:
225 | 
226 | 			- `*`: zero or more
227 | 			- `+`: one or more
228 | 			- `?`: zero or one
229 | 			- `|`: or
230 | 			- `[`: begin char range
231 | 			- `]`: end char range
232 | 			- `(`: begin group
233 | 			- `)`: end group
234 | 			- `\`: escape next char
235 | 
236 | 		These characters must be escaped if they are part of the pattern, by
237 | 		using `\\*`, `\\]` etc.
238 | 	**/
239 | 	public static function parse( pattern : String ) : Pattern {
240 | 		var p = parseInner(byte.ByteData.ofString(pattern));
241 | 		if( p == null ) throw "Invalid pattern '" + pattern + "'";
242 | 		return p.pattern;
243 | 	}
244 | 
245 | 	static function next( a, b ) {
246 | 		return a == Empty ? b : Next(a, b);
247 | 	}
248 | 
249 | 	static function plus(r) {
250 | 		return switch( r ) {
251 | 		case Next(r1, r2): Next(r1, plus(r2));
252 | 		default: Plus(r);
253 | 		}
254 | 	}
255 | 
256 | 	static function star(r) {
257 | 		return switch( r ) {
258 | 		case Next(r1, r2): Next(r1, star(r2));
259 | 		default: Star(r);
260 | 		}
261 | 	}
262 | 
263 | 	static function opt(r) {
264 | 		return switch( r ) {
265 | 		case Next(r1, r2): Next(r1, opt(r2));
266 | 		default: Choice(r, Empty);
267 | 		}
268 | 	}
269 | 
270 | 	static function cinter(c1,c2) {
271 | 		return ccomplement(cunion(ccomplement(c1), ccomplement(c2)));
272 | 	}
273 | 
274 | 	static function cdiff(c1,c2) {
275 | 		return ccomplement(cunion(ccomplement(c1), c2));
276 | 	}
277 | 	// Returns the ranges between -1 and MAX_CODE not covered by `c`. NOTE: if `c` starts with a range whose min is -1, that range is removed from `c` itself.
278 | 	static function ccomplement( c : Charset ) {
279 | 		var first = c[0];
280 | 		var start = first != null && first.min == -1 ? c.shift().max + 1 : -1; // shift() mutates the caller's array
281 | 		var out: Charset = [];
282 | 		for( k in c ) { // NOTE(review): assumes `c` is sorted and non-overlapping; confirm against callers
283 | 			out.push( { min : start, max : k.min - 1 } );
284 | 			start = k.max + 1;
285 | 		}
286 | 		if( start <= MAX_CODE )
287 | 			out.push( { min : start, max : MAX_CODE } );
288 | 		return out;
289 | 	}
290 | 
291 | 	static function cunion( ca : Charset, cb : Charset ) {
292 | 		var i = 0, j = 0;
293 | 		var out = [];
294 | 		var a = ca[i++], b = cb[j++];
295 | 		while( true ) {
296 | 			if( a == null ) {
297 | 				out.push(b);
298 | 				while( j < cb.length )
299 | 					out.push(cb[j++]);
300 | 				break;
301 | 			}
302 | 			if( b == null ) {
303 | 				out.push(a);
304 | 				while( i < ca.length )
305 | 					out.push(ca[i++]);
306 | 				break;
307 | 			}
308 | 			if( a.min <= b.min ) {
309 | 				if( a.max + 1 < b.min ) {
310 | 					out.push(a);
311 | 					a = ca[i++];
312 | 				} else if( a.max < b.max ) {
313 | 					b = { min : a.min, max : b.max };
314 | 					a = ca[i++];
315 | 				} else
316 | 					b = cb[j++];
317 | 			} else {
318 | 				// swap
319 | 				var tmp = ca;
320 | 				ca = cb;
321 | 				cb = tmp;
322 | 				var tmp = j;
323 | 				j = i;
324 | 				i = tmp;
325 | 				var tmp = a;
326 | 				a = b;
327 | 				b = tmp;
328 | 			}
329 | 		}
330 | 		return out;
331 | 	}
332 | 	// Recursive worker behind parse(): reads `pattern` from byte index `i`, returns the parsed pattern and the index where it stopped. `pDepth` counts the '(' groups still open. Returns null for a malformed range inside [...]; throws a String for group errors and for an unterminated [...].
333 | 	static function parseInner( pattern : byte.ByteData, i : Int = 0, pDepth : Int = 0 ) : { pattern: Pattern, pos: Int } {
334 | 		function readChar() { // decodes the byte(s) that follow a backslash
335 | 			var c = pattern.readByte(i++);
336 | 			if ( StringTools.isEof(c) ) {
337 | 				c = '\\'.code; // nothing after the backslash: use the backslash itself
338 | 			} else if (c == "x".code) { // \xHH: the next two bytes are hex digits
339 | 				c = Std.parseInt("0x" + pattern.readString(i, 2));
340 | 				i += 2;
341 | 			} else if (c >= "0".code && c <= "9".code) { // \NNN: decimal char code
342 | 				var v = c - 48;
343 | 				while(true) {
344 | 					var cNext = pattern.readByte(i);
345 | 					if (cNext >= "0".code && cNext <= "9".code) {
346 | 						v = v * 10 + (cNext - 48);
347 | 						++i;
348 | 					} else {
349 | 						break;
350 | 					}
351 | 				}
352 | 				c = v;
353 | 			}
354 | 			return c;
355 | 		}
356 | 
357 | 		var r = Empty;
358 | 		var l = pattern.length;
359 | 		while( i < l ) {
360 | 			var c = pattern.readByte(i++);
361 | 			if (c > 255) throw c;
362 | 			switch( c ) {
363 | 			case '+'.code if (r != Empty):
364 | 				r = plus(r);
365 | 			case '*'.code if (r != Empty):
366 | 				r = star(r);
367 | 			case '?'.code if (r != Empty):
368 | 				r = opt(r);
369 | 			case '|'.code if (r != Empty):
370 | 				var r2 = parseInner(pattern, i, pDepth); // keep the group depth so an unclosed '(' is still reported on this branch
371 | 				return {pattern: Choice(r, r2.pattern), pos: r2.pos};
372 | 			case '.'.code:
373 | 				r = next(r, Match(ALL_CHARS));
374 | 			case '('.code:
375 | 				var r2 = parseInner(pattern, i, pDepth + 1);
376 | 				i = r2.pos;
377 | 				r = next(r, r2.pattern);
378 | 			case ')'.code:
379 | 				if (r == Empty) throw "Empty group";
380 | 				return { pattern: Group(r), pos: i};
381 | 			case '['.code if (pattern.length > 1):
382 | 				var range = 0, acc:Charset = []; // range != 0 means "a '-' was seen, waiting for the upper bound"
383 | 				var not = i < l && pattern.readByte(i) == '^'.code;
384 | 				if( not ) i++;
385 | 				while( true ) {
386 | 					if( i >= l ) throw "Unterminated character class"; // input ended before the closing ']'
387 | 					var c = pattern.readByte(i++);
388 | 					if( c == ']'.code ) {
389 | 						if( range != 0 ) return null;
390 | 						break;
391 | 					} else if( c == '-'.code ) {
392 | 						if( range != 0 ) return null;
393 | 						var last = acc.pop();
394 | 						if( last == null )
395 | 							acc.push( { min : c, max : c } );
396 | 						else {
397 | 							if( last.min != last.max ) return null;
398 | 							range = last.min;
399 | 						}
400 | 					} else {
401 | 						if( c == '\\'.code ) {
402 | 							c = readChar();
403 | 						}
404 | 						if( range == 0 )
405 | 							acc.push( { min : c, max : c } );
406 | 						else {
407 | 							acc.push( { min : range, max : c } );
408 | 							range = 0;
409 | 						}
410 | 					}
411 | 				}
412 | 				var g:Charset = [];
413 | 				for( k in acc )
414 | 					g = cunion(g, [k]);
415 | 				if( not )
416 | 					g = cdiff(ALL_CHARS, g);
417 | 				r = next(r, Match(g));
418 | 			case '\\'.code:
419 | 				c = readChar();
420 | 				r = next(r, Match(single(c)));
421 | 			default:
422 | 				r = next(r, Match(single(c)));
423 | 			}
424 | 		}
425 | 		if (pDepth != 0) throw 'Found unclosed parenthesis while parsing "$pattern"';
426 | 		return {pattern:r, pos: i};
427 | 	}
428 | }
429 | 
430 | private enum Pattern {
431 | 	Empty;
432 | 	Match( c : Charset );
433 | 	Star( p : Pattern );
434 | 	Plus( p : Pattern );
435 | 	Next( p1 : Pattern, p2 : Pattern );
436 | 	Choice( p1 : Pattern, p2 : Pattern );
437 | 	Group ( p : Pattern );
438 | }
439 | 
440 | @:structInit private class CharRange {
441 |    public var min:Int;
442 |    public var max:Int;
443 |    public function new(min,max) {
444 |       this.min = min;
445 |       this.max = max;
446 |    }
447 | }
448 | private typedef Charset = Array<CharRange>;
449 | 
450 | private class Node {
451 | 	public var id : Int;
452 | 	public var pid : Int;
453 | 	public var trans : Array<{ chars : Charset, n : Node }>;
454 | 	public var epsilon : Array<Node>;
455 | 	public function new(id, pid) {
456 | 		this.id = id;
457 | 		this.pid = pid;
458 | 		trans = [];
459 | 		epsilon = [];
460 | 	}
461 | }
462 | 
463 | private class Transition {
464 | 	public var chars : Charset;
465 | 	public function new(chars) {
466 | 		this.chars = chars;
467 | 	}
468 | 	public function toString() {
469 | 		return Std.string(chars);
470 | 	}
471 | }
472 | 


--------------------------------------------------------------------------------
/src/hxparse/Lexer.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | /**
  4 | 	Lexer matches a sequence of characters against a set of rule patterns.
  5 | 
  6 | 	An instance of Lexer is created once for each input and maintains state
  7 | 	for that input. Tokens can then be obtained by calling the `token` method,
  8 | 	passing an instance of `Ruleset`.
  9 | 
 10 | 	Rule sets can be created manually, or by calling the static `buildRuleset`
 11 | 	method.
 12 | **/
 13 | class Lexer {
 14 | 
 15 | 	/**
 16 | 		The `String` that was matched by the most recent invocation of the
 17 | 		`token` method.
 18 | 	**/
 19 | 	public var current(default, null):String;
 20 | 
 21 | 	var input:byte.ByteData;
 22 | 	var source:String;
 23 | 	var pos:Int;
 24 | 
 25 | 	/**
 26 | 		Creates a new Lexer for `input`.
 27 | 
 28 | 		If `sourceName` is provided, it is used in error messages to denote
 29 | 		the position of an error.
 30 | 
 31 | 		If `input` is null, the result is unspecified.
 32 | 	**/
 33 | 	public function new(input:byte.ByteData, sourceName:String = "<null>") {
 34 | 		current = "";
 35 | 		this.input = input;
 36 | 		source = sourceName;
 37 | 		pos = 0;
 38 | 	}
 39 | 
 40 | 	/**
 41 | 		Returns the current position of `this` Lexer.
 42 | 	**/
 43 | 	public inline function curPos():Position {
 44 | 		return new Position(source, pos - current.length, pos);
 45 | 	}
 46 | 
 47 | 	/**
 48 | 		Returns the next token according to `ruleset`.
 49 | 
 50 | 		This method starts with `ruleset.state` and reads characters from `this`
 51 | 		input until no further state transitions are possible. It always returns
 52 | 		the longest match.
 53 | 
 54 | 		If a character is read which has no transition defined, an
 55 | 		`UnexpectedChar` exception is thrown.
 56 | 
 57 | 		If the input is in the end of file state upon method invocation,
 58 | 		`ruleset.eofFunction` is called with `this` Lexer as argument. If
 59 | 		`ruleset` defines no `eofFunction` field, a `haxe.io.Eof` exception
 60 | 		is thrown.
 61 | 
 62 | 		If `ruleset` is null, the result is unspecified.
 63 | 	**/
 64 | 	public function token<T>(ruleset:Ruleset<T>):T {
 65 | 		if (pos == input.length) { // input already exhausted when called
 66 | 			if (ruleset.eofFunction != null) return ruleset.eofFunction(this);
 67 | 			else throw new haxe.io.Eof();
 68 | 		}
 69 | 		var state = ruleset.state;
 70 | 		var lastMatch = null; // most recent state with finalId > -1, if any
 71 | 		var lastMatchPos = pos; // value of `pos` when lastMatch was recorded
 72 | 		var start = pos;
 73 | 
 74 | 		#if expose_lexer_state
 75 | 		stateCallback(state, pos, -1);
 76 | 		#end
 77 | 
 78 | 		while(true) {
 79 | 			if (state.finalId > -1) { // record this match, then keep reading in search of a longer one
 80 | 				lastMatch = state;
 81 | 				lastMatchPos = pos;
 82 | 			}
 83 | 			if (pos == input.length) {
 84 | 				break;
 85 | 			}
 86 | 			var i = input.readByte(pos);
 87 | 			++pos;
 88 | 			state = state.trans.get(i); // a null result ends the loop below
 89 | 
 90 | 			#if expose_lexer_state
 91 | 			stateCallback(state, pos-1, i);
 92 | 			#end
 93 | 
 94 | 			if (state == null)
 95 | 				break;
 96 | 		}
 97 | 		pos = lastMatchPos; // rewind: bytes read after the last recorded match are not consumed
 98 | 		current = input.readString(start, pos - start);
 99 | 		if (lastMatch == null || lastMatch.finalId == -1)
100 | 			throw new UnexpectedChar(String.fromCharCode(input.readByte(pos)), curPos()); // reports the byte at the rewound position
101 | 		return ruleset.functions[lastMatch.finalId](this); // finalId indexes the rule's function
102 | 	}
103 | 
104 | 	#if expose_lexer_state
105 | 	/**
106 | 
107 | 		@param	state	`null` if it's the last state visited
108 | 		@param	position	Position of the byte read
109 | 		@param	input	Transition input byte, `-1` if initial state
110 | 	**/
111 | 	dynamic public function stateCallback(state:State, position:Int, input:Int) {}
112 | 	#end
113 | 
114 | 	/**
115 | 		Builds a `Ruleset` from the given `rules` `Array`.
116 | 
117 | 		For each element of `rules`, its `rule` `String` is parsed into a
118 | 		`Pattern` using `LexEngine.parse`.
119 | 
120 | 		If `rules` is null, the result is unspecified.
121 | 	**/
122 | 	static public function buildRuleset<Token>(rules:Array<{rule:String,func:Lexer->Token}>, name:String = "") {
123 | 		var patterns = [], actions = [], onEof = null;
124 | 		for (entry in rules) {
125 | 			// An empty rule string designates the end-of-file handler.
126 | 			if (entry.rule == "") {
127 | 				onEof = entry.func;
128 | 				continue;
129 | 			}
130 | 			patterns.push(LexEngine.parse(entry.rule));
131 | 			actions.push(entry.func);
132 | 		}
133 | 		var entryState = new LexEngine(patterns).firstState();
134 | 		return new Ruleset(entryState, actions, onEof, name);
135 | 	}
136 | }


--------------------------------------------------------------------------------
/src/hxparse/LexerTokenSource.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | class LexerTokenSource<Token> {
 4 | 	var lexer:Lexer;
 5 | 	public var ruleset:Ruleset<Token>;
 6 | 	/** Pairs `lexer` with the `ruleset` used to read tokens from it. **/
 7 | 	public function new(lexer, ruleset) {
 8 | 		this.ruleset = ruleset;
 9 | 		this.lexer = lexer;
10 | 	}
11 | 	/** Reads the next token from the lexer using the current `ruleset`. **/
12 | 	public function token():Token {
13 | 		return this.lexer.token(this.ruleset);
14 | 	}
15 | 	/** Returns the lexer's current position. **/
16 | 	public function curPos():Position {
17 | 		return this.lexer.curPos();
18 | 	}
19 | }


--------------------------------------------------------------------------------
/src/hxparse/NoMatch.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	A NoMatch exception is thrown if an outer token matching fails.
 5 | 
 6 | 	Matching can continue because no tokens have been consumed.
 7 | **/
 8 | class NoMatch<T> extends ParserError {
 9 | 
10 | 	/**
11 | 		The token which was encountered and could not be matched.
12 | 	**/
13 | 	public var token(default, null):T;
14 | 
15 | 	/**
16 | 		Creates a new NoMatch exception.
17 | 	**/
18 | 	public function new(pos:hxparse.Position, token:T) {
19 | 		super(pos);
20 | 		this.token = token;
21 | 	}
22 | 
23 | 	override public function toString() {
24 | 		return 'No match: $token';
25 | 	}
26 | }


--------------------------------------------------------------------------------
/src/hxparse/Parser.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | /**
  4 | 	Parser is the base class for all custom parsers.
  5 | 
  6 | 	The intended usage is to extend it and utilize its method as an API where
  7 | 	required.
  8 |  */
  9 | @:generic
 10 | class Parser<S:TokenSource<Token>, Token> {
 11 | 
 12 | 	/**
 13 | 		Returns the last matched token.
 14 | 
 15 | 		It is updated by `junk` each time a token is consumed.
 16 | 	**/
 17 | 	public var last(default, null):Token;
 18 | 
 19 | 	var stream:S;
 20 | 	var token:haxe.ds.GenericStack.GenericCell<Token>;
 21 | 
 22 | 	/**
 23 | 		Creates a new Parser instance over `TokenSource` `stream`
 24 | 	**/
 25 | 	public function new(stream:S) {
 26 | 		this.stream = stream;
 27 | 	}
 28 | 
 29 | 	/**
 30 | 		Returns the `n`th token (0 = current token) without consuming it, reading further tokens from the stream as needed.
 31 | 	**/
 32 | 	@:dox(show)
 33 | 	#if cs inline #end // Workaround for https://github.com/HaxeFoundation/haxe/issues/3212
 34 | 	function peek(n:Int):Token {
 35 | 		if (token == null) {
 36 | 			// Empty lookahead: fetch the current token. `n` is left untouched so it indexes from this cell whether or not the cache was empty.
 37 | 			token = new haxe.ds.GenericStack.GenericCell<Token>(stream.token(), null);
 38 | 		}
 39 | 		var tok = token;
 40 | 		while (n > 0) {
 41 | 			if (tok.next == null) tok.next = new haxe.ds.GenericStack.GenericCell<Token>(stream.token(), null);
 42 | 			tok = tok.next;
 43 | 			n--;
 44 | 		}
 45 | 		return tok.elt;
 46 | 	}
 47 | 
 48 | 	/**
 49 | 		Consumes the current token.
 50 | 
 51 | 		This method is automatically called after a successful match.
 52 | 	**/
 53 | 	@:dox(show)
 54 | 	inline function junk() {
 55 | 		last = token.elt;
 56 | 		token = token.next;
 57 | 	}
 58 | 
 59 | 	/**
 60 | 		Returns the current lexer position.
 61 | 	**/
 62 | 	@:dox(show)
 63 | 	public inline function curPos() {
 64 | 		return stream.curPos();
 65 | 	}
 66 | 
 67 | 	/**
 68 | 		Invokes `f` and then `separatorFunc` with the current token until the
 69 | 		result of that call is `false`.
 70 | 
 71 | 		The result is an Array containing the results of all calls to `f`.
 72 | 
 73 | 		A typical use case is parsing function arguments which are separated by
 74 | 		a comma.
 75 | 	**/
 76 | 	@:dox(show)
 77 | 	function parseSeparated<T>(separatorFunc:Token->Bool, f:Void->T):Array<T> {
 78 | 		var acc = [];
 79 | 		while(true) {
 80 | 			try {
 81 | 				acc.push(f());
 82 | 			} catch(e:hxparse.NoMatch<Dynamic>) {
 83 | 				break;
 84 | 			}
 85 | 			if (separatorFunc(peek(0))) {
 86 | 				junk();
 87 | 			} else {
 88 | 				break;
 89 | 			}
 90 | 		}
 91 | 		return acc;
 92 | 	}
 93 | 
 94 | 	/**
 95 | 		Returns the result of calling `f()` if a match is made, or `null`
 96 | 		otherwise.
 97 | 	**/
 98 | 	@:dox(show)
 99 | 	function parseOptional<T>(f:Void->T) {
100 | 		try {
101 | 			return f();
102 | 		} catch(e:hxparse.NoMatch<Dynamic>) {
103 | 			return null;
104 | 		}
105 | 	}
106 | 
107 | 	/**
108 | 		Calls `f` repeatedly until it throws `NoMatch`.
109 | 
110 | 		Returns an Array with the results of all successful calls, in order.
111 | 	**/
112 | 	@:dox(show)
113 | 	function parseRepeat<T>(f:Void->T) {
114 | 		var results = [];
115 | 		try {
116 | 			while (true) {
117 | 				results.push(f());
118 | 			}
119 | 		} catch(_:hxparse.NoMatch<Dynamic>) {
120 | 		}
121 | 		return results;
122 | 	}
123 | 	
124 | 	/**
125 | 		Returns the result of calling `f()` if a match is made, or throws
126 | 		`Unexpected` (via `unexpected()`) if `f` throws `NoMatch`.
127 | 	**/
128 | 	function parseExpect<T>(f:Void->T) {
129 | 		try {
130 | 			return f();
131 | 		} catch(_:NoMatch<Dynamic>) {
132 | 			unexpected();
133 | 		}
134 | 	}
135 | 	
136 | 	/**
137 | 		Creates (but does not throw) a `NoMatch` exception holding the current stream position and the current token.
138 | 	**/
139 | 	inline function noMatch() {
140 | 		return new NoMatch(stream.curPos(), peek(0));
141 | 	}
142 | 	
143 | 	/**
144 | 		Throws an `Unexpected` exception holding the current token and the current stream position.
145 | 	**/
146 | 	inline function unexpected():Dynamic {
147 | 		throw new Unexpected(peek(0), stream.curPos());
148 | 	}
149 | 	
150 | 	/**
151 | 		Macro that passes the cases of a `switch` (optionally wrapped in parentheses) to `ParserBuilderImpl.transformSwitch`; any other expression is a compile-time error.
152 | 	**/
153 | 	@:access(hxparse.ParserBuilderImpl.transformSwitch)
154 | 	static public macro function parse(e:haxe.macro.Expr) {
155 | 		switch (e.expr) {
156 | 			case ESwitch(_, cases, edef) | EParenthesis({expr: ESwitch(_, cases, edef)}):
157 | 				return hxparse.ParserBuilderImpl.transformSwitch(cases, edef);
158 | 			case _:
159 | 				return haxe.macro.Context.error("Expected switch expression", e.pos);
160 | 		}
161 | 	}
162 | }
163 | 


--------------------------------------------------------------------------------
/src/hxparse/ParserBuilder.hx:
--------------------------------------------------------------------------------
1 | package hxparse;
2 | /** Marker interface: implementing classes are built by `hxparse.ParserBuilderImpl.build()`. **/
3 | @:autoBuild(hxparse.ParserBuilderImpl.build())
4 | interface ParserBuilder { }


--------------------------------------------------------------------------------
/src/hxparse/ParserBuilderImpl.macro.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | import haxe.macro.Context;
  4 | import haxe.macro.Expr;
  5 | 
  6 | using haxe.macro.Tools;
  7 | using Lambda;
  8 | 
  9 | private typedef ParserCase = {
 10 | 	expr: Expr,
 11 | 	head: Expr,
 12 | 	tail: Array<Expr>
 13 | }
 14 | 
 15 | private enum CaseGroup {
 16 | 	Simple(group:Array<ParserCase>);
 17 | 	Complex(c:ParserCase);
 18 | }
 19 | 
 20 | class ParserBuilderImpl {
 21 | 	static public function build():Array<Field> { // build macro entry point
 22 | 		var buildFields = Context.getBuildFields();
 23 | 		for (f in buildFields) {
 24 | 			switch(f.kind) {
 25 | 				case FFun(fn):
 26 | 					if (fn.expr != null) fn.expr = map(fn.expr); // functions without a body are left alone
 27 | 				case _:
 28 | 			}
 29 | 		}
 30 | 		return buildFields;
 31 | 	}
 32 | 
 33 | 	static function punion(p1:Position, p2:Position) {
 34 | 		// Smallest position spanning both `p1` and `p2`; the file is taken from `p1`.
 35 | 		var first = Context.getPosInfos(p1);
 36 | 		var second = Context.getPosInfos(p2);
 37 | 		var lo = first.min < second.min ? first.min : second.min;
 38 | 		var hi = first.max > second.max ? first.max : second.max;
 39 | 		var merged = {file: first.file, min: lo, max: hi};
 40 | 		return Context.makePosition(merged);
 41 | 	}
 42 | 
 43 | 	static function map(e:Expr) {
 44 | 		// Recursively rewrites every `switch stream` found inside `e` into parser code.
 45 | 		return switch(e.expr) {
 46 | 			case ESwitch({expr: EConst(CIdent("stream"))}, cl, edef):
 47 | 				transformSwitch(cl, edef);
 48 | 			case EBlock([]):
 49 | 				e;
 50 | 			case EBlock(el):
 51 | 				// Map into a fresh array, in statement order: the statement list of
 52 | 				// the original block expression is no longer modified (no `pop`).
 53 | 				macro @:pos(e.pos) $b{el.map(map)};
 54 | 			case _: e.map(map);
 55 | 		}
 56 | 	}
 57 | 
 58 | 	static function transformSwitch(cl:Array<Case>, edef:Null<Expr>) {
 59 | 		if (edef != null) // a `default:` branch is handled like a trailing `case _:`
 60 | 			cl.push({values: [macro _], expr: edef, guard: null});
 61 | 		return transformCases(cl);
 62 | 	}
 63 | 
 64 | 	static function transformCases(cl:Array<Case>) {
 65 | 		var groups = [];
 66 | 		var group = [];
 67 | 		var def = noMatch;
 68 | 		for (c in cl) {
 69 | 			switch(c.values) {
 70 | 				case [{expr:EArrayDecl(el)}]:
 71 | 					var head = el.shift();
 72 | 					if (head == null) Context.error("Expected at least one pattern", c.values[0].pos); // `case []`
 73 | 					var chead = {head:head, tail: el, expr:c.expr == null ? macro null : map(c.expr)};
 74 | 					switch(head.expr) {
 75 | 						case EBinop(_):
 76 | 							if (group.length > 0) groups.push(Simple(group));
 77 | 							groups.push(Complex(chead));
 78 | 							group = [];
 79 | 						case _:
 80 | 							group.push(chead);
 81 | 					}
 82 | 				case [{expr:EConst(CIdent("_"))}]:
 83 | 					def = c.expr == null ? macro null : map(c.expr);
 84 | 				case [e]:
 85 | 					Context.error("Expected [ patterns ]", e.pos);
 86 | 				case _:
 87 | 					Context.error("Comma notation is not allowed while matching streams", punion(c.values[0].pos, c.values[c.values.length - 1].pos));
 88 | 			}
 89 | 		}
 90 | 		if (group.length > 0)
 91 | 			groups.push(Simple(group));
 92 | 
 93 | 		// Fold the groups from last to first; starting at `def` also covers a switch without any `[...]` case.
 94 | 		var elast = def;
 95 | 		while (groups.length > 0) {
 96 | 			elast = makeCase(groups.pop(), elast);
 97 | 		}
 98 | 		return elast;
 99 | 	}
 99 | 
100 | 	static var unexpected = macro unexpected();
101 | 	static var noMatch = macro throw noMatch();
102 | 	/** Builds the expression for one case group, with `def` as the fallback expression. **/
103 | 	static function makeCase(g:CaseGroup, def:Expr) {
104 | 		return switch(g) {
105 | 			case Simple(group): // one `switch peek(0)` over all heads, with `def` as its default
106 | 				var cl = group.map(makeInner);
107 | 				cl.iter(function(c) {
108 | 					c.expr = macro @:pos(c.expr.pos) { junk(); ${c.expr}; };
109 | 				});
110 | 				{
111 | 					pos: def.pos,
112 | 					expr: ESwitch(macro peek(0), cl, def)
113 | 				}
114 | 			case Complex(c): // the head itself is handed to `makePattern`
115 | 				var inner = makeInner(c);
116 | 				makePattern(c.head, inner.expr, def);
117 | 		}
118 | 	}
119 | 
120 | 	static function makeInner(c:ParserCase) {
121 | 		// Wraps the case body in its tail patterns, starting with the last
122 | 		// pattern; every tail pattern gets `unexpected` as its fallback.
123 | 		var body = c.expr;
124 | 		while (c.tail.length > 0) {
125 | 			var pat = c.tail.pop();
126 | 			body = makePattern(pat, body, unexpected);
127 | 		}
128 | 		return {values: [c.head], guard: null, expr: body};
129 | 	}
130 | 	/** Builds the code for a single pattern `pat`: `e` is the continuation on a match, `def` the fallback where one is generated. **/
131 | 	static function makePattern(pat:Expr, e:Expr, def:Expr) {
132 | 		return switch(pat.expr) {
133 | 			case EBinop(OpAssign, {expr: EConst(CIdent(s))}, e2): // `s = e2`: evaluate `e2`, bind it to `s` unless `s` is `_`
134 | 				if (def == unexpected || def == noMatch) { // shared default expressions: no try/catch wrapper is generated
135 | 					var e1 = s == "_" ? e2 : macro var $s = $e2;
136 | 					macro {
137 | 						$e1;
138 | 						$e;
139 | 					}
140 | 				} else {
141 | 					buildExtractor(pat, e, e2, s, def);
142 | 				}
143 | 			case EBinop(OpBoolAnd, e1, e2): // `e1 && e2`: token pattern `e1` guarded by `e2`
144 | 				macro @:pos(pat.pos) {
145 | 					switch peek(0) {
146 | 						case $e1 if ($e2):
147 | 							junk();
148 | 							$e;
149 | 						case _: $def;
150 | 					}
151 | 				}
152 | 			case EBinop(OpBoolOr, e1, e2): // `e1 || e2`: fallback throws the current position followed by `e2`
153 | 				makePattern(e1, e, macro throw stream.curPos() + ": " +$e2);
154 | 			case _: // any other pattern is matched directly against `peek(0)`
155 | 				macro @:pos(pat.pos) switch peek(0) {
156 | 					case $pat:
157 | 						junk();
158 | 						$e;
159 | 					case _: $def;
160 | 				}
161 | 		}
162 | 	}
163 | 	/** Builds `s = e2` followed by `e` inside a try block; a `hxparse.NoMatch` thrown by either is caught and replaced by `def`. **/
164 | 	static function buildExtractor(pat, e, e2, s, def) {
165 | 		var e1 = s == "_" ? e2 : macro var $s = $e2;
166 | 		return macro @:pos(pat.pos) {
167 | 			try {
168 | 				$e1;
169 | 				$e;
170 | 			} catch (_:hxparse.NoMatch<Dynamic>) {
171 | 				$def;
172 | 			}
173 | 		}
174 | 	}
175 | }


--------------------------------------------------------------------------------
/src/hxparse/ParserError.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	This is the base class of all parser errors.
 5 | **/
 6 | class ParserError {
 7 | 	/**
 8 | 		The position in the input where `this` exception occurred.
 9 | 	**/
10 | 	public var pos(default, null):Position;
11 | 
12 | 	public function new(pos:Position) {
13 | 		this.pos = pos;
14 | 	}
15 | 
16 | 	public function toString() { // generic text; subclasses override this
17 | 		return "Parser error";
18 | 	}
19 | }


--------------------------------------------------------------------------------
/src/hxparse/Position.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | /**
  4 | 	The position information maintained by `Lexer`.
  5 | **/
  6 | class Position {
  7 | 	/**
  8 | 		Name of the source.
  9 | 	**/
 10 | 	public var psource : String;
 11 | 
 12 | 	/**
 13 | 		The first character position, counting from the beginning of the input.
 14 | 	**/
 15 | 	public var pmin : Int;
 16 | 
 17 | 	/**
 18 | 		The last character position, counting from the beginning of the input.
 19 | 	**/
 20 | 	public var pmax : Int;
 21 | 
 22 | 	/**
 23 | 		Creates a new `Position` from the given information.
 24 | 	**/
 25 | 	public function new(source, min, max) {
 26 | 		psource = source;
 27 | 		pmin = min;
 28 | 		pmax = max;
 29 | 	}
 30 | 
 31 | 	/**
 32 | 		Returns a readable representation of `this` position.
 33 | 	**/
 34 | 	public function toString() {
 35 | 		return '$psource:characters $pmin-$pmax';
 36 | 	}
 37 | 	/** Resolves `pmin` and `pmax` to line numbers (counted from 1) and offsets within their lines by scanning `input` for "\n" bytes. **/
 38 | 	public function getLinePosition(input:byte.ByteData) {
 39 | 		var lineMin = 1;
 40 | 		var lineMax = 1;
 41 | 		var posMin = 0; // start offset of the line containing `pmin` (until converted below)
 42 | 		var posMax = 0;
 43 | 		var cur = 0;
 44 | 		while (cur < pmin) {
 45 | 			if (input.readByte(cur) == "\n".code) {
 46 | 				lineMin++;
 47 | 				posMin = cur + 1; // the next line starts right after the newline
 48 | 			}
 49 | 			cur++;
 50 | 		}
 51 | 		lineMax = lineMin;
 52 | 		posMax = posMin; // the second scan continues from the same line start
 53 | 		posMin = cur - posMin; // convert the line start into an offset within the line
 54 | 		while (cur < pmax) {
 55 | 			if (input.readByte(cur) == "\n".code) {
 56 | 				lineMax++;
 57 | 				posMax = cur + 1;
 58 | 			}
 59 | 			cur++;
 60 | 		}
 61 | 		posMax = cur - posMax; // same conversion for the end position
 62 | 		return {
 63 | 			lineMin: lineMin,
 64 | 			lineMax: lineMax,
 65 | 			posMin: posMin,
 66 | 			posMax: posMax
 67 | 		}
 68 | 	}
 69 | 
 70 | 	/**
 71 | 		Formats `this` position as text, with line numbers resolved from `input`.
 72 | 
 73 | 		If `input` is null, the result is unspecified.
 74 | 	**/
 75 | 	public function format(input:byte.ByteData) {
 76 | 		var linePos = getLinePosition(input);
 77 | 		// a single-line position shows its character range, a multi-line one its line range
 78 | 		return if (linePos.lineMin == linePos.lineMax)
 79 | 			'${psource}:${linePos.lineMin}: characters ${linePos.posMin}-${linePos.posMax}'
 80 | 		else
 81 | 			'${psource}:lines ${linePos.lineMin}-${linePos.lineMax}';
 82 | 	}
 83 | 
 84 | 	/**
 85 | 		Unifies two positions `p1` and `p2`, using the minimum `pmin` and
 86 | 		maximum `pmax` of both.
 87 | 
 88 | 		The resulting `psource` is taken from `p1`.
 89 | 
 90 | 		If `p1` or `p2` are null, the result is unspecified.
 91 | 	**/
 92 | 	static public function union(p1:Position, p2:Position) {
 93 | 		return new Position(p1.psource, p1.pmin < p2.pmin ? p1.pmin : p2.pmin, p1.pmax > p2.pmax ? p1.pmax : p2.pmax);
 94 | 	}
 95 | }
 96 | 
 97 | private typedef Position2 = {
 98 | 	lineMin: Int,
 99 | 	lineMax: Int,
100 | 	posMin: Int,
101 | 	posMax: Int
102 | }
103 | 


--------------------------------------------------------------------------------
/src/hxparse/RuleBuilder.hx:
--------------------------------------------------------------------------------
  1 | package hxparse;
  2 | 
  3 | import haxe.macro.Context;
  4 | import haxe.macro.Expr;
  5 | 
  6 | using Lambda;
  7 | using haxe.macro.Tools;
  8 | 
  9 | /**
 10 | 	The RuleBuilder interface provides syntactic shortcuts for writing lexer
 11 | 	rules.
 12 | **/
 13 | #if !macro
 14 | @:autoBuild(hxparse.RuleBuilderImpl.build())
 15 | #end
 16 | interface RuleBuilder { }
 17 | 
 18 | class RuleBuilderImpl {
 19 | 	macro static public function build():Array<Field> {
 20 | 		var fields = Context.getBuildFields();
 21 | 		var fieldExprs = new Map();
 22 | 		var delays = [];
 23 | 		var ret = [];
 24 | 		var rules = [];
 25 | 		for (field in fields) {
 26 | 			if (field.access.exists(function(a) return a == AStatic))
 27 | 				switch(field.kind) {
 28 | 					case FVar(t, e) if (e != null):
 29 | 						switch(e.expr) {
 30 | 							case EMeta({name: ":rule"}, e):
 31 | 								rules.push(field.name);
 32 | 								delays.push(transformRule.bind(field, e, t, fieldExprs));
 33 | 							case EMeta({name: ":mapping", params: args}, e):
 34 | 								var offset = switch(args) {
 35 | 									case [{expr: EConst(CInt(i))}]: Std.parseInt(i);
 36 | 									case _: 0;
 37 | 								}
 38 | 								delays.push(transformMapping.bind(field, e, offset));
 39 | 							case _:
 40 | 								fieldExprs.set(field.name, e);
 41 | 						}
 42 | 					case _:
 43 | 				}
 44 | 			if (!field.meta.exists(function(m) return m.name == ":ruleHelper")) {
 45 | 				ret.push(field);
 46 | 			}
 47 | 		}
 48 | 		for (delay in delays)
 49 | 			delay();
 50 | 		var ruleIdents = [for (rv in rules) macro $i{rv}];
 51 | 		ret.push( {
 52 | 			name: "generatedRulesets",
 53 | 			access: [APublic, AStatic],
 54 | 			kind: FVar(TPath({
 55 | 				name: "Array",
 56 | 				pack: [],
 57 | 				params: [TPType(TPath({
 58 | 					name: "Ruleset",
 59 | 					pack: ["hxparse"],
 60 | 					params: [TPType(TPath( {
 61 | 						name: "Dynamic",
 62 | 						pack: []
 63 | 					}))]
 64 | 				}))]
 65 | 			}), macro $a{ruleIdents}),
 66 | 			pos: Context.currentPos()
 67 | 		});
 68 | 		return ret;
 69 | 	}
 70 | 
 71 | 	#if macro
 72 | 
 73 | 	#if unifill
 74 | 
 75 | 	static function handleUnicode(s:String, p:Position) {
 76 | 		function getPosInfo(i, l) {
 77 | 			var p = Context.getPosInfos(p);
 78 | 			return Context.makePosition({
 79 | 				min: p.min + i,
 80 | 				max: p.min + i + l,
 81 | 				file: p.file
 82 | 			});
 83 | 		}
 84 | 		var uLength = unifill.Unifill.uLength(s);
 85 | 		if (uLength == s.length) {
 86 | 			return s;
 87 | 		}
 88 | 		var buf = new StringBuf();
 89 | 		var itr = new unifill.InternalEncodingIter(s, 0, s.length);
 90 | 		while (itr.hasNext()) {
 91 | 			var i = itr.next();
 92 | 			var c = unifill.InternalEncoding.charAt(s, i);
 93 | 			switch (c) {
 94 | 				case '[':
 95 | 					buf.add("(");
 96 | 					var first = true;
 97 | 					while(true) {
 98 | 						if (!itr.hasNext()) {
 99 | 							Context.error("Unterminated regular expression", getPosInfo(itr.index, 1));
100 | 						}
101 | 						var i = itr.next();
102 | 						var c = unifill.InternalEncoding.charAt(s, i);
103 | 						switch (c) {
104 | 							case "]":
105 | 								break;
106 | 							case "^" if (first):
107 | 								var p = unifill.InternalEncoding.codePointCount(s, 0, i);
108 | 								Context.error("Not-ranges are not supported in unicode strings", getPosInfo(i, 1));
109 | 							case _:
110 | 								if (!first) {
111 | 									buf.add("|");
112 | 								}
113 | 								buf.add("(");
114 | 								if (!itr.hasNext()) {
115 | 									Context.error("Unterminated regular expression", getPosInfo(itr.index, 1));
116 | 								}
117 | 								var w = unifill.InternalEncoding.codePointWidthAt(s, i);
118 | 								if (unifill.InternalEncoding.charAt(s, i + w) == "-") {
119 | 									itr.next();
120 | 									if (!itr.hasNext()) {
121 | 										Context.error("Unterminated regular expression", getPosInfo(itr.index, 1));
122 | 									}
123 | 									var k = itr.next();
124 | 									var cNext = unifill.InternalEncoding.charAt(s, k);
125 | 									if (unifill.InternalEncoding.codePointAt(c, 0) > 0x7F) {
126 | 										Context.error("Unicode ranges are not supported", getPosInfo(i, 3));
127 | 									} else {
128 | 										buf.add("[");
129 | 										buf.add(c);
130 | 										buf.add("-");
131 | 										buf.add(cNext);
132 | 										buf.add("]");
133 | 									}
134 | 								} else {
135 | 									buf.add(c);
136 | 								}
137 | 								buf.add(")");
138 | 						}
139 | 						first = false;
140 | 					}
141 | 					buf.add(")");
142 | 				case _:
143 | 					buf.add(c);
144 | 			}
145 | 		}
146 | 		return buf.toString();
147 | 	}
148 | 
149 | 	#end
150 | 	/** Converts a rule expression (string, regex literal, identifier found in `fields`, or `a + b` concatenation) into a pattern string. **/
151 | 	static function makeRule(fields:Map<String,Expr>, rule:Expr):String {
152 | 		return switch(rule) {
153 | 			case macro $v{(s:String)}: #if unifill handleUnicode(s, rule.pos) #else s #end;
154 | 			case macro $i{i} if (fields.exists(i)): makeRule(fields, fields.get(i)); // unknown identifiers end up in "Invalid rule"
155 | 			case macro $e1 + $e2: "(" + makeRule(fields, e1) +")(" + makeRule(fields, e2) +")";
156 | 			case {expr:EConst(CRegexp(r, opt))}:
157 | 				if (opt != "") {
158 | 					Context.error("Cannot use regular expression flags for lexer rules", rule.pos);
159 | 				}
160 | 				r;
161 | 			case _: Context.error("Invalid rule", rule.pos);
162 | 		}
163 | 	}
164 | 	/** Replaces the initializer of a `@:rule` field with a `hxparse.Lexer.buildRuleset` call built from its `pattern => expr` entries. **/
165 | 	static function transformRule(field:Field, e:Expr, t:ComplexType, fields:Map<String,Expr>) {
166 | 		var el = switch(e.expr) {
167 | 			case EArrayDecl(el): el;
168 | 			case _: Context.error("Expected pattern => function map declaration", e.pos);
169 | 		}
170 | 		var el = el.map(function(e) {
171 | 			function loop(e:Expr) {
172 | 				return switch(e.expr) {
173 | 					case EBinop(OpArrow, rule, e): // `pattern => expr`: `expr` becomes the body of a function taking `lexer`
174 | 						macro @:pos(e.pos) {rule:$v{makeRule(fields, rule)}, func:function(lexer:hxparse.Lexer):$t return $e};
175 | 					case EConst(CIdent(s)) if (fields.exists(s)): // identifier of a known field: use that field's expression
176 | 						loop(fields.get(s));
177 | 					case _:
178 | 						Context.error("Expected pattern => function", e.pos);
179 | 				}
180 | 			}
181 | 			return loop(e);
182 | 		});
183 | 		var e = macro $a{el};
184 | 		var e = macro hxparse.Lexer.buildRuleset($e, $v{field.name});
185 | 		field.kind = FVar(null, e);
186 | 		return e;
187 | 	}
188 | 	/** Replaces the initializer of a `@:mapping` field with `name => constructor` entries for every constructor of the enum `e` refers to. **/
189 | 	static function transformMapping(field:Field, e:Expr, offset:Int) {
190 | 		var t = Context.typeof(e).follow();
191 | 		var sl = [];
192 | 		switch(t) {
193 | 			case TAnonymous(_.get() => {status: AEnumStatics(_.get() => e)}):
194 | 				for (f in e.names) {
195 | 					var name = macro @:pos(e.pos) $i{f};
196 | 					var cName = f.charAt(offset).toLowerCase() + f.substr(offset + 1); // skip `offset` characters, lower-case the next one
197 | 					sl.push(macro $v{cName} => $name);
198 | 				}
199 | 			case _:
200 | 				Context.error("Invalid mapping type", e.pos);
201 | 		}
202 | 		var e = macro $a{sl};
203 | 		field.kind = FVar(null, e);
204 | 		return e;
205 | 	}
206 | 
207 | 	#end
208 | }


--------------------------------------------------------------------------------
/src/hxparse/Ruleset.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	A Ruleset wraps an input state and the semantic callback functions for the
 5 | 	`Lexer`.
 6 | **/
 7 | class Ruleset<Token> {
 8 | 
 9 | 	/**
10 | 		The initial state.
11 | 	**/
12 | 	public var state:State;
13 | 
14 | 	/**
15 | 		The semantic functions.
16 | 	**/
17 | 	public var functions:Array<Lexer->Token>;
18 | 
19 | 	/**
20 | 		The callback function for when end of file state is reached.
21 | 	**/
22 | 	public var eofFunction:Lexer->Token;
23 | 
24 | 	/**
25 | 		Informative name for the state, if any. Generated automatically from field name by RuleBuilder if @:rule is used.
26 | 	**/
27 | 	public var name:String;
28 | 
29 | 	/**
30 | 		Creates a new Ruleset.
31 | 	**/
32 | 	public function new(state, functions, eofFunction, name = "") {
33 | 		this.state = state;
34 | 		this.functions = functions;
35 | 		this.eofFunction = eofFunction;
36 | 		this.name = name;
37 | 	}
38 | }


--------------------------------------------------------------------------------
/src/hxparse/State.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	Represents a state in the state machine generated by the `LexEngine`.
 5 | **/
 6 | class State {
 7 | 	/**
 8 | 		The transition vector, where the index corresponds to a char code.
 9 | 	**/
10 | 	public var trans:haxe.ds.Vector<State>;
11 | 
12 | 	/**
13 | 		The final id of `this` state; the constructor initializes it to -1.
14 | 	**/
15 | 	public var finalId:Int;
16 | 
17 | 	/**
18 | 		Creates a new State with `finalId` -1 and 256 transition slots.
19 | 	**/
20 | 	public function new() {
21 | 		finalId = -1;
22 | 		trans = new haxe.ds.Vector(256);
23 | 	}
24 | }


--------------------------------------------------------------------------------
/src/hxparse/TokenSource.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	Defines the structure of a type usable as input for a `Parser`.
 5 | **/
 6 | typedef TokenSource<Token> = {
 7 | 
 8 | 	/**
 9 | 		Returns the next token
10 | 	**/
11 | 	function token():Token;
12 | 
13 | 	/**
14 | 		Returns the current `Position` of `this` TokenSource.
15 | 	**/
16 | 	function curPos():Position;
17 | }


--------------------------------------------------------------------------------
/src/hxparse/Unexpected.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	Unexpected is thrown by `Parser.unexpected`, which is invoked when an inner
 5 | 	token matching fails.
 6 | 
 7 | 	Unlike `NoMatch`, this exception denotes that the stream is in an
 8 | 	irrecoverable state because tokens have been consumed.
 9 | **/
10 | class Unexpected<Token> extends ParserError {
11 | 
12 | 	/**
13 | 		The token which was found.
14 | 	**/
15 | 	public var token:Token;
16 | 
17 | 	/**
18 | 		Creates a new instance of Unexpected.
19 | 	**/
20 | 	public function new(token:Token, pos) {
21 | 		super(pos);
22 | 		this.token = token;
23 | 	}
24 | 
25 | 	/**
26 | 		Returns a readable representation of `this` exception.
27 | 	**/
28 | 	override public function toString() {
29 | 		return 'Unexpected $token';
30 | 	}
31 | }


--------------------------------------------------------------------------------
/src/hxparse/UnexpectedChar.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | /**
 4 | 	UnexpectedChar is thrown by `Lexer.token` if it encounters a character for
 5 | 	which no state transition is defined.
 6 | **/
 7 | class UnexpectedChar extends ParserError {
 8 | 
 9 | 	/**
10 | 		The character which caused `this` exception.
11 | 	**/
12 | 	public var char:String;
13 | 
14 | 	/**
15 | 		Creates a new instance of UnexpectedChar.
16 | 	**/
17 | 	public function new(char, pos) {
18 | 		super(pos);
19 | 		this.char = char;
20 | 	}
21 | 
22 | 	/**
23 | 		Returns a readable representation of `this` exception.
24 | 	**/
25 | 	override public function toString() {
26 | 		return 'Unexpected $char';
27 | 	}
28 | }


--------------------------------------------------------------------------------
/src/hxparse/Utils.hx:
--------------------------------------------------------------------------------
 1 | package hxparse;
 2 | 
 3 | import hxparse.Unexpected;
 4 | import hxparse.UnexpectedChar;
 5 | import hxparse.NoMatch;
 6 | 
 7 | /**
 8 | 	This class provides some static utility methods.
 9 | **/
10 | class Utils {
11 | 
12 | 	/**
13 | 		Tries to invoke `f` and return its value, while catching lexer and
14 | 		parser exceptions derived from `hxparse.ParserError`, for example
15 | 		`hxparse.Unexpected` and `hxparse.UnexpectedChar`.
16 | 
17 | 		If no exception occurs, the result of `f` is returned.
18 | 
19 | 		Otherwise the caught exception is rethrown as `String` in a human-
20 | 		readable representation and with positions formatted within `input`.
21 | 
22 | 		If `input` or `f` are null, the result is unspecified.
23 | 	**/
24 | 	static public function catchErrors<T>(input:byte.ByteData, f:Void->T) {
25 | 		try {
26 | 			return f();
27 | 		} catch(e:ParserError) { // NOTE(review): covers `NoMatch` only if it extends `ParserError` - confirm
28 | 			throw e.pos.format(input) + ": " + e.toString();
29 | 		}
30 | 	}
31 | }


--------------------------------------------------------------------------------
/src/hxparse/debug/LexerGraph.hx:
--------------------------------------------------------------------------------
  1 | package hxparse.debug;
  2 | 
  3 | #if !hxdotgraph
  4 | #error "Using this class requires -lib hxdotgraph"
  5 | #end
  6 | 
  7 | import hxparse.Ruleset;
  8 | import hxparse.State;
  9 | import dot.Graph;
 10 | import dot.Node;
 11 | import dot.Attribute;
 12 | using Lambda;
 13 | 
 14 | class LexerGraph<T> {
 15 | 	
 16 | 	static public function printRuleset<T>(ruleset:Ruleset<T>):String {
 17 | 		var lexerGraph = new LexerGraph(ruleset);
 18 | 		return lexerGraph.graph.getDotCode();
 19 | 	}
 20 | 	
 21 | 	var graph:Graph;
 22 | 	var ruleset:Ruleset<T>;
 23 | 	var map:Map<State, Node>;
 24 | 	
 25 | 	function new(ruleset:Ruleset<T>) {
 26 | 		this.ruleset = ruleset;
 27 | 		this.graph = new Graph([RankDir(Lr)], true);
 28 | 		map = new Map();
 29 | 		processState(ruleset.state);
 30 | 	}
 31 | 	
 32 | 	function processState(state:State) {
 33 | 		if (map.exists(state)) {
 34 | 			return map[state];
 35 | 		}
 36 | 		var attrs = [Label("")];
 37 | 		if (state.finalId > -1) {
 38 | 			attrs.push(Shape(Doublecircle));
 39 | 		}
 40 | 		
 41 | 		var node = graph.node(attrs);
 42 | 		map[state] = node;
 43 | 		
 44 | 		var targets = new Map();
 45 | 		for (i in 0...256) {
 46 | 			if (state.trans[i] == null) {
 47 | 				continue;
 48 | 			}
 49 | 			var target = state.trans[i];
 50 | 			if (!targets.exists(target)) {
 51 | 				targets[target] = [i];
 52 | 			} else {
 53 | 				targets[target].push(i);
 54 | 			}
 55 | 		}
 56 | 		
 57 | 		for (target in targets.keys()) {
 58 | 			var il = targets[target];
 59 | 			var targetNode = processState(target);
 60 | 			var edgeLabel = getRangeString(il);
 61 | 			graph.edge(node, targetNode, [Label(edgeLabel)]);
 62 | 		}
 63 | 		
 64 | 		return node;
 65 | 	}
 66 | 	
 67 | 	function getRangeString(il:Array<Int>) {
 68 | 		if (il.length > 240) {
 69 | 			return "[^" + getRangeString(complementOf(il)) + "]";
 70 | 		} else if (il.length == 1) {
 71 | 			return printCode(il[0]);
 72 | 		}
 73 | 		
 74 | 		var ranges = [];
 75 | 		var i = 0;
 76 | 		var last = -1;
 77 | 		var start = -1;
 78 | 		function addRange() {
 79 | 			if (start == last) {
 80 | 				ranges.push(printCode(start));
 81 | 			} else {
 82 | 				ranges.push(printCode(start) + "-" +printCode(last));
 83 | 			}
 84 | 		}
 85 | 		while (i < il.length) {
 86 | 			var cur = il[i];
 87 | 			if (start == -1) {
 88 | 				start = cur;
 89 | 				++i;
 90 | 			} else if (cur != last + 1) {
 91 | 				addRange();
 92 | 				start = -1;
 93 | 			} else {
 94 | 				++i;
 95 | 			}
 96 | 			last = cur;
 97 | 		}
 98 | 		if (start != -1) {
 99 | 			addRange();
100 | 		}
101 | 		return ranges.join(" ");
102 | 	}
103 | 	/** Returns the label text for char code `i`: printable ASCII as the character itself (quote, backslash and space specially), anything else as backslashes followed by the decimal code. **/
104 | 	function printCode(i:Int) {
105 | 		if (i >= 32 && i < 0x7F) { // 0x7F (DEL) is a control character, so it is printed numerically
106 | 			return switch (i) {
107 | 				case '"'.code: '\\"';
108 | 				case '\\'.code: '\\\\';
109 | 				case ' '.code: "' '";
110 | 				case _: String.fromCharCode(i);
111 | 			}
112 | 		} else {
113 | 			return "\\\\" +i;
114 | 		}
115 | 	}
116 | 	
117 | 	function complementOf(il:Array<Int>) {
118 | 		// Collects every char code of the 0...256 range that is missing
119 | 		// from `il`, in ascending order.
120 | 		return [
121 | 			for (code in 0...256)
122 | 				if (!il.has(code))
123 | 					code
124 | 		];
125 | 	}
126 | }
127 | 


--------------------------------------------------------------------------------
/test/ArithmeticParser.hx:
--------------------------------------------------------------------------------
 1 | enum ArithmeticBinop {
 2 | 	OpAdd;
 3 | 	OpSub;
 4 | 	OpMul;
 5 | 	OpDiv;
 6 | }
 7 | 
 8 | enum ArithmeticToken {
 9 | 	TNumber(f:Float);
10 | 	TPOpen;
11 | 	TPClose;
12 | 	TBinop(op:ArithmeticBinop);
13 | 	TEof;
14 | }
15 | 
16 | enum ArithmeticExpr {
17 | 	ENumber(f:Float);
18 | 	EBinop(op:ArithmeticBinop, e1:ArithmeticExpr, e2:ArithmeticExpr);
19 | 	EParenthesis(e:ArithmeticExpr);
20 | 	ENeg(e:ArithmeticExpr);
21 | }
22 | 
23 | class ArithmeticLexer extends hxparse.Lexer implements hxparse.RuleBuilder {
24 | 	static public var tok = @:rule [
25 | 		"[1-9][0-9]*" => TNumber(Std.parseFloat(lexer.current)), // lazy: integers without a leading zero only (no "0", no decimals)
26 | 		"\\(" => TPOpen,
27 | 		"\\)" => TPClose,
28 | 		"\\+" => TBinop(OpAdd),
29 | 		"\\-" => TBinop(OpSub),
30 | 		"\\*" => TBinop(OpMul),
31 | 		"\\/" => TBinop(OpDiv),
32 | 		"[\r\n\t ]" => lexer.token(tok), // whitespace: continue with the next token
33 | 		"" => TEof
34 | 	];
35 | }
36 | 
37 | class ArithmeticParser extends hxparse.Parser<hxparse.LexerTokenSource<ArithmeticToken>, ArithmeticToken> implements hxparse.ParserBuilder {
38 | 	public function parse() {
39 | 		return switch stream {
40 | 			case [TNumber(f)]:
41 | 				parseNext(ENumber(f));
42 | 			case [TPOpen, e = parse(), TPClose]:
43 | 				parseNext(EParenthesis(e));
44 | 			case [TBinop(OpSub), e = parse()]:
45 | 				parseNext(ENeg(e));
46 | 		}
47 | 	}
48 | 
49 | 	function parseNext(e1:ArithmeticExpr) {
50 | 		return switch stream {
51 | 			case [TBinop(op), e2 = parse()]:
52 | 				binop(e1, op, e2);
53 | 			case _:
54 | 				e1;
55 | 		}
56 | 	}
57 | 
58 | 	function binop(e1:ArithmeticExpr, op:ArithmeticBinop, e2:ArithmeticExpr) {
59 | 		return switch [e2, op] {
60 | 			case [EBinop(op2 = OpAdd | OpSub, e3, e4), OpMul | OpDiv]:
61 | 				// precedence
62 | 				EBinop(op2, EBinop(op, e1, e3), e4);
63 | 			case _:
64 | 				EBinop(op, e1, e2);
65 | 		}
66 | 	}
67 | }
68 | 
69 | class ArithmeticEvaluator {
70 | 	// Recursively computes the numeric value of the expression tree `e`.
71 | 	static public function eval(e:ArithmeticExpr):Float {
72 | 		return switch(e) {
73 | 			case ENumber(value):
74 | 				value;
75 | 			case EParenthesis(inner):
76 | 				eval(inner);
77 | 			case ENeg(inner):
78 | 				-eval(inner);
79 | 			// binary operators are matched directly on the operator
80 | 			// constructor instead of through a nested switch
81 | 			case EBinop(OpAdd, lhs, rhs):
82 | 				eval(lhs) + eval(rhs);
83 | 			case EBinop(OpSub, lhs, rhs):
84 | 				eval(lhs) - eval(rhs);
85 | 			case EBinop(OpMul, lhs, rhs):
86 | 				eval(lhs) * eval(rhs);
87 | 			case EBinop(OpDiv, lhs, rhs):
88 | 				eval(lhs) / eval(rhs);
89 | 		}
90 | 	}
91 | }


--------------------------------------------------------------------------------
/test/JSONParser.hx:
--------------------------------------------------------------------------------
  1 | import hxparse.Parser.parse as parse;
  2 | 
  3 | private enum Token {
  4 | 	TBrOpen;
  5 | 	TBrClose;
  6 | 	TComma;
  7 | 	TDblDot;
  8 | 	TBkOpen;
  9 | 	TBkClose;
 10 | 	TDash;
 11 | 	TDot;
 12 | 	TTrue;
 13 | 	TFalse;
 14 | 	TNull;
 15 | 	TNumber(v:String);
 16 | 	TString(v:String);
 17 | 	TEof;
 18 | }
 19 | 
 20 | class JSONLexer extends hxparse.Lexer implements hxparse.RuleBuilder {
 21 | 
 22 | 	static var buf:StringBuf;
 23 | 
 24 | 	public static var tok = @:rule [
 25 | 		"{" => TBrOpen,
 26 | 		"}" => TBrClose,
 27 | 		"," => TComma,
 28 | 		":" => TDblDot,
 29 | 		"[" => TBkOpen,
 30 | 		"]" => TBkClose,
 31 | 		"-" => TDash,
 32 | 		"\\." => TDot,
 33 | 		"true" => TTrue,
 34 | 		"false" => TFalse,
 35 | 		"null" => TNull,
 36 | 		"-?(([1-9][0-9]*)|0)(\\.[0-9]+)?([eE][\\+\\-]?[0-9]+)?" => TNumber(lexer.current), // "\\." so only a literal dot starts the fraction
 37 | 		'"' => {
 38 | 			buf = new StringBuf();
 39 | 			lexer.token(string);
 40 | 			TString(buf.toString());
 41 | 		},
 42 | 		"[\r\n\t ]" => lexer.token(tok),
 43 | 		"" => TEof
 44 | 	];
 45 | 
 46 | 	static var string = @:rule [
 47 | 		"\\\\t" => {
 48 | 			buf.addChar("\t".code);
 49 | 			lexer.token(string);
 50 | 		},
 51 | 		"\\\\n" => {
 52 | 			buf.addChar("\n".code);
 53 | 			lexer.token(string);
 54 | 		},
 55 | 		"\\\\r" => {
 56 | 			buf.addChar("\r".code);
 57 | 			lexer.token(string);
 58 | 		},
 59 | 		'\\\\"' => {
 60 | 			buf.addChar('"'.code);
 61 | 			lexer.token(string);
 62 | 		},
 63 | 		"\\\\u[0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f][0-9A-Fa-f]" => {
 64 | 			buf.add(String.fromCharCode(Std.parseInt("0x" +lexer.current.substr(2))));
 65 | 			lexer.token(string);
 66 | 		},
 67 | 		'"' => {
 68 | 			lexer.curPos().pmax;
 69 | 		},
 70 | 		'[^"]' => {
 71 | 			buf.add(lexer.current);
 72 | 			lexer.token(string);
 73 | 		},
 74 | 	];
 75 | }
 76 | 
 77 | /**
 78 | 	Recursive-descent parser that turns the tokens of `JSONLexer` into plain
 79 | 	Haxe values.
 80 | **/
 81 | class JSONParser extends hxparse.Parser<hxparse.LexerTokenSource<Token>, Token> {
 82 | 	/**
 83 | 		Creates a `JSONLexer` over `input` and feeds this parser from it,
 84 | 		starting in the `JSONLexer.tok` ruleset.
 85 | 		@param input the JSON text to parse
 86 | 		@param sourceName name handed to the lexer together with the input
 87 | 	**/
 88 | 	public function new(input:byte.ByteData, sourceName:String) {
 89 | 		var lexer = new JSONLexer(input, sourceName);
 90 | 		var ts = new hxparse.LexerTokenSource(lexer, JSONLexer.tok);
 91 | 		super(ts);
 92 | 	}
 93 | 
 94 | 	/**
 95 | 		Parses the JSON value at the current position of the token stream.
 96 | 		@return an anonymous structure for an object, an `Array<Dynamic>` for
 97 | 		an array, a `Float` for a number, or a `String`, `Bool` or `null`
 98 | 	**/
 99 | 	public function parseJson():Dynamic {
100 | 		return parse(switch stream {
101 | 			case [TBrOpen, obj = object({})]: obj;
102 | 			case [TBkOpen, arr = array([])]: arr;
103 | 			// The token carries the raw lexeme; convert it so that a number
104 | 			// does not come back as a String, just like a TString would.
105 | 			case [TNumber(s)]: Std.parseFloat(s);
106 | 			case [TTrue]: true;
107 | 			case [TFalse]: false;
108 | 			case [TNull]: null;
109 | 			case [TString(s)]: s;
110 | 		});
111 | 	}
112 | 
113 | 	/**
114 | 		Parses the members of an object whose `TBrOpen` token has already
115 | 		been consumed and stores each of them as a field of `obj`.
116 | 		Note that a comma directly followed by `TBrClose` is accepted.
117 | 		@param obj structure receiving the fields
118 | 		@return `obj`
119 | 	**/
120 | 	function object(obj:{}) {
121 | 		return parse(switch stream {
122 | 			case [TBrClose]: obj;
123 | 			case [TString(s), TDblDot, e = parseJson()]:
124 | 				Reflect.setField(obj, s, e);
125 | 				// After a member: either the object ends or another follows.
126 | 				switch stream {
127 | 					case [TBrClose]: obj;
128 | 					case [TComma]: object(obj);
129 | 				}
130 | 		});
131 | 	}
132 | 
133 | 	/**
134 | 		Parses the elements of an array whose `TBkOpen` token has already
135 | 		been consumed and appends each of them to `acc`.
136 | 		Note that a comma directly followed by `TBkClose` is accepted.
137 | 		@param acc array receiving the elements
138 | 		@return `acc`
139 | 	**/
140 | 	function array(acc:Array<Dynamic>) {
141 | 		return parse(switch stream {
142 | 			case [TBkClose]: acc;
143 | 			case [elt = parseJson()]:
144 | 				acc.push(elt);
145 | 				// After an element: either the array ends or another follows.
146 | 				switch stream {
147 | 					case [TBkClose]: acc;
148 | 					case [TComma]: array(acc);
149 | 				}
150 | 		});
151 | 	}
152 | }
120 | 


--------------------------------------------------------------------------------
/test/PrintfParser.hx:
--------------------------------------------------------------------------------
  1 | enum PToken { // tokens produced by the two PrintfLexer rulesets
  2 | 	Eof; // end of input, emitted by the "" rule of PrintfLexer.tok
  3 | 	Placeholder; // a single `$`, which introduces a placeholder
  4 | 	Dot; // the `.` inside a placeholder
  5 | 	Number(i:Int); // digits inside a placeholder, already converted with Std.parseInt
  6 | 	Literal(s:String); // text outside of placeholders
  7 | 	Flag(flag:PFlag); // one flag character inside a placeholder
  8 | 	Value<C>(v:PValue<C>); // conversion letter; PrintfParser stops reading the placeholder here
  9 | }
 10 | 
 11 | enum PFlag { // flag characters recognised by PrintfLexer.placeholder
 12 | 	Zero; // "0"
 13 | 	Alt; // "#"
 14 | 	Plus; // "+"
 15 | 	Minus; // "-"
 16 | 	Space; // " "
 17 | }
 18 | 
 19 | enum PValue<T> { // conversion kinds; T is the Haxe type each constructor is declared for
 20 | 	VInt:PValue<Int>; // letter "i" in PrintfLexer.placeholder
 21 | 	VString:PValue<String>; // letter "s"
 22 | 	VBool:PValue<Bool>; // letter "b"
 23 | 	VFloat:PValue<Float>; // letter "f"
 24 | }
 25 | 
 26 | enum Fmt<A,B> { // parsed format; NOTE(review): A/B look like a continuation-style typed-printf encoding - confirm
 27 | 	Lit(s:String):Fmt<A,A>; // literal text; leaves the type unchanged (A to A)
 28 | 	Val<C>(v:PValue<C>):Fmt<A,C->A>; // one placeholder; adds an argument of type C
 29 | 	Cat<C>(a:Fmt<B,C>, b:Fmt<A,B>):Fmt<A,C>; // format `a` followed by format `b`
 30 | }
 31 | 
 32 | /**
 33 | 	Lexer for the `$`-based format strings consumed by `PrintfParser`.
 34 | **/
 35 | class PrintfLexer extends hxparse.Lexer implements hxparse.RuleBuilder {
 36 | 
 37 | 	/**
 38 | 		Ruleset for the text outside of placeholders.
 39 | 	**/
 40 | 	static public var tok = @:rule [
 41 | 		"$" => Placeholder,
 42 | 		// `$$` is the escape for a literal dollar sign, so emit one `$`
 43 | 		// rather than the two-character lexeme.
 44 | 		"$$" => Literal("$"),
 45 | 		"[^$]+" => Literal(lexer.current),
 46 | 		"" => Eof
 47 | 	];
 48 | 
 49 | 	/**
 50 | 		Ruleset for the inside of a placeholder: flag characters, numbers,
 51 | 		the dot and the conversion letter. `PrintfParser` switches its token
 52 | 		source to this ruleset after it has seen a `Placeholder` token.
 53 | 	**/
 54 | 	static public var placeholder = @:rule [
 55 | 		"0" => Flag(Zero),
 56 | 		"#" => Flag(Alt),
 57 | 		" " => Flag(Space),
 58 | 		"+" => Flag(Plus),
 59 | 		"-" => Flag(Minus),
 60 | 		// Numbers never start with 0, which is lexed as the Zero flag above.
 61 | 		"[1-9][0-9]*" => Number(Std.parseInt(lexer.current)),
 62 | 		"\\." => Dot,
 63 | 		"i" => Value(VInt),
 64 | 		"f" => Value(VFloat),
 65 | 		"s" => Value(VString),
 66 | 		"b" => Value(VBool),
 67 | 	];
 68 | }
 55 | 
 56 | /**
 57 | 	Parser that builds a `Fmt` value from the tokens of `PrintfLexer`.
 58 | **/
 59 | class PrintfParser extends hxparse.Parser<hxparse.LexerTokenSource<PToken>, PToken> implements hxparse.ParserBuilder {
 60 | 	/**
 61 | 		Sets up a `PrintfLexer` over `input`, starting in `PrintfLexer.tok`.
 62 | 		@param input the format string to parse
 63 | 	**/
 64 | 	public function new(input:byte.ByteData) {
 65 | 		var source = new hxparse.LexerTokenSource(new PrintfLexer(input), PrintfLexer.tok);
 66 | 		super(source);
 67 | 	}
 68 | 
 69 | 	/**
 70 | 		Parses the remaining input into a right-nested chain of `Cat` nodes.
 71 | 		@return the parsed format, or `null` when only `Eof` is left
 72 | 	**/
 73 | 	public function parse() {
 74 | 		var head:Fmt<Dynamic,Dynamic> = switch stream {
 75 | 			case [Literal(text)]: Lit(text);
 76 | 			case [Placeholder]:
 77 | 				// Lex the placeholder body with its own ruleset, then go
 78 | 				// back to whatever ruleset was active before.
 79 | 				var saved = stream.ruleset;
 80 | 				stream.ruleset = PrintfLexer.placeholder;
 81 | 				var spec = parsePlaceholder();
 82 | 				stream.ruleset = saved;
 83 | 				spec;
 84 | 			case [Eof]: null;
 85 | 		}
 86 | 		if (head == null) return null;
 87 | 		var tail = parse();
 88 | 		if (tail == null) return head;
 89 | 		return Cat(head, tail);
 90 | 	}
 91 | 
 92 | 	/**
 93 | 		Reads one placeholder: optional flags, optional width, optional
 94 | 		`.precision`, then the mandatory conversion letter.
 95 | 		Flags, width and precision are consumed but not kept in the result.
 96 | 	**/
 97 | 	function parsePlaceholder() {
 98 | 		var flagList = parseFlags([]);
 99 | 		var fieldWidth = switch stream {
100 | 			case [Number(digits)]: digits;
101 | 			case _: -1;
102 | 		}
103 | 		var fieldPrecision = switch stream {
104 | 			case [Dot, Number(digits)]: digits;
105 | 			case _: -1;
106 | 		}
107 | 		return switch stream {
108 | 			case [Value(kind)]: Val(kind); // the configuration is left out to keep the example simple
109 | 			case _: unexpected();
110 | 		}
111 | 	}
112 | 
113 | 	/**
114 | 		Collects consecutive `Flag` tokens into `acc`.
115 | 		@param acc array the flags are appended to
116 | 		@return `acc`
117 | 	**/
118 | 	function parseFlags(acc:Array<PFlag>) {
119 | 		return switch stream {
120 | 			case [Flag(flag)]:
121 | 				acc.push(flag);
122 | 				parseFlags(acc);
123 | 			case _: acc;
124 | 		}
125 | 	}
126 | }


--------------------------------------------------------------------------------
/test/Test.hx:
--------------------------------------------------------------------------------
 1 | /**
 2 | 	Smoke test that runs the example parsers and lexers and traces what
 3 | 	they produce.
 4 | **/
 5 | class Test {
 6 | 	static function main() {
 7 | 
 8 | 		var t0 = haxe.Timer.stamp();
 9 | 
10 | 		var printfParser = new PrintfParser(byte.ByteData.ofString("Valu$$e: $-050.2f kg"));
11 | 		trace(printfParser.parse());
12 | 
13 | 		var jsonParser = new JSONParser(byte.ByteData.ofString('{ "key": [true, false, null], "other\tkey": [12, 12.1, 0, 0.1, 0.9e1, 0.9E1, 9E-1] }'), "jsontest");
14 | 		trace(jsonParser.parseJson());
15 | 
16 | 		// Lexing of non-ASCII input
17 | 		var value = 'hello âê€𩸽ùあ𠀀ÊÀÁÂÃÄÅÆÇÈÉËÌÍÎÏÐÑÒÓÔÕÖØÙÚÛÜÝÞßàáãäåæçèéëìíîïðñòóôõöøúûüýþÿ№ unicode';
18 | 		var lexer = new UnicodeTestLexer( byte.ByteData.ofString( value ), 'uft8-test' );
19 | 		var tokens = [];
20 | 
21 | 		// UnicodeTestLexer.root has no end-of-input rule, so the loop is left
22 | 		// through the exception the lexer throws once nothing matches.
23 | 		try while (true) {
24 | 			tokens.push( lexer.token( UnicodeTestLexer.root ) );
25 | 		} catch (_e:Dynamic) {
26 | 			trace(_e);
27 | 		}
28 | 		trace( tokens );
29 | 
30 | 		var numTests = 0;
31 | 		// Parses and evaluates `s`, tracing a message when the result
32 | 		// differs from `expected`.
33 | 		function eq(expected:Float, s:String) {
34 | 			++numTests;
35 | 			var lexer = new ArithmeticParser.ArithmeticLexer(byte.ByteData.ofString(s));
36 | 			var ts = new hxparse.LexerTokenSource(lexer, ArithmeticParser.ArithmeticLexer.tok);
37 | 			var parser = new ArithmeticParser(ts);
38 | 			var result = ArithmeticParser.ArithmeticEvaluator.eval(parser.parse());
39 | 			if (expected != result) {
40 | 				trace('Error in "$s"; expected $expected but was $result');
41 | 			}
42 | 		}
43 | 		eq(1, "1");
44 | 		eq(2, "1 + 1");
45 | 		eq(6, "2 * 3");
46 | 		eq(2, "6 / 3");
47 | 		eq(1.5, "3 / 2");
48 | 		eq(10, "2 * 3 + 4");
49 | 		eq(14, "2 * (3 + 4)");
50 | 		eq(18, "9 + (3 * 4) - 3 / (1 * 1)");
51 | 		eq(-9, "-9");
52 | 		eq(-12, "-(4 + 8)");
53 | 		eq(12, "--12");
54 | 		eq(8, "2*(3-(2+(-3)))");
55 | 
56 | 		// haxe.Timer.stamp() counts seconds; scale so the value matches the
57 | 		// "ms" unit printed below.
58 | 		var elapsedMs = (haxe.Timer.stamp() - t0) * 1000;
59 | 		trace('Done $numTests tests in $elapsedMs ms');
60 | 	}
61 | }
52 | 


--------------------------------------------------------------------------------
/test/UnicodeTestLexer.hx:
--------------------------------------------------------------------------------
 1 | package ;
 2 | 
 3 | import hxparse.Lexer;
 4 | import hxparse.RuleBuilder;
 5 | import haxe.Utf8;
 6 | 
 7 | /**
 8 |  * Lexer with a single ruleset, `root`, whose patterns contain non-ASCII characters written in several ways; every rule returns the matched text.
 9 |  * @author Skial Bainn
10 |  */
11 | class UnicodeTestLexer extends Lexer implements RuleBuilder {
12 | 
13 | 	public static var root = @:rule [ // note: there is no "" (end of input) rule
14 | 		'â' => lexer.current, // single accented letters written literally
15 | 		'ê' => lexer.current,
16 | 		'ù' => lexer.current,
17 | 		"あ𠀀" => lexer.current, // two-character sequence: U+3042 followed by U+20000
18 | 		'\u00CA' => lexer.current, // Ê
19 | 		'\u20AC' => lexer.current,	// €
20 | 		'\u{29e3d}' => lexer.current, // 𩸽
21 | 		'[ a-zA-Z0-9ÀÁÂÔÕÖØÙÚÛÜÝÞßàáãäåæçèéëìíîïðñòóôõöøúûüýþÿ№あ𠀀]' => lexer.current, // one character out of a class mixing ASCII ranges and non-ASCII members
22 | 		'\\195[\\131-\\139]' => lexer.current, // NOTE(review): presumably decimal byte escapes, 0xC3 then 0x83-0x8B (UTF-8 of Ã-Ë) - confirm against LexEngine
23 | 		'\\xC3[\\x8c-\\x93]' => lexer.current, // NOTE(review): presumably hex byte escapes, 0xC3 then 0x8C-0x93 (UTF-8 of Ì-Ó) - confirm against LexEngine
24 | 		//'[Ã-Ë]' => lexer.current
25 | 	];
26 | 
27 | }
28 | 


--------------------------------------------------------------------------------