├── .gitignore
├── .codeclimate.yml
├── test.hxml
├── src
└── parsihax
│ ├── ParseFunction.hx
│ ├── ParseResult.hx
│ ├── ParseObject.hx
│ ├── ParseUtil.hx
│ └── Parser.hx
├── doc.hxml
├── haxelib.json
├── .travis.yml
├── LICENSE
├── test
└── parsihax
│ ├── Test.hx
│ ├── LispGrammar.hx
│ └── JsonGrammar.hx
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | *.n
2 | /.vscode/
3 | /bin/
4 | /dump/
5 | *.zip
6 |
--------------------------------------------------------------------------------
/.codeclimate.yml:
--------------------------------------------------------------------------------
1 | engines:
2 | haxe-checkstyle:
3 | enabled: true
4 | ratings:
5 | paths:
6 | - "**.hx"
7 |
--------------------------------------------------------------------------------
/test.hxml:
--------------------------------------------------------------------------------
1 | -cp src
2 | -cp test
3 | -lib monax
4 | -lib buddy
5 | -main parsihax.Test
6 | -neko test/parsihax/Test.n
7 | -cmd neko test/parsihax/Test.n
--------------------------------------------------------------------------------
/src/parsihax/ParseFunction.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | /**
4 | Parsing function created by chaining Parser combinators: takes the input `String` and an optional `Int` start offset and returns a `ParseResult`.
5 | **/
6 | typedef ParseFunction = String -> ?Int -> ParseResult;
7 |
--------------------------------------------------------------------------------
/doc.hxml:
--------------------------------------------------------------------------------
1 | -lib monax
2 | -dce std
3 | -cp src
4 | parsihax.ParseFunction
5 | parsihax.ParseObject
6 | parsihax.ParseResult
7 | parsihax.ParseUtil
8 | parsihax.Parser
9 | -xml bin/parsihax.xml
10 | --next
11 | -cmd haxelib run dox -o bin/api -i bin --title "Parsihax - API documentation" -D source-path https://github.com/deathbeam/parsihax/blob/master/src/
12 |
--------------------------------------------------------------------------------
/haxelib.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "parsihax",
3 | "url" : "https://github.com/deathbeam/parsihax",
4 | "license": "MIT",
5 | "tags": ["parser", "parsing", "cross", "utility"],
6 | "description": "A monadic LL(infinity) parser combinator library for Haxe.",
7 | "version": "2.0.2",
8 | "classPath": "src/",
9 | "releasenote": "Haxe 4 compatibility fixes",
10 | "contributors": [ "deathbeam" ]
11 | }
12 |
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | language: haxe
2 |
3 | env:
4 | global:
5 | - GH_REF: github.com/deathbeam/parsihax.git
6 |
7 | haxe:
8 | - development
9 |
10 | before_install:
11 | - sudo apt-get update
12 | - sudo apt-get install mono-devel
13 |
14 | hxml:
15 | - build.hxml
16 | - test.hxml
17 |
18 | script:
19 | - haxe test.hxml
20 | - haxelib git dox https://github.com/HaxeFoundation/dox
21 | - rm -rf bin
22 | - mkdir bin
23 | - haxe doc.hxml
24 |
25 | deploy:
26 | provider: pages
27 | skip_cleanup: true
28 | github_token: $GH_TOKEN
29 | keep_history: true
30 | local_dir: bin/api
31 | on:
32 | branch: master
33 |
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2016 Tomas Slusny
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/src/parsihax/ParseResult.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | /**
4 | A structure with a boolean `status` flag, indicating whether the parse
5 | succeeded. If it succeeded, `value` holds the yielded value and `index`
6 | the offset reached in the input. Otherwise `furthest` holds the offset
7 | of the parse error and `expected` an array of messages indicating what
8 | was expected there.
9 |
10 | The error structure can be passed along with the original source to
11 | `ParseUtil.formatError` to obtain a human-readable error string.
12 | **/
13 | typedef ParseResult = {
14 |
15 | /**
16 | Flag, indicating whether the parse succeeded
17 | **/
18 | var status : Bool;
19 |
20 | /**
21 | Offset in the input after a successful parse (`-1` in case of failed parse)
22 | **/
23 | var index : Int;
24 |
25 | /**
26 | Yielded value of `Parser` (in case of successful parse, `null` for a failure)
27 | **/
28 | var value : T;
29 |
30 | /**
31 | Furthest offset at which a failure was recorded (`-1` if none); `ParseUtil.formatError` reports the error there
32 | **/
33 | var furthest : Int;
34 |
35 | /**
36 | Messages indicating what was expected at `furthest` (empty when no failure was recorded)
37 | **/
38 | var expected : Array;
39 |
40 | }
41 |
--------------------------------------------------------------------------------
/test/parsihax/Test.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | import buddy.SingleSuite;
4 | using buddy.Should;
5 | /** Buddy suite that checks the sample JSON and Lisp grammars report success on their example inputs. **/
6 | class Test extends SingleSuite {
7 | public function new() {
8 | describe("Using Parsihax", {
9 | var result = false;
10 | // Reset the shared status flag before every spec.
11 | beforeEach({
12 | result = false;
13 | });
14 |
15 | describe("JSON grammar", {
16 | var input = '
17 | {
18 | "firstName": "John",
19 | "lastName": "Smith",
20 | "age": 25,
21 | "address": {
22 | "streetAddress": "21 2nd Street",
23 | "city": "New York",
24 | "state": "NY",
25 | "postalCode": "10021"
26 | },
27 | "phoneNumber": [
28 | {
29 | "type": "home",
30 | "number": "212 555-1234"
31 | },
32 | {
33 | "type": "fax",
34 | "number": "646 555-4567"
35 | }
36 | ]
37 | }';
38 | // Parse the sample and keep only the `status` flag of the result.
39 | beforeEach({
40 | result = JsonGrammar.build()(input).status;
41 | });
42 |
43 | it('should parse "$input"', {
44 | result.should.be(true);
45 | });
46 | });
47 |
48 | describe("Lisp grammar", {
49 | var input = '
50 | (if (empty brain)
51 | (print "Hello Lisp!")
52 | (print 42.0))';
53 | // Parse the sample and keep only the `status` flag of the result.
54 | beforeEach({
55 | result = LispGrammar.build()(input).status;
56 | });
57 |
58 | it('should parse "$input"', {
59 | result.should.be(true);
60 | });
61 | });
62 |
63 | });
64 | }
65 | }
66 |
--------------------------------------------------------------------------------
/src/parsihax/ParseObject.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | import haxe.ds.Vector;
4 |
5 | /**
6 | The ParseObject object is a wrapper for a parser function, stored in the
7 | single slot of a `Vector`. Externally, you use one to parse a string by calling
8 | `var result = SomeParseObject.apply('Me Me Me! Parse Me!');`
9 | **/
10 | abstract ParseObject(Vector>) {
11 | // The wrapped function lives in slot 0 of the underlying vector; `apply` reads and writes that slot.
12 | inline function new() this = new Vector(1);
13 | @:to inline function get_apply() : ParseFunction return this[0];
14 | inline function set_apply(param : ParseFunction) return this[0] = param;
15 |
16 | /**
17 | Getting `ParseObject.apply` from a parser (or explicitly casting it to
18 | `ParseFunction`) returns parsing function `String -> ?Int -> ParseResult`
19 | (or just `ParseFunction`), that parses the string and returns `ParseResult`.
20 |
21 | Changing `ParseObject.apply` value changes parser behaviour, but still keeps its
22 | reference, which is really useful in recursive parsers.
23 | **/
24 | public var apply(get, set): ParseFunction;
25 |
26 | /**
27 | Creates `ParseObject` from `ParseFunction`
28 | **/
29 | @:noUsing @:from public static inline function to(v : ParseFunction) : ParseObject {
30 | var ret = new ParseObject();
31 | ret.apply = v;
32 | return ret;
33 | }
34 |
35 | /**
36 | Same as `Parser.then(l, r)`
37 | **/
38 | @:noUsing @:op(A + B) public static inline function opAdd(l: ParseObject, r: ParseObject): ParseObject {
39 | return Parser.then(l, r);
40 | }
41 |
42 | /**
43 | Same as `Parser.or(l, r)`
44 | **/
45 | @:noUsing @:op(A | B) public static inline function opOr(l: ParseObject, r: ParseObject): ParseObject {
46 | return Parser.or(l, r);
47 | }
48 |
49 | /**
50 | Same as `Parser.as(l, r)`
51 | **/
52 | @:noUsing @:op(A / B) public static inline function opDiv(l: ParseObject, r: String): ParseObject {
53 | return Parser.as(l, r);
54 | }
55 |
56 | }
57 |
--------------------------------------------------------------------------------
/test/parsihax/LispGrammar.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | import parsihax.*;
4 | import parsihax.Parser.*;
5 | using parsihax.Parser;
6 | using parsihax.LispGrammar;
7 |
8 | // ADT definition: the values `LispGrammar` yields - numbers, symbols, strings and lists.
9 | enum LispExpression {
10 | LispNumber(v: Float);
11 | LispSymbol(v: String);
12 | LispString(v: String);
13 | LispList(v : Array);
14 | }
15 | /** Example grammar for a small Lisp-like language; `build` returns the parsing function for one expression. **/
16 | class LispGrammar {
17 |     // A little helper to wrap a parser with optional whitespace.
18 |     private static inline function trim(parser : ParseObject) {
19 |         return parser.skip(optWhitespace());
20 |     }
21 |
22 |     public static function build() {
23 |         // We need to use `empty` here because the other parsers don't exist yet. We
24 |         // can't just declare this later though, because `LList` references this parser!
25 |         var LExpression = empty();
26 |
27 |         // The basic parsers (usually the ones described via regexp) should have a
28 |         // description for error message purposes.
29 |
30 |         var LString =
31 |             ~/"[^"]*"/.regexp().trim()
32 |             .map(function(r) return LispString(r))
33 |             .as('string');
34 |
35 |         var LSymbol =
36 |             ~/[a-zA-Z_-][a-zA-Z0-9_-]*/.regexp().trim()
37 |             .map(function(r) return LispSymbol(r))
38 |             .as('symbol');
39 |         // At least one digit is required: a pattern that can match the empty string succeeds without consuming input.
40 |         var LNumber =
41 |             ~/[+-]?([0-9]+(\.[0-9]*)?|\.[0-9]+)/.regexp().trim()
42 |             .map(function(r) return LispNumber(Std.parseFloat(r)))
43 |             .as('number');
44 |
45 |         // `.then` throws away the first value, and `.skip` throws away the second
46 |         // value, so we're left with just the `LExpression.many()` part as the
47 |         // yielded value from this parser.
48 |         var LList =
49 |             '('.string().trim()
50 |             .then(LExpression.many())
51 |             .skip(')'.string().trim())
52 |             .map(function(r) return LispList(r));
53 |
54 |         // Initialize LExpression now that the parsers recursing into it exist, by assigning its `.apply` field.
55 |         LExpression.apply = [
56 |             LSymbol,
57 |             LNumber,
58 |             LString,
59 |             LList
60 |         ].alt();
61 |
62 |         // Let's remember to throw away whitespace at the top level of the parser.
63 |         return optWhitespace().then(LExpression).apply;
64 |     }
65 | }
66 |
--------------------------------------------------------------------------------
/src/parsihax/ParseUtil.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | class ParseUtil {
4 |
5 |     /**
6 |         Obtain a human-readable error `String` for `result`, locating the error inside the parsed `stream`.
7 |     **/
8 |     public static function formatError(result : ParseResult, stream : String) : String {
9 |         var sexpected = result.expected.length == 1
10 |             ? result.expected[0]
11 |             : 'one of ' + result.expected.join(', ');
12 |         // Line and column are 1-based and derived from the text before the failure offset.
13 |         var indexOffset = result.furthest;
14 |         var lines = stream.substring(0, indexOffset).split("\n");
15 |         var lineWeAreUpTo = lines.length;
16 |         var columnWeAreUpTo = lines[lines.length - 1].length + 1;
17 |
18 |         var got = '';
19 |         // Either report the end of the stream or quote up to 12 characters of the offending input.
20 |         if (indexOffset == stream.length) {
21 |             got = ', got the end of the stream';
22 |         } else {
23 |             var prefix = (indexOffset > 0 ? "'..." : "'");
24 |             var suffix = (stream.length - indexOffset > 12 ? "...'" : "'");
25 |
26 |             got = ' at line ' + lineWeAreUpTo + ' column ' + columnWeAreUpTo
27 |                 + ', got ' + prefix + stream.substring(indexOffset, indexOffset + 12) + suffix;
28 |         }
29 |
30 |         return 'expected ' + sexpected + got;
31 |     }
32 |
33 |     /**
34 |         Create successful `ParseResult` with specified `index` and `value` (no failure recorded: `furthest` is `-1`).
35 |     **/
36 |     @:allow(parsihax.Parser)
37 |     private static inline function makeSuccess(index : Int, value : A) : ParseResult {
38 |         return {
39 |             status: true,
40 |             index: index,
41 |             value: value,
42 |             furthest: -1,
43 |             expected: []
44 |         };
45 |     }
46 |
47 |     /**
48 |         Create failed `ParseResult`; `index` is stored as `furthest` and `expected` becomes the only message.
49 |     **/
50 |     @:allow(parsihax.Parser)
51 |     private static inline function makeFailure(index : Int, expected : String) : ParseResult {
52 |         return {
53 |             status: false,
54 |             index: -1,
55 |             value: null,
56 |             furthest: index,
57 |             expected: [expected]
58 |         };
59 |     }
60 |
61 |     /**
62 |         Merge `result` and `last`: keeps `status`, `index` and `value` of `result` and the furthest failure information of the two.
63 |     **/
64 |     @:allow(parsihax.Parser)
65 |     private static function mergeReplies(result : ParseResult, ?last : ParseResult) : ParseResult {
66 |         if (last == null) return result;
67 |         if (result.furthest > last.furthest) return result;
68 |         // Same offset: both sets of expectations apply; otherwise `last` got further and wins.
69 |         var expected = (result.furthest == last.furthest)
70 |             ? unsafeUnion(result.expected, last.expected)
71 |             : last.expected;
72 |
73 |         return {
74 |             status: result.status,
75 |             index: result.index,
76 |             value: result.value,
77 |             furthest: last.furthest,
78 |             expected: expected
79 |         }
80 |     }
81 |
82 |     /**
83 |         Merge two arrays of expectation messages `xs` and `ys` into one array, sorted case-insensitively and without duplicates.
84 |     **/
85 |     private static function unsafeUnion(xs : Array, ys : Array) : Array {
86 |         // Nothing to merge when either side is empty.
87 |         if (xs.length == 0) return ys;
88 |         if (ys.length == 0) return xs;
89 |
90 |         // Keep only the first occurrence of every message so errors do not repeat an expectation.
91 |         var result = [];
92 |         for (message in xs.concat(ys)) if (result.indexOf(message) < 0) result.push(message);
93 |
94 |         result.sort(function(a, b):Int {
95 |             a = a.toLowerCase();
96 |             b = b.toLowerCase();
97 |             if (a < b) return -1;
98 |             if (a > b) return 1;
99 |             return 0;
100 |         });
101 |
102 |         return result;
103 |     }
104 |
105 | }
106 |
107 |
108 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Parsihax
2 | [![TravisCI Build Status][travis-img]][travis]
3 |
4 | Parsihax is a small library for writing big parsers made up of lots of little parsers. The API is inspired by
5 | [parsec][] and [Parsimmon][parsimmon] (originally, Parsihax was just supposed to be Parsimmon rewrite in Haxe).
6 |
7 | ### Installation
8 |
9 | Install the library via [haxelib][] (library manager that comes with any Haxe distribution).
10 |
11 | ```
12 | haxelib install parsihax
13 | ```
14 |
15 | ## API Documentation
16 |
17 | Haxe-generated API documentation is available at [documentation website][docs], or see the
18 | [annotated source of `parsihax.Parser.hx`.][parsihax]
19 |
20 | ## Examples
21 |
22 | See the [test][] directory for annotated examples of parsing JSON, simple Lisp-like structure and monad parser.
23 |
24 | ## Basics
25 | To use nice sugar syntax, simply add this to your Haxe file
26 |
27 | ```haxe
28 | import parsihax.*;
29 | import parsihax.Parser.*;
30 | using parsihax.Parser;
31 | ```
32 |
33 | A `ParseObject` parser is an abstract that represents an action on a stream of text, and the promise of either an
34 | object yielded by that action on success or a message in case of failure. For example, `Parser.string('foo')` yields
35 | the string `'foo'` if the beginning of the stream is `'foo'`, and otherwise fails.
36 |
37 | The method `.map` is used to transform the yielded value. For example,
38 |
39 | ```haxe
40 | 'foo'.string()
41 | .map(function(x) return x + 'bar');
42 | ```
43 |
44 | will yield `'foobar'` if the stream starts with `'foo'`. The parser
45 |
46 | ```haxe
47 | ~/[0-9]+/.regexp()
48 | .map(function(x) return Std.parseInt(x) * 2);
49 | ```
50 |
51 | will yield the number `24` when it encounters the string `'12'`.
52 |
53 | Also, Parsihax supports nice sugar syntax thanks to Haxe operator overloading. For example,
54 |
55 | ```haxe
56 | var a = "a".string() / "important letter a"
57 | var b = "b".string() / "important letter b"
58 | var c = "c".string() / "important letter c"
59 |
60 | var result = (a | b) + c;
61 |
62 | // Will succeed on "ac" and "bc"
63 | // In case of failure, it will throw "expected important letter a|b|c"
64 | // So, plus operator is alias to then, or operator to or and div
65 | // operator to as
66 | ```
67 |
68 | Getting `apply` from a `ParseObject` (or explicitly casting it to `ParseFunction`) returns a parsing function
69 | `String -> ?Int -> ParseResult` (or just `ParseFunction`) that parses the string and returns a `ParseResult`
70 | with a boolean `status` flag, indicating whether the parse succeeded. If it succeeded, the `value` attribute will
71 | contain the yielded value. Otherwise, the `furthest` and `expected` attributes will contain the offset of the parse error
72 | and an array of messages indicating what was expected.
73 |
74 | The error object can be passed along with the original source to `ParseUtil.formatError` to obtain
75 | a human-readable error string.
76 |
77 | Changing `ParseObject.apply` value changes `ParseObject` behaviour, but still keeps its reference, which is
78 | really useful in recursive parsers.
79 |
80 | [travis]: https://travis-ci.org/deathbeam/parsihax
81 | [travis-img]: https://api.travis-ci.org/deathbeam/parsihax.svg?branch=master
82 | [haxelib]: http://lib.haxe.org/p/parsihax
83 | [docs]: https://deathbeam.github.io/parsihax/parsihax/Parser.html
84 | [parsihax]: https://github.com/deathbeam/parsihax/blob/master/src/parsihax/Parser.hx
85 | [test]: https://github.com/deathbeam/parsihax/tree/master/test/parsihax
86 | [parsec]: https://hackage.haskell.org/package/parsec
87 | [parsimmon]: https://github.com/jneen/parsimmon
88 |
--------------------------------------------------------------------------------
/test/parsihax/JsonGrammar.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | import parsihax.*;
4 | import parsihax.Parser.*;
5 | using parsihax.Parser;
6 |
7 | // ADT definition: the values `JsonGrammar` yields - literals, numbers, strings, key/value pairs, arrays and objects.
8 | enum JsonExpression {
9 | JsonNull;
10 | JsonTrue;
11 | JsonFalse;
12 | JsonNumber(v : Float);
13 | JsonString(v : String);
14 | JsonPair(k : JsonExpression, v : JsonExpression);
15 | JsonArray(v : Array);
16 | JsonObject(v : Array);
17 | }
18 |
19 | class JsonGrammar {
20 |     // This is the main entry point of the parser: a full Json document.
21 |     static var json = (function() {
22 |         return whitespace.then([
23 |             object,
24 |             array,
25 |             stringLiteral,
26 |             numberLiteral,
27 |             nullLiteral,
28 |             trueLiteral,
29 |             falseLiteral
30 |         ].alt());
31 |     }).lazy();
32 |
33 |     // Use the Json standard's definition of whitespace rather than Parsihax's.
34 |     static var whitespace = ~/\s*/m.regexp();
35 |
36 |     // Json is pretty relaxed about whitespace, so let's make it easy to ignore
37 |     // after most text.
38 |     static function token(parser) {
39 |         return skip(parser, whitespace);
40 |     }
41 |
42 |     // This gets reused for both array and object parsing.
43 |     static function commaSep(parser) {
44 |         return sepBy(parser, token(','.string()));
45 |     }
46 |
47 |     // The basic tokens in Json, with optional whitespace afterward.
48 |     static var lbrace = token('{'.string());
49 |     static var rbrace = token('}'.string());
50 |     static var lbracket = token('['.string());
51 |     static var rbracket = token(']'.string());
52 |     static var comma = token(','.string());
53 |     static var colon = token(':'.string());
54 |
55 |     // `.result` is like `.map` but it takes a value instead of a function, and
56 |     // always returns the same value.
57 |     static var nullLiteral = token('null'.string()).result(JsonNull);
58 |     static var trueLiteral = token('true'.string()).result(JsonTrue);
59 |     static var falseLiteral = token('false'.string()).result(JsonFalse);
60 |
61 |     // regexp based parsers should generally be named for better error reporting.
62 |     static var stringLiteral =
63 |         token(~/"((?:\\.|.)*?)"/.regexp(1))
64 |         // Turn escaped characters into real ones (e.g. "\\n" becomes "\n").
65 |         .map(function interpretEscapes(str) {
66 |             var escapes = [
67 |                 'b' => String.fromCharCode(8),
68 |                 'f' => String.fromCharCode(12),
69 |                 'n' => '\n',
70 |                 'r' => '\r',
71 |                 't' => '\t'
72 |             ];
73 |             // `g` rewrites every escape, not only the first; group 1 is the text after the backslash.
74 |             return JsonString(~/\\(u[0-9a-fA-F]{4}|[^u])/g.map(str, function(reg) {
75 |                 var escape = reg.matched(1);
76 |                 var type = escape.charAt(0);
77 |                 var hex = escape.substr(1);
78 |                 if (type == 'u') return String.fromCharCode(Std.parseInt('0x' + hex));
79 |                 if (escapes.exists(type)) return escapes[type];
80 |                 return type;
81 |             }));
82 |         }).as('string');
83 |
84 |     static var numberLiteral =
85 |         token(~/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/.regexp())
86 |         .map(function(result) return JsonNumber(Std.parseFloat(result)))
87 |         .as('number');
88 |
89 |     // Array parsing is just ignoring brackets and commas and parsing as many nested
90 |     // Json documents as possible. Notice that we're using the parser `json` we just
91 |     // defined above. Arrays and objects in the Json grammar are recursive because
92 |     // they can contain any other Json document within them.
93 |     static var array = lbracket.then(commaSep(json)).skip(rbracket)
94 |         .map(function(results) return JsonArray(results));
95 |
96 |     // Object parsing is a little trickier because we have to collect all the key-
97 |     // value pairs in order as `JsonPair` values, then wrap the collected pairs in a
98 |     // `JsonObject`.
99 |     static var pair =
100 |         [stringLiteral.skip(colon), json].seq()
101 |         .map(function(results) return JsonPair(results[0], results[1]));
102 |
103 |     static var object =
104 |         lbrace.then(commaSep(pair)).skip(rbrace)
105 |         .map(function(pairs) return JsonObject(pairs));
106 |
107 |     public static function build() {
108 |         return json.apply;
109 |     }
110 | }
111 |
--------------------------------------------------------------------------------
/src/parsihax/Parser.hx:
--------------------------------------------------------------------------------
1 | package parsihax;
2 |
3 | using parsihax.Parser;
4 |
5 | /**
6 | Defines grammar and encapsulates parsing logic. A `ParseObject` takes as input a
7 | `String` source and parses it when the `ParseObject.apply` method is called.
8 | A structure `ParseResult` is returned.
9 | **/
10 | class Parser {
11 |
12 | /**
13 | A `ParseObject` that consumes nothing and yields the offset it was applied at.
14 | **/
15 | public static function index() : ParseObject {
16 |     return function(input : String, offset : Int = 0) : ParseResult {
17 |         return ParseUtil.makeSuccess(offset, offset);
18 |     };
19 | }
20 |
21 | /**
22 | Matches one letter `a`-`z` in either case; equivalent to `Parser.regexp(~/[a-z]/i)` labelled 'a letter'.
23 | **/
24 | public static inline function letter() : ParseObject {
25 |     return Parser.as(Parser.regexp(~/[a-z]/i), 'a letter');
26 | }
27 |
28 | /**
29 | Matches zero or more letters `a`-`z` in either case; equivalent to `Parser.regexp(~/[a-z]* /i)`
30 | **/
31 | public static inline function letters() : ParseObject {
32 |     return Parser.regexp(~/[a-z]*/i);
33 | }
34 |
35 | /**
36 | Matches one decimal digit; equivalent to `Parser.regexp(~/[0-9]/)` labelled 'a digit'.
37 | **/
38 | public static inline function digit() : ParseObject {
39 |     return Parser.as(Parser.regexp(~/[0-9]/), 'a digit');
40 | }
41 |
42 | /**
43 | Matches zero or more decimal digits; equivalent to `Parser.regexp(~/[0-9]* /)`
44 | **/
45 | public static inline function digits() : ParseObject {
46 |     return Parser.regexp(~/[0-9]*/);
47 | }
48 |
49 | /**
50 | Matches one or more whitespace characters; equivalent to `Parser.regexp(~/\s+/)` labelled 'whitespace'.
51 | **/
52 | public static inline function whitespace() : ParseObject {
53 |     return Parser.as(Parser.regexp(~/\s+/), 'whitespace');
54 | }
55 |
56 | /**
57 | Matches zero or more whitespace characters; equivalent to `Parser.regexp(~/\s* /)`
58 | **/
59 | public static inline function optWhitespace() : ParseObject {
60 |     return Parser.regexp(~/\s*/);
61 | }
62 |
63 | /**
64 | A `ParseObject` that consumes one character and yields it; fails with 'any character' at the end of the stream.
65 | **/
66 | public static function any() : ParseObject {
67 |     return function(input : String, offset : Int = 0) : ParseResult {
68 |         // Nothing is left to consume at or past the end of the input.
69 |         if (offset >= input.length) return ParseUtil.makeFailure(offset, 'any character');
70 |         return ParseUtil.makeSuccess(offset + 1, input.charAt(offset));
71 |     };
72 | }
73 |
74 | /**
75 | A `ParseObject` that always succeeds, yielding everything from the current offset to the end of the stream.
76 | **/
77 | public static function all() : ParseObject {
78 |     return function(input : String, offset : Int = 0) : ParseResult {
79 |         return ParseUtil.makeSuccess(input.length, input.substring(offset));
80 |     };
81 | }
82 |
83 | /**
84 | A `ParseObject` that succeeds (yielding `null`) only when no characters are left; otherwise fails with 'EOF'.
85 | **/
86 | public static function eof() : ParseObject {
87 |     return function(input : String, offset : Int = 0) : ParseResult {
88 |         // Any remaining character means we are not at the end yet.
89 |         if (offset < input.length) return ParseUtil.makeFailure(offset, 'EOF');
90 |         return ParseUtil.makeSuccess(offset, null);
91 |     };
92 | }
93 |
94 | /**
95 | Returns a `ParseObject` that expects the literal `string` at the current offset and yields it; the failure message is the literal in single quotes.
96 | **/
97 | public static function string(string : String) : ParseObject {
98 |     var quoted = "'" + string + "'";
99 |     var width = string.length;
100 |
101 |     return function(input : String, offset : Int = 0) : ParseResult {
102 |         var end = offset + width;
103 |         var slice = input.substring(offset, end);
104 |
105 |         // Compare the slice at the current offset against the wanted literal.
106 |         return slice == string
107 |             ? ParseUtil.makeSuccess(end, slice)
108 |             : ParseUtil.makeFailure(offset, quoted);
109 |     };
110 | }
111 |
112 | /**
113 | Returns a `ParseObject` that looks for the single character `character` and
114 | yields it; the parser is labelled with the character in single quotes.
115 | NOTE(review): previously documented as faster than `Parser.string` for one character - not verified here.
116 | **/
117 | public static function char(character : String) : ParseObject {
118 |     return Parser.as(Parser.test(function(ch) return ch == character), "'" + character + "'");
119 | }
120 |
121 | /**
122 | Returns a `ParseObject` that looks for exactly one character that occurs in `string`, and
123 | yields that character.
124 | **/
125 | public static function oneOf(string : String) : ParseObject {
126 |     return Parser.test(function(ch) return string.indexOf(ch) >= 0);
127 | }
128 |
129 | /**
130 | Returns a `ParseObject` that looks for exactly one character that does NOT occur in `string`,
131 | and yields that character.
132 | **/
133 | public static function noneOf(string : String) : ParseObject {
134 |     return Parser.test(function(ch) return string.indexOf(ch) < 0);
135 | }
136 |
137 | /**
138 | Returns a `ParseObject` that matches `re` against the remaining input and yields the given
139 | match `group` (defaulting to the entire match). Only a match that begins at the current
140 | parse location counts; a match found further ahead is a failure. The regexp may only use the
141 | following flags: imu. Any other flag will result in some weird behaviour.
142 | **/
143 | public static function regexp(re : EReg, group : Int = 0) : ParseObject {
144 |     var description = Std.string(re);
145 |
146 |     return function(input : String, offset : Int = 0) : ParseResult {
147 |         // Match against the remaining input only.
148 |         if (!re.match(input.substring(offset))) {
149 |             return ParseUtil.makeFailure(offset, description);
150 |         }
151 |
152 |         var captured = re.matched(group);
153 |         var where = re.matchedPos();
154 |         // Accept only a match that starts exactly at the current offset.
155 |         return (captured != null && where.pos == 0)
156 |             ? ParseUtil.makeSuccess(offset + where.len, captured)
157 |             : ParseUtil.makeFailure(offset, description);
158 |     };
159 | }
160 |
161 | /**
162 | Returns a `ParseObject` that always succeeds with `value` and leaves the
163 | current offset untouched.
164 | **/
165 | public static function succeed(value : A) : ParseObject {
166 |     return function(input : String, offset : Int = 0) : ParseResult {
167 |         return ParseUtil.makeSuccess(offset, value);
168 |     };
169 | }
170 |
171 | /**
172 | Returns a `ParseObject` that always fails at the current offset, reporting `expected` as what was wanted.
173 | **/
174 | public static function fail(expected : String) : ParseObject {
175 |     return function(input : String, offset : Int = 0) : ParseResult {
176 |         return ParseUtil.makeFailure(offset, expected);
177 |     };
178 | }
179 |
180 | /**
181 | Returns a new always-failing `ParseObject` whose message is 'empty'.
182 | **/
183 | public static function empty() : ParseObject {
184 |     return Parser.fail('empty');
185 | }
186 |
187 | /**
188 | Accepts an array of parsers and returns a new `ParseObject` that runs them
189 | one after another, yielding an array of all their results. The first failing
190 | parser fails the sequence; an empty array yields a parser failing with 'sequence of parsers'.
191 | **/
192 | public static function seq(parsers : Array>) : ParseObject> {
193 |     if (parsers.length == 0) return fail('sequence of parsers');
194 |
195 |     return function(input : String, offset : Int = 0) : ParseResult> {
196 |         var merged : ParseResult = null;
197 |         var values : Array = [];
198 |
199 |         for (step in parsers) {
200 |             merged = ParseUtil.mergeReplies(step.apply(input, offset), merged);
201 |             if (!merged.status) return cast(merged);
202 |             values.push(merged.value);
203 |             offset = merged.index; // the next parser starts where this one stopped
204 |         }
205 |
206 |         return ParseUtil.mergeReplies(ParseUtil.makeSuccess(offset, values), merged);
207 |     };
208 | }
209 |
210 | /**
211 | Accepts an array of parsers, yielding the value of the first
212 | one that succeeds, backtracking in between. This means that the order of
213 | parsers matters. If two parsers match the same prefix, the longer of the two
214 | must come first.
215 |
216 | ```haxe
217 | Parser.alt([
218 | Parser.string('ab'),
219 | Parser.string('a')
220 | ]).apply('ab');
221 | // => {status: true, value: 'ab'}
222 |
223 | Parser.alt([
224 | Parser.string('a'),
225 | Parser.string('ab')
226 | ]).apply('ab');
227 | // => {status: true, value: 'a', index: 1}
228 | ```
229 |
230 | In the second case, `Parser.alt` stops at the first parser that succeeds, so only
231 | `'a'` is consumed and the trailing `'b'` is left over for whatever parser runs next.
232 | **/
233 | public static function alt(parsers : Array>) : ParseObject {
234 |     if (parsers.length == 0) return fail('at least one alt');
235 |
236 |     return function(input : String, offset : Int = 0) : ParseResult {
237 |         var merged : ParseResult = null;
238 |
239 |         for (candidate in parsers) {
240 |             merged = ParseUtil.mergeReplies(candidate.apply(input, offset), merged);
241 |             if (merged.status) break;
242 |         }
243 |
244 |         return merged;
245 |     };
246 | }
247 |
248 | /**
249 | Accepts two `ParseObject`s, and expects zero or more matches for `parser`,
250 | separated by `separator`, yielding an array (empty when nothing matches).
251 |
252 | ```haxe
253 | Parser.sepBy(
254 | Parser.oneOf('abc'),
255 | Parser.string('|')
256 | ).apply('a|b|c|c|c|a');
257 | // => {status: true, value: ['a', 'b', 'c', 'c', 'c', 'a']}
258 |
259 | Parser.sepBy(
260 | Parser.oneOf('XYZ'),
261 | Parser.string('-')
262 | ).apply('');
263 | // => {status: true, value: []}
264 | ```
265 | **/
266 | public static inline function sepBy(parser : ParseObject, separator : ParseObject) : ParseObject> {
267 |     return Parser.or(Parser.sepBy1(parser, separator), Parser.succeed([]));
268 | }
269 |
270 | /**
271 | This is the same as `Parser.sepBy`, but the content parser has to match at least
272 | once.
273 | **/
274 | public static function sepBy1(parser : ParseObject, separator : ParseObject) : ParseObject> {
275 |     // Every further element is a separator followed by one more `parser` match.
276 |     var tail = separator.then(parser).many();
277 |
278 |     return parser.flatMap(function(first) {
279 |         // Prepend the mandatory first element to the separated rest.
280 |         return tail.map(function(others) return [first].concat(others));
281 |     });
282 | }
283 |
284 | /**
285 | Returns a `ParseObject` that consumes and yields a single character if it passes
286 | the `predicate` function `String -> Bool`; it fails at the end of the stream.
287 |
288 | ```haxe
289 | var SameUpperLower = Parser.test(function(c) {
290 | return c.toUpperCase() == c.toLowerCase();
291 | });
292 |
293 | SameUpperLower.apply('a'); // => {status: false, ...}
294 | SameUpperLower.apply('-'); // => {status: true, ...}
295 | SameUpperLower.apply(':'); // => {status: true, ...}
296 | ```
297 | **/
298 | public static function test(predicate : String -> Bool) : ParseObject {
299 |     return function(input : String, offset : Int = 0) : ParseResult {
300 |         var candidate = input.charAt(offset);
301 |         if (offset < input.length && predicate(candidate)) {
302 |             return ParseUtil.makeSuccess(offset + 1, candidate);
303 |         }
304 |         return ParseUtil.makeFailure(offset, 'a character matching ' + predicate);
305 |     };
306 | }
307 |
308 | /**
309 | Returns a `ParseObject` yielding a string containing all the next characters that
310 | pass the `predicate : String -> Bool`.
311 |
312 | ```haxe
313 | var CustomString =
314 | Parser.string('%')
315 | .then(Parser.any())
316 | .flatMap(function(start) {
317 | var end = [
318 | '[' => ']',
319 | '(' => ')',
320 | '{' => '}',
321 | '<'=> '>'
322 | ][start];
323 | end = end != null ? end : start;
324 |
325 | return Parser.takeWhile(function(c) {
326 | return c != end;
327 | }).skip(Parser.string(end));
328 | });
329 |
330 | CustomString.apply('%:a string:'); // => {status: true, value: 'a string'}
331 | CustomString.apply('%[a string]'); // => {status: true, value: 'a string'}
332 | CustomString.apply('%{a string}'); // => {status: true, value: 'a string'}
333 | CustomString.apply('%(a string)'); // => {status: true, value: 'a string'}
334 | CustomString.apply('%<a string>'); // => {status: true, value: 'a string'}
335 | ```
336 | **/
337 | public static function takeWhile(predicate : String -> Bool) : ParseObject {
338 | return function(stream : String, i : Int = 0) : ParseResult {
339 | var stop = i, len = stream.length; // `stop` ends up one past the last accepted character
340 | while (stop < len && predicate(stream.charAt(stop))) stop++;
341 | return ParseUtil.makeSuccess(stop, stream.substring(i, stop));
342 | };
343 | }
344 |
345 | /**
346 | Returns a new `ParseObject` which tries `parser`, and if it fails uses
347 | `alternative`. Example:
348 |
349 | ```haxe
350 | var numberPrefix =
351 | Parser.string('+')
352 | .or(Parser.of('-'))
353 | .or(Parser.of(''));
354 |
355 | numberPrefix.apply('+'); // => {status: true, value: '+'}
356 | numberPrefix.apply('-'); // => {status: true, value: '-'}
357 | numberPrefix.apply(''); // => {status: true, value: ''}
358 | ```
359 | **/
360 | public static function or(parser: ParseObject, alternative : ParseObject) : ParseObject {
361 | return [parser, alternative].alt(); // two-element case of `alt`; `parser` is listed first
362 | }
363 |
364 | /**
365 | Returns a new `ParseObject` which tries `parser`, and on success calls the function
366 | `fun : A -> ParseObject` with the result of the parse, which is expected to
367 | return another parser, which will be tried next. This allows you to
368 | dynamically decide how to continue the parse, which is impossible with the
369 | other combinators.
370 |
371 | ```haxe
372 | var CustomString =
373 | Parser.string('%')
374 | .then(Parser.any())
375 | .flatMap(function(start) {
376 | var end = [
377 | '[' => ']',
378 | '(' => ')',
379 | '{' => '}',
380 | '<'=> '>'
381 | ][start];
382 | end = end != null ? end : start;
383 |
384 | return Parser.takeWhile(function(c) {
385 | return c != end;
386 | }).skip(Parser.string(end));
387 | });
388 |
389 | CustomString.apply('%:a string:'); // => {status: true, value: 'a string'}
390 | CustomString.apply('%[a string]'); // => {status: true, value: 'a string'}
391 | CustomString.apply('%{a string}'); // => {status: true, value: 'a string'}
392 | CustomString.apply('%(a string)'); // => {status: true, value: 'a string'}
393 | CustomString.apply('%<a string>'); // => {status: true, value: 'a string'}
394 | ```
395 | **/
396 | public static function flatMap(parser: ParseObject, fun : A -> ParseObject) : ParseObject {
397 | return function(stream : String, i : Int = 0) : ParseResult {
398 | var first = parser.apply(stream, i);
399 | if (!first.status) return cast(first); // a failure of `parser` is returned as-is; `fun` is never called
400 | var second = fun(first.value).apply(stream, first.index); // continue where `parser` stopped
401 | return ParseUtil.mergeReplies(second, first);
402 | };
403 | }
404 |
405 | /**
406 | Expects `next` to follow `parser`, and yields the result of `next`.
407 |
408 | ```haxe
409 | var parserA = p1.then(p2); // is equivalent to...
410 | var parserB = Parser.seq([p1, p2]).map(function(results) return results[1]);
411 | ```
412 | **/
413 | public static function then(parser: ParseObject, next : ParseObject) : ParseObject {
414 | return parser.flatMap(function(result) return next); // the value produced by `parser` is ignored
415 | }
416 |
417 | /**
418 | Transforms the output of `parser` with the given function `fun : A -> B`.
419 |
420 | ```haxe
421 | var pNum = Parser.regexp(~/[0-9]+/).map(Std.parseInt);
422 |
423 | pNum.apply('9'); // => {status: true, value: 9}
424 | pNum.apply('123'); // => {status: true, value: 123}
425 | pNum.apply('3.1'); // => {status: true, value: 3.1}
426 | ```
427 | **/
428 | public static function map(parser: ParseObject, fun : A -> B) : ParseObject {
429 | return function(stream : String, i : Int = 0) : ParseResult {
430 | var inner = parser.apply(stream, i);
431 | if (inner.status) return ParseUtil.mergeReplies(ParseUtil.makeSuccess(inner.index, fun(inner.value)), inner);
432 | return cast(inner); // failures are passed through; `fun` is not called
433 | };
434 | }
435 |
436 | /**
437 | Returns a new `ParseObject` with the same behavior, but which yields `value`.
438 | Equivalent to `Parser.map(parser, function(_) return value)`.
439 | **/
440 | public static function result(parser: ParseObject, value : B) : ParseObject {
441 | return parser.map(function(_) return value); // the parsed value is discarded and `value` is yielded instead
442 | }
443 |
444 | /**
445 | Expects `next` after `parser`, but yields the value of `parser`.
446 |
447 | ```haxe
448 | var parserA = p1.skip(p2); // is equivalent to...
449 | var parserB = Parser.seq([p1, p2]).map(function(results) return results[0]);
450 | ```
451 | **/
452 | public static function skip(parser: ParseObject, next : ParseObject) : ParseObject {
453 | return parser.flatMap(function(kept) return next.result(kept)); // run `next`, but answer with the value of `parser`
454 | }
455 |
456 | /**
457 | Expects `parser` zero or more times, and yields an array of the results.
458 | **/
459 | public static function many(parser: ParseObject) : ParseObject> {
460 | return function(stream : String, i : Int = 0) : ParseResult> {
461 | var accum : Array = []; // values of every successful match, in order
462 | var result = null; // previous reply handed to `mergeReplies`; null before the first attempt
463 | // NOTE(review): the loop only exits when `parser` fails, so a parser that succeeds without advancing `index` would never terminate.
464 | while (true) {
465 | result = ParseUtil.mergeReplies(parser.apply(stream, i), result);
466 |
467 | if (result.status) {
468 | i = result.index; // resume right after the last match
469 | accum.push(result.value);
470 | } else {
471 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(i, accum), result); // first failure ends the run; the collected values are reported via `makeSuccess`
472 | }
473 | }
474 | };
475 | }
476 |
477 | /**
478 | Expects `parser` one or more times, and yields an array of the results.
479 | **/
480 | public static inline function many1(parser: ParseObject) : ParseObject> {
481 | return parser.atLeast(1); // "one or more" is `atLeast` with n = 1
482 | }
483 |
484 | /**
485 | Expects `parser` at least `min` and at most `max` times in a row (exactly `min`
486 | times when `max` is omitted), and yields an array of the results.
487 | **/
488 | public static function times(parser: ParseObject, min : Int, ?max : Int) : ParseObject> {
489 | if (max == null) max = min;
490 |
491 | return function(stream : String, i : Int = 0) : ParseResult> {
492 | var accum = [];
493 | // `result` is the latest reply; `prevParseResult` is the running merge of all replies so far.
494 | var result = null;
495 | var prevParseResult = null;
496 |
497 | for (times in 0...min) { // mandatory matches: any failure here fails the whole parser
498 | result = parser.apply(stream, i);
499 | prevParseResult = ParseUtil.mergeReplies(result, prevParseResult);
500 | if (result.status) {
501 | i = result.index;
502 | accum.push(result.value);
503 | } else return cast(prevParseResult);
504 | }
505 | // Optional matches: only the remaining `max - min` attempts, so the total never exceeds `max`.
506 | for (times in min...max) {
507 | result = parser.apply(stream, i);
508 | prevParseResult = ParseUtil.mergeReplies(result, prevParseResult);
509 | if (result.status) {
510 | i = result.index;
511 | accum.push(result.value);
512 | } else break;
513 | }
514 |
515 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(i, accum), prevParseResult);
516 | };
517 | }
518 |
519 | /**
520 | Expects `parser` at most `n` times. Yields an array of the results.
521 | **/
522 | public static inline function atMost(parser: ParseObject, n : Int) : ParseObject> {
523 | return parser.times(0, n); // no mandatory matches, up to `n` optional ones
524 | }
525 |
526 | /**
527 | Expects `parser` at least `n` times. Yields an array of the results.
528 | **/
529 | public static function atLeast(parser: ParseObject, n : Int) : ParseObject> {
530 | var stages = [parser.times(n), parser.many()]; // `times(n)` followed by `many()`
531 | // Join the two result arrays produced by the sequence into one.
532 | return stages.seq().map(function(parts) return parts[0].concat(parts[1]));
533 | }
534 |
535 | /**
536 | Returns a new `ParseObject` whose failure message is the `expected` parameter. For example,
537 | `string('x').as('the letter x')` will indicate that 'the letter x' was
538 | expected.
539 | **/
540 | public static function as(parser: ParseObject, expected : String) : ParseObject {
541 | return function(stream : String, i : Int = 0) : ParseResult {
542 | var reply = parser.apply(stream, i);
543 | if (!reply.status) reply.expected = [expected]; // on failure, overwrite the reply's `expected` list in place
544 | return reply; // successful replies pass through untouched
545 | };
546 | }
547 |
548 | /**
549 | Accepts a function that returns a `ParseObject`, which is evaluated the first
550 | time the parser is used. This is useful for referencing parsers that haven't
551 | yet been defined, and for implementing recursive parsers.
552 |
553 | ```haxe
554 | static var Value = Parser.lazy(function() {
555 | return Parser.alt([
556 | Parser.string('X'),
557 | Parser.string('(')
558 | .then(Value)
559 | .skip(Parser.string(')'))
560 | ]);
561 | });
562 |
563 | // ...
564 | Value.apply('X'); // => {status: true, value: 'X'}
565 | Value.apply('(X)'); // => {status: true, value: 'X'}
566 | Value.apply('((X))'); // => {status: true, value: 'X'}
567 | ```
568 | **/
569 | public static function lazy(fun : Void -> ParseObject) : ParseObject {
570 | var parser : ParseObject = null; // declared first so the closure below can refer to it
571 | // When invoked, the closure calls `fun()`, stores the produced parser's `apply` on `parser`, and runs it on the input.
572 | return parser = function(stream : String, i : Int = 0) : ParseResult {
573 | return (parser.apply = fun().apply)(stream, i);
574 | };
575 | }
576 |
577 | }
578 |
--------------------------------------------------------------------------------