├── .gitignore ├── .codeclimate.yml ├── test.hxml ├── src └── parsihax │ ├── ParseFunction.hx │ ├── ParseResult.hx │ ├── ParseObject.hx │ ├── ParseUtil.hx │ └── Parser.hx ├── doc.hxml ├── haxelib.json ├── .travis.yml ├── LICENSE ├── test └── parsihax │ ├── Test.hx │ ├── LispGrammar.hx │ └── JsonGrammar.hx └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | *.n 2 | /.vscode/ 3 | /bin/ 4 | /dump/ 5 | *.zip 6 | -------------------------------------------------------------------------------- /.codeclimate.yml: -------------------------------------------------------------------------------- 1 | engines: 2 | haxe-checkstyle: 3 | enabled: true 4 | ratings: 5 | paths: 6 | - "**.hx" 7 | -------------------------------------------------------------------------------- /test.hxml: -------------------------------------------------------------------------------- 1 | -cp src 2 | -cp test 3 | -lib monax 4 | -lib buddy 5 | -main parsihax.Test 6 | -neko test/parsihax/Test.n 7 | -cmd neko test/parsihax/Test.n -------------------------------------------------------------------------------- /src/parsihax/ParseFunction.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | /** 4 | Parsing function created by chaining Parser combinators. 
5 | **/ 6 | typedef ParseFunction = String -> ?Int -> ParseResult; 7 | -------------------------------------------------------------------------------- /doc.hxml: -------------------------------------------------------------------------------- 1 | -lib monax 2 | -dce std 3 | -cp src 4 | parsihax.ParseFunction 5 | parsihax.ParseObject 6 | parsihax.ParseResult 7 | parsihax.ParseUtil 8 | parsihax.Parser 9 | -xml bin/parsihax.xml 10 | --next 11 | -cmd haxelib run dox -o bin/api -i bin --title "Parsihax - API documentation" -D source-path https://github.com/deathbeam/parsihax/blob/master/src/ 12 | -------------------------------------------------------------------------------- /haxelib.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "parsihax", 3 | "url" : "https://github.com/deathbeam/parsihax", 4 | "license": "MIT", 5 | "tags": ["parser", "parsing", "cross", "utility"], 6 | "description": "A monadic LL(infinity) parser combinator library for Haxe.", 7 | "version": "2.0.2", 8 | "classPath": "src/", 9 | "releasenote": "Haxe 4 compatibility fixes", 10 | "contributors": [ "deathbeam" ] 11 | } 12 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: haxe 2 | 3 | env: 4 | global: 5 | - GH_REF: github.com/deathbeam/parsihax.git 6 | 7 | haxe: 8 | - development 9 | 10 | before_install: 11 | - sudo apt-get update 12 | - sudo apt-get install mono-devel 13 | 14 | hxml: 15 | - build.hxml 16 | - test.hxml 17 | 18 | script: 19 | - haxe test.hxml 20 | - haxelib git dox https://github.com/HaxeFoundation/dox 21 | - rm -rf bin 22 | - mkdir bin 23 | - haxe doc.hxml 24 | 25 | deploy: 26 | provider: pages 27 | skip_cleanup: true 28 | github_token: $GH_TOKEN 29 | keep_history: true 30 | local_dir: bin/api 31 | on: 32 | branch: master 33 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2016 Tomas Slusny 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /src/parsihax/ParseResult.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | /** 4 | A structure with a boolean `status` flag, indicating whether the parse 5 | succeeded. If it succeeded, the `value` attribute will contain the yielded 6 | value. Otherwise, the `index` and `expected` attributes will contain the 7 | offset of the parse error, and a sorted, unique array of messages indicating 8 | what was expected. 
9 | 10 | The error structure can be passed along with the original source to 11 | `ParseUtil.formatError` to obtain a human-readable error string. 12 | **/ 13 | typedef ParseResult<T> = { 14 | 15 | /** 16 | Flag, indicating whether the parse succeeded 17 | **/ 18 | var status : Bool; 19 | 20 | /** 21 | Offset right after the consumed input (in case of successful parse, `-1` otherwise) 22 | **/ 23 | var index : Int; 24 | 25 | /** 26 | Yielded value of `Parser` (in case of successful parse) 27 | **/ 28 | var value : T; 29 | 30 | /** 31 | Furthest offset at which a failure was recorded (`-1` if there was none) 32 | **/ 33 | var furthest : Int; 34 | 35 | /** 36 | Messages indicating what was expected at `furthest` (in case of failed parse) 37 | **/ 38 | var expected : Array<String>; 39 | 40 | } 41 | -------------------------------------------------------------------------------- /test/parsihax/Test.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | import buddy.SingleSuite; 4 | using buddy.Should; 5 | 6 | class Test extends SingleSuite { 7 | public function new() { 8 | describe("Using Parsihax", { 9 | var result = false; 10 | 11 | beforeEach({ 12 | result = false; 13 | }); 14 | 15 | describe("JSON grammar", { 16 | var input = ' 17 | { 18 | "firstName": "John", 19 | "lastName": "Smith", 20 | "age": 25, 21 | "address": { 22 | "streetAddress": "21 2nd Street", 23 | "city": "New York", 24 | "state": "NY", 25 | "postalCode": "10021" 26 | }, 27 | "phoneNumber": [ 28 | { 29 | "type": "home", 30 | "number": "212 555-1234" 31 | }, 32 | { 33 | "type": "fax", 34 | "number": "646 555-4567" 35 | } 36 | ] 37 | }'; 38 | 39 | beforeEach({ 40 | result = JsonGrammar.build()(input).status; 41 | }); 42 | 43 | it('should parse "$input"', { 44 | result.should.be(true); 45 | }); 46 | }); 47 | 48 | describe("Lisp grammar", { 49 | var input = ' 50 | (if (empty brain) 51 | (print "Hello Lisp!") 52 | (print 42.0))'; 53 | 54 | beforeEach({ 55 | result = LispGrammar.build()(input).status; 56 | }); 57 | 58 | it('should 
parse "$input"', { 59 | result.should.be(true); 60 | }); 61 | }); 62 | 63 | }); 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/parsihax/ParseObject.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | import haxe.ds.Vector; 4 | 5 | /** 6 | The ParseObject object is a wrapper for a parser function. 7 | Externally, you use one to parse a string by calling 8 | `var result = SomeParseObject.apply('Me Me Me! Parse Me!');` 9 | **/ 10 | abstract ParseObject(Vector>) { 11 | 12 | inline function new() this = new Vector(1); 13 | @:to inline function get_apply() : ParseFunction return this[0]; 14 | inline function set_apply(param : ParseFunction) return this[0] = param; 15 | 16 | /** 17 | Getting `ParseObject.apply` from a parser (or explicitly casting it to 18 | `ParseFunction` returns parsing function `String -> ?Int -> ParseResult` 19 | (or just `ParseFunction`), that parses the string and returns `ParseResult`. 20 | 21 | Changing `ParseObject.apply` value changes parser behaviour, but still keeps it's 22 | reference, what is really usefull in recursive parsers. 
23 | **/ 24 | public var apply(get, set): ParseFunction; 25 | 26 | /** 27 | Wraps a `ParseFunction` in a new `ParseObject` (also used for implicit casts via `@:from`) 28 | **/ 29 | @:noUsing @:from public static inline function to(v : ParseFunction) : ParseObject { 30 | var ret = new ParseObject(); 31 | ret.apply = v; 32 | return ret; 33 | } 34 | 35 | /** 36 | Operator `l + r`: same as `Parser.then(l, r)` 37 | **/ 38 | @:noUsing @:op(A + B) public static inline function opAdd(l: ParseObject, r: ParseObject): ParseObject { 39 | return Parser.then(l, r); 40 | } 41 | 42 | /** 43 | Operator `l | r`: same as `Parser.or(l, r)` 44 | **/ 45 | @:noUsing @:op(A | B) public static inline function opOr(l: ParseObject, r: ParseObject): ParseObject { 46 | return Parser.or(l, r); 47 | } 48 | 49 | /** 50 | Operator `l / r`: same as `Parser.as(l, r)` 51 | **/ 52 | @:noUsing @:op(A / B) public static inline function opDiv(l: ParseObject, r: String): ParseObject { 53 | return Parser.as(l, r); 54 | } 55 | 56 | } 57 | -------------------------------------------------------------------------------- /test/parsihax/LispGrammar.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | import parsihax.*; 4 | import parsihax.Parser.*; 5 | using parsihax.Parser; 6 | using parsihax.LispGrammar; 7 | 8 | // ADT describing the expressions the Lisp grammar yields 9 | enum LispExpression { 10 | LispNumber(v: Float); 11 | LispSymbol(v: String); 12 | LispString(v: String); 13 | LispList(v : Array); 14 | } 15 | 16 | class LispGrammar { 17 | // A little helper that makes `parser` skip optional whitespace after its match. 18 | private static inline function trim(parser : ParseObject) { 19 | return parser.skip(optWhitespace()); 20 | } 21 | 22 | public static function build() { 23 | // We need to use `empty` here because the other parsers don't exist yet. We 24 | // can't just declare this later though, because `LList` references this parser! 25 | var LExpression = empty(); 26 | 27 | // The basic parsers (usually the ones described via regexp) should have a 28 | // description for error message purposes. 
29 | 30 | var LString = 31 | ~/"[^"]*"/.regexp().trim() 32 | .map(function(r) return LispString(r)) 33 | .as('string'); 34 | 35 | var LSymbol = 36 | ~/[a-zA-Z_-][a-zA-Z0-9_-]*/.regexp().trim() 37 | .map(function(r) return LispSymbol(r)) 38 | .as('symbol'); 39 | // At least one digit is required, so this parser can never succeed on an empty match. 40 | var LNumber = 41 | ~/[+-]?(?:[0-9]+(?:\.[0-9]+)?|\.[0-9]+)/.regexp().trim() 42 | .map(function(r) return LispNumber(Std.parseFloat(r))) 43 | .as('number'); 44 | 45 | // `.then` throws away the first value, and `.skip` throws away the second 46 | // value, so we're left with just the `LExpression.many()` part as the 47 | // yielded value from this parser. 48 | var LList = 49 | '('.string().trim() 50 | .then(LExpression.many()) 51 | .skip(')'.string().trim()) 52 | .map(function(r) return LispList(r)); 53 | 54 | // Initialize LExpression only now, through its mutable `.apply` field, because `LList` refers to it recursively 55 | LExpression.apply = [ 56 | LSymbol, 57 | LNumber, 58 | LString, 59 | LList 60 | ].alt(); 61 | 62 | // Let's remember to throw away whitespace at the top level of the parser. 63 | return optWhitespace().then(LExpression).apply; 64 | } 65 | } 66 | -------------------------------------------------------------------------------- /src/parsihax/ParseUtil.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | class ParseUtil { 4 | 5 | /** 6 | Obtain a human-readable error `String`. 7 | **/ 8 | public static function formatError(result : ParseResult, stream : String) : String { 9 | var sexpected = result.expected.length == 1 10 | ? 
result.expected[0] 11 | : 'one of ' + result.expected.join(', '); 12 | 13 | var indexOffset = result.furthest; 14 | var lines = stream.substring(0, indexOffset).split("\n"); 15 | var lineWeAreUpTo = lines.length; 16 | var columnWeAreUpTo = lines[lines.length - 1].length + 1; 17 | 18 | var got = ''; 19 | 20 | if (indexOffset == stream.length) { 21 | got = ', got the end of the stream'; 22 | } else { 23 | var prefix = (indexOffset > 0 ? "'..." : "'"); 24 | var suffix = (stream.length - indexOffset > 12 ? "...'" : "'"); 25 | 26 | got = ' at line ' + lineWeAreUpTo + ' column ' + columnWeAreUpTo 27 | + ', got ' + prefix + stream.substring(indexOffset, indexOffset + 12) + suffix; 28 | } 29 | 30 | return 'expected ' + sexpected + got; 31 | } 32 | 33 | /** 34 | Create successfull `ParseResult` with specified `index` and `value`. 35 | **/ 36 | @:allow(parsihax.Parser) 37 | private static inline function makeSuccess(index : Int, value : A) : ParseResult { 38 | return { 39 | status: true, 40 | index: index, 41 | value: value, 42 | furthest: -1, 43 | expected: [] 44 | }; 45 | } 46 | 47 | /** 48 | Create failed `ParseResult` with specified `index` and `expected` input. 49 | **/ 50 | @:allow(parsihax.Parser) 51 | private static inline function makeFailure(index : Int, expected : String) : ParseResult { 52 | return { 53 | status: false, 54 | index: -1, 55 | value: null, 56 | furthest: index, 57 | expected: [expected] 58 | }; 59 | } 60 | 61 | /** 62 | Merge `result` and `last` into single `ParseResult`. 63 | **/ 64 | @:allow(parsihax.Parser) 65 | private static function mergeReplies(result : ParseResult, ?last : ParseResult) : ParseResult { 66 | if (last == null) return result; 67 | if (result.furthest > last.furthest) return result; 68 | 69 | var expected = (result.furthest == last.furthest) 70 | ? 
unsafeUnion(result.expected, last.expected) 71 | : last.expected; 72 | 73 | return { 74 | status: result.status, 75 | index: result.index, 76 | value: result.value, 77 | furthest: last.furthest, 78 | expected: expected 79 | } 80 | } 81 | 82 | /** 83 | Create the union of two string arrays `xs` and `ys`: duplicates are removed and the result is sorted case-insensitively. If either array is empty the other one is returned as is. 84 | **/ 85 | private static function unsafeUnion(xs : Array<String>, ys : Array<String>) : Array<String> { 86 | if (xs.length == 0) { 87 | return ys; 88 | } else if (ys.length == 0) { 89 | return xs; 90 | } 91 | 92 | // Keep only the first occurrence of every message so that `expected` stays unique. 93 | var seen = new Map<String, Bool>(); 94 | var result : Array<String> = []; 95 | for (message in xs.concat(ys)) { 96 | if (!seen.exists(message)) { 97 | seen.set(message, true); 98 | result.push(message); 99 | } 100 | } 101 | 102 | result.sort(function(a, b):Int { 103 | a = a.toLowerCase(); 104 | b = b.toLowerCase(); 105 | if (a < b) return -1; 106 | if (a > b) return 1; 107 | return 0; 108 | }); 109 | 110 | return result; 111 | } 112 | 113 | } 114 | 115 | 116 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Parsihax 2 | [![TravisCI Build Status][travis-img]][travis] 3 | 4 | Parsihax is a small library for writing big parsers made up of lots of little parsers. The API is inspired by 5 | [parsec][] and [Parsimmon][parsimmon] (originally, Parsihax was just supposed to be Parsimmon rewrite in Haxe). 6 | 7 | ### Installation 8 | 9 | Install the library via [haxelib][] (library manager that comes with any Haxe distribution). 10 | 11 | ``` 12 | haxelib install parsihax 13 | ``` 14 | 15 | ## API Documentation 16 | 17 | Haxe-generated API documentation is available at [documentation website][docs], or see the 18 | [annotated source of `parsihax.Parser.hx`.][parsihax] 19 | 20 | ## Examples 21 | 22 | See the [test][] directory for annotated examples of parsing JSON, simple Lisp-like structure and monad parser. 
23 | 24 | ## Basics 25 | To use nice sugar syntax, simply add this to your Haxe file 26 | 27 | ```haxe 28 | import parsihax.*; 29 | import parsihax.Parser.*; 30 | using parsihax.Parser; 31 | ``` 32 | 33 | A `ParseObject` parser is an abstract that represents an action on a stream of text, and the promise of either an 34 | object yielded by that action on success or a message in case of failure. For example, `Parser.string('foo')` yields 35 | the string `'foo'` if the beginning of the stream is `'foo'`, and otherwise fails. 36 | 37 | The method `.map` is used to transform the yielded value. For example, 38 | 39 | ```haxe 40 | 'foo'.string() 41 | .map(function(x) return x + 'bar'); 42 | ``` 43 | 44 | will yield `'foobar'` if the stream starts with `'foo'`. The parser 45 | 46 | ```haxe 47 | ~/[0-9]+/.regexp() 48 | .map(function(x) return Std.parseInt(x) * 2); 49 | ``` 50 | 51 | will yield the number `24` when it encounters the string `'12'`. 52 | 53 | Also, Parsihax supports nice sugar syntax thanks to Haxe operator overloading. For example, 54 | 55 | ```haxe 56 | var a = "a".string() / "important letter a" 57 | var b = "b".string() / "important letter b" 58 | var c = "c".string() / "important letter c" 59 | 60 | var result = a | b + c; 61 | 62 | // Will succeed on "ac" and "bc" 63 | // In case of failure, it will throw "expected important letter a|b|c" 64 | // So, plus operator is alias to then, or operator to or and div 65 | // operator to as 66 | ``` 67 | 68 | Getting `apply` from a `ParseObject` (or explicitly casting it to `ParseFunction` returns parsing function 69 | `String -> ?Int -> Result` (or just `ParseFunction`), that parses the string and returns a `Hax.Result` 70 | with a boolean `status` flag, indicating whether the parse succeeded. If it succeeded, the `value` attribute will 71 | contain the yielded value. 
Otherwise, the `furthest` and `expected` attributes will contain the offset of the parse error, 72 | and a sorted, unique array of messages indicating what was expected. 73 | 74 | The error object can be passed along with the original source to `ParseUtil.formatError` to obtain 75 | a human-readable error string. 76 | 77 | Changing `ParseObject.apply` value changes `ParseObject` behaviour, but still keeps its reference, which is 78 | really useful in recursive parsers. 79 | 80 | [travis]: https://travis-ci.org/deathbeam/parsihax 81 | [travis-img]: https://api.travis-ci.org/deathbeam/parsihax.svg?branch=master 82 | [haxelib]: http://lib.haxe.org/p/parsihax 83 | [docs]: https://deathbeam.github.io/parsihax/parsihax/Parser.html 84 | [parsihax]: https://github.com/deathbeam/parsihax/blob/master/src/parsihax/Parser.hx 85 | [test]: https://github.com/deathbeam/parsihax/tree/master/test/parsihax 86 | [parsec]: https://hackage.haskell.org/package/parsec 87 | [parsimmon]: https://github.com/jneen/parsimmon 88 | -------------------------------------------------------------------------------- /test/parsihax/JsonGrammar.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | import parsihax.*; 4 | import parsihax.Parser.*; 5 | using parsihax.Parser; 6 | 7 | // ADT definition 8 | enum JsonExpression { 9 | JsonNull; 10 | JsonTrue; 11 | JsonFalse; 12 | JsonNumber(v : Float); 13 | JsonString(v : String); 14 | JsonPair(k : JsonExpression, v : JsonExpression); 15 | JsonArray(v : Array); 16 | JsonObject(v : Array); 17 | } 18 | 19 | class JsonGrammar { 20 | // This is the main entry point of the parser: a full Json document. 21 | static var json = (function() { 22 | return whitespace.then([ 23 | object, 24 | array, 25 | stringLiteral, 26 | numberLiteral, 27 | nullLiteral, 28 | trueLiteral, 29 | falseLiteral 30 | ].alt()); 31 | }).lazy(); 32 | 33 | // Use the Json standard's definition of whitespace rather than Parsihax's. 
34 | static var whitespace = ~/\s*/m.regexp(); 35 | 36 | // Json is pretty relaxed about whitespace, so let's make it easy to ignore 37 | // after most text. 38 | static function token(parser) { 39 | return skip(parser, whitespace); 40 | } 41 | 42 | // This gets reused for both array and object parsing. 43 | static function commaSep(parser) { 44 | return sepBy(parser, token(','.string())); 45 | } 46 | 47 | // The basic tokens in Json, with optional whitespace afterward. 48 | static var lbrace = token('{'.string()); 49 | static var rbrace = token('}'.string()); 50 | static var lbracket = token('['.string()); 51 | static var rbracket = token(']'.string()); 52 | static var comma = token(','.string()); 53 | static var colon = token(':'.string()); 54 | 55 | // `.result` is like `.map` but it takes a value instead of a function, and 56 | // `.always returns the same value. 57 | static var nullLiteral = token('null'.string()).result(JsonNull); 58 | static var trueLiteral = token('true'.string()).result(JsonTrue); 59 | static var falseLiteral = token('false'.string()).result(JsonFalse); 60 | 61 | // regexp based parsers should generally be named for better error reporting. 62 | static var stringLiteral = 63 | token(~/"((?:\\.|.)*?)"/.regexp(1)) 64 | // Turn escaped characters into real ones (e.g. "\\n" becoems "\n"). 
65 | .map(function interpretEscapes(str) { 66 | var escapes = [ 67 | 'b' => String.fromCharCode(8), 68 | 'f' => String.fromCharCode(12), 69 | 'n' => '\n', 70 | 'r' => '\r', 71 | 't' => '\t' 72 | ]; 73 | // Group 1 is the escape without its backslash; the `g` flag makes `map` rewrite every escape, not just the first. 74 | return JsonString(~/\\(u[0-9a-fA-F]{4}|[^u])/g.map(str, function(reg) { 75 | var escape = reg.matched(1); 76 | var type = escape.charAt(0); 77 | var hex = escape.substr(1); 78 | if (type == 'u') return String.fromCharCode(Std.parseInt('0x' + hex)); 79 | if (escapes.exists(type)) return escapes[type]; 80 | return type; 81 | })); 82 | }).as('string'); 83 | 84 | static var numberLiteral = 85 | token(~/-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?/.regexp()) 86 | .map(function(result) return JsonNumber(Std.parseFloat(result))) 87 | .as('number'); 88 | 89 | // Array parsing is just ignoring brackets and commas and parsing as many nested 90 | // Json documents as possible. Notice that we're using the parser `json` we just 91 | // defined above. Arrays and objects in the Json grammar are recursive because 92 | // they can contain any other Json document within them. 93 | static var array = lbracket.then(commaSep(json)).skip(rbracket) 94 | .map(function(results) return JsonArray(results)); 95 | 96 | // Object parsing is a little trickier because we have to collect all the key- 97 | // value pairs in order as length-2 arrays, then manually copy them into an 98 | // object. 
99 | static var pair = 100 | [stringLiteral.skip(colon), json].seq() 101 | .map(function(results) return JsonPair(results[0], results[1])); 102 | 103 | static var object = 104 | lbrace.then(commaSep(pair)).skip(rbrace) 105 | .map(function(pairs) return JsonObject(pairs)); 106 | 107 | public static function build() { 108 | return json.apply; 109 | } 110 | } 111 | -------------------------------------------------------------------------------- /src/parsihax/Parser.hx: -------------------------------------------------------------------------------- 1 | package parsihax; 2 | 3 | using parsihax.Parser; 4 | 5 | /** 6 | Defines grammar and encapsulates parsing logic. A `ParseObject` takes as input a 7 | `String` source and parses it when the `ParseObject.apply` method is called. 8 | A structure `ParseResult` is returned. 9 | **/ 10 | class Parser { 11 | 12 | /** 13 | Yields current position in stream 14 | **/ 15 | public static function index() : ParseObject { 16 | return function(stream : String, i : Int = 0) : ParseResult { 17 | return ParseUtil.makeSuccess(i, i); 18 | }; 19 | } 20 | 21 | /** 22 | Equivalent to `Parser.regexp(~/[a-z]/i)` 23 | **/ 24 | public static inline function letter() : ParseObject { 25 | return ~/[a-z]/i.regexp().as('a letter'); 26 | } 27 | 28 | /** 29 | Equivalent to `Parser.regexp(~/[a-z]* /i)` 30 | **/ 31 | public static inline function letters() : ParseObject { 32 | return ~/[a-z]*/i.regexp(); 33 | } 34 | 35 | /** 36 | Equivalent to `Parser.regexp(~/[0-9]/)` 37 | **/ 38 | public static inline function digit() : ParseObject { 39 | return ~/[0-9]/.regexp().as('a digit'); 40 | } 41 | 42 | /** 43 | Equivalent to `Parser.regexp(~/[0-9]* /)` 44 | **/ 45 | public static inline function digits() : ParseObject { 46 | return ~/[0-9]*/.regexp(); 47 | } 48 | 49 | /** 50 | Equivalent to `Parser.regexp(~/\s+/)` 51 | **/ 52 | public static inline function whitespace() : ParseObject { 53 | return ~/\s+/.regexp().as('whitespace'); 54 | } 55 | 56 | /** 57 | 
Equivalent to `Parser.regexp(~/\s* /)` 58 | **/ 59 | public static inline function optWhitespace() : ParseObject { 60 | return ~/\s*/.regexp(); 61 | } 62 | 63 | /** 64 | A `ParseObject` that consumes and yields the next character of the stream. 65 | **/ 66 | public static function any() : ParseObject { 67 | return function(stream : String, i : Int = 0) : ParseResult { 68 | return i >= stream.length 69 | ? ParseUtil.makeFailure(i, 'any character') 70 | : ParseUtil.makeSuccess(i+1, stream.charAt(i)); 71 | }; 72 | } 73 | 74 | /** 75 | A `ParseObject` that consumes and yields the entire remainder of the stream. 76 | **/ 77 | public static function all() : ParseObject { 78 | return function(stream : String, i : Int = 0) : ParseResult { 79 | return ParseUtil.makeSuccess(stream.length, stream.substring(i)); 80 | }; 81 | } 82 | 83 | /** 84 | A `ParseObject` that expects to be at the end of the stream (zero characters left). 85 | **/ 86 | public static function eof() : ParseObject { 87 | return function(stream : String, i : Int = 0) : ParseResult { 88 | return i < stream.length 89 | ? ParseUtil.makeFailure(i, 'EOF') 90 | : ParseUtil.makeSuccess(i, null); 91 | }; 92 | } 93 | 94 | /** 95 | Returns a `ParseObject` that looks for `String` and yields that exact value. 96 | **/ 97 | public static function string(string : String) : ParseObject { 98 | var len = string.length; 99 | var expected = "'"+string+"'"; 100 | 101 | return function(stream : String, i : Int = 0) : ParseResult { 102 | var head = stream.substring(i, i + len); 103 | 104 | if (head == string) { 105 | return ParseUtil.makeSuccess(i+len, head); 106 | } else { 107 | return ParseUtil.makeFailure(i, expected); 108 | } 109 | }; 110 | } 111 | 112 | /** 113 | Returns a `ParseObject` that looks for exactly one character from `String` and 114 | yields that exact value. This combinator is faster than `Parser.string` 115 | in case of matching single character. 
116 | **/ 117 | public static function char(character : String) : ParseObject { 118 | return (function(ch) { return character == ch; }).test().as("'"+character+"'"); 119 | } 120 | 121 | /** 122 | Returns a `ParseObject` that looks for exactly one character from `String`, and 123 | yields that character. 124 | **/ 125 | public static function oneOf(string : String) : ParseObject { 126 | return (function(ch) { return string.indexOf(ch) >= 0; }).test(); 127 | } 128 | 129 | /** 130 | Returns a `ParseObject` that looks for exactly one character NOT from `String`, 131 | and yields that character. 132 | **/ 133 | public static function noneOf(string : String) : ParseObject { 134 | return (function(ch) { return string.indexOf(ch) < 0; }).test(); 135 | } 136 | 137 | /** 138 | Returns a `ParseObject` that looks for a match to the `EReg` and yields the given 139 | match group (defaulting to the entire match). The `EReg` will always match 140 | starting at the current parse location. The regexp may only use the 141 | following flags: imu. Any other flag will result in some weird behaviour. 142 | **/ 143 | public static function regexp(re : EReg, group : Int = 0) : ParseObject { 144 | var expected = Std.string(re); 145 | 146 | return function(stream : String, i : Int = 0) : ParseResult { 147 | var match = re.match(stream.substring(i)); 148 | 149 | if (match) { 150 | var groupMatch = re.matched(group); 151 | var pos = re.matchedPos(); 152 | if (groupMatch != null && pos.pos == 0) { 153 | return ParseUtil.makeSuccess(i + pos.len, groupMatch); 154 | } 155 | } 156 | 157 | return ParseUtil.makeFailure(i, expected); 158 | }; 159 | } 160 | 161 | /** 162 | Returns a `ParseObject` that doesn't consume any of the string, and yields 163 | `value`. 
164 | **/ 165 | public static function succeed(value : A) : ParseObject { 166 | return function(stream : String, i : Int = 0) : ParseResult { 167 | return ParseUtil.makeSuccess(i, value); 168 | }; 169 | } 170 | 171 | /** 172 | Returns a failing `ParseObject` with the given `expected` message. 173 | **/ 174 | public static function fail(expected : String) : ParseObject { 175 | return function(stream : String, i : Int = 0) : ParseResult { 176 | return ParseUtil.makeFailure(i, expected); 177 | } 178 | } 179 | 180 | /** 181 | Returns a new failed `ParseObject` with 'empty' message 182 | **/ 183 | public static function empty() : ParseObject { 184 | return fail('empty'); 185 | } 186 | 187 | /** 188 | Accepts an array of parsers `Array` and returns a new 189 | `ParseObject` that expects them to match in order, yielding an array of 190 | all their results. 191 | **/ 192 | public static function seq(parsers : Array>) : ParseObject> { 193 | if (parsers.length == 0) return fail('sequence of parsers'); 194 | 195 | return function(stream : String, i : Int = 0) : ParseResult> { 196 | var result : ParseResult = null; 197 | var accum : Array = []; 198 | 199 | for (parser in parsers) { 200 | result = ParseUtil.mergeReplies(parser.apply(stream, i), result); 201 | if (!result.status) return cast(result); 202 | accum.push(result.value); 203 | i = result.index; 204 | } 205 | 206 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(i, accum), result); 207 | }; 208 | } 209 | 210 | /** 211 | Accepts an array of parsers `Array`, yielding the value of the first 212 | one that succeeds, backtracking in between. This means that the order of 213 | parsers matters. If two parsers match the same prefix, the longer of the two 214 | must come first. 
215 | 216 | ```haxe 217 | Parser.alt([ 218 | Parser.string('ab'), 219 | Parser.string('a') 220 | ]).apply('ab'); 221 | // => {status: true, value: 'ab'} 222 | 223 | Parser.alt([ 224 | Parser.string('a'), 225 | Parser.string('ab') 226 | ]).apply('ab'); 227 | // => {status: false, ...} 228 | ``` 229 | 230 | In the second case, `Parser.alt` matches on the first parser, then 231 | there are extra characters left over (`'b'`), so `ParseObject` returns a failure. 232 | **/ 233 | public static function alt(parsers : Array>) : ParseObject { 234 | if (parsers.length == 0) return fail('at least one alt'); 235 | 236 | return function(stream : String, i : Int = 0) : ParseResult { 237 | var result : ParseResult = null; 238 | 239 | for (parser in parsers) { 240 | result = ParseUtil.mergeReplies(parser.apply(stream, i), result); 241 | if (result.status) return result; 242 | } 243 | 244 | return result; 245 | }; 246 | } 247 | 248 | /** 249 | Accepts two `ParseObject`s, and expects zero or more matches for content, 250 | separated by `separator`, yielding an array. 251 | 252 | ```haxe 253 | Parser.sepBy( 254 | Parser.oneOf('abc'), 255 | Parser.string('|') 256 | ).apply('a|b|c|c|c|a'); 257 | // => {status: true, value: ['a', 'b', 'c', 'c', 'c', 'a']} 258 | 259 | Parser.sepBy( 260 | Parser.oneOf('XYZ'), 261 | Parser.string('-') 262 | ).apply(''); 263 | // => {status: true, value: []} 264 | ``` 265 | **/ 266 | public static inline function sepBy(parser : ParseObject, separator : ParseObject) : ParseObject> { 267 | return parser.sepBy1(separator).or([].succeed()); 268 | } 269 | 270 | /** 271 | This is the same as `ParseObject.sepBy`, but matches the content parser at least 272 | once. 
273 | **/ 274 | public static function sepBy1(parser : ParseObject, separator : ParseObject) : ParseObject> { 275 | var pairs = separator.then(parser).many(); 276 | 277 | return parser.flatMap(function(r) { 278 | return pairs.map(function(rs) { 279 | return [r].concat(rs); 280 | }); 281 | }); 282 | } 283 | 284 | /** 285 | Returns a `ParseObject` that yield a single character if it passes the `predicate` 286 | function `String -> Bool`. 287 | 288 | ```haxe 289 | var SameUpperLower = Parser.test(function(c) { 290 | return c.toUpperCase() == c.toLowerCase(); 291 | }); 292 | 293 | SameUpperLower.apply('a'); // => {status: false, ...} 294 | SameUpperLower.apply('-'); // => {status: true, ...} 295 | SameUpperLower.apply(':'); // => {status: true, ...} 296 | ``` 297 | **/ 298 | public static function test(predicate : String -> Bool) : ParseObject { 299 | return function(stream : String, i : Int = 0) : ParseResult { 300 | var char = stream.charAt(i); 301 | 302 | return i < stream.length && predicate(char) 303 | ? ParseUtil.makeSuccess(i+1, char) 304 | : ParseUtil.makeFailure(i, 'a character matching ' + predicate); 305 | }; 306 | } 307 | 308 | /** 309 | Returns a `ParseObject` yielding a string containing all the next characters that 310 | pass the `predicate : String -> Bool`. 311 | 312 | ```haxe 313 | var CustomString = 314 | Parser.string('%') 315 | .then(Parser.any()) 316 | .flatMap(function(start) { 317 | var end = [ 318 | '[' => ']', 319 | '(' => ')', 320 | '{' => '}', 321 | '<'=> '>' 322 | ][start]; 323 | end = end != null ? 
end : start; 324 | 325 | return Parser.takeWhile(function(c) { 326 | return c != end; 327 | }).skip(Parser.string(end)); 328 | }); 329 | 330 | CustomString.apply('%:a string:'); // => {status: true, value: 'a string'} 331 | CustomString.apply('%[a string]'); // => {status: true, value: 'a string'} 332 | CustomString.apply('%{a string}'); // => {status: true, value: 'a string'} 333 | CustomString.apply('%(a string)'); // => {status: true, value: 'a string'} 334 | CustomString.apply('%'); // => {status: true, value: 'a string'} 335 | ``` 336 | **/ 337 | public static function takeWhile(predicate : String -> Bool) : ParseObject { 338 | return function(stream : String, i : Int = 0) : ParseResult { 339 | var j = i; 340 | while (j < stream.length && predicate(stream.charAt(j))) j += 1; 341 | return ParseUtil.makeSuccess(j, stream.substring(i, j)); 342 | }; 343 | } 344 | 345 | /** 346 | Returns a new `ParseObject` which tries `parser`, and if it fails uses 347 | `alternative`. Example: 348 | 349 | ```haxe 350 | var numberPrefix = 351 | Parser.string('+') 352 | .or(Parser.of('-')) 353 | .or(Parser.of('')); 354 | 355 | numberPrefix.apply('+'); // => {status: true, value: '+'} 356 | numberPrefix.apply('-'); // => {status: true, value: '-'} 357 | numberPrefix.apply(''); // => {status: true, value: ''} 358 | ``` 359 | **/ 360 | public static function or(parser: ParseObject, alternative : ParseObject) : ParseObject { 361 | return [parser, alternative].alt(); 362 | } 363 | 364 | /** 365 | Returns a new `ParseObject` which tries `parser`, and on success calls the function 366 | `fun : A -> ParseObject` with the result of the parse, which is expected to 367 | return another parser, which will be tried next. This allows you to 368 | dynamically decide how to continue the parse, which is impossible with the 369 | other combinators. 
370 | 371 | ```haxe 372 | var CustomString = 373 | Parser.string('%') 374 | .then(Parser.any()) 375 | .flatMap(function(start) { 376 | var end = [ 377 | '[' => ']', 378 | '(' => ')', 379 | '{' => '}', 380 | '<'=> '>' 381 | ][start]; 382 | end = end != null ? end : start; 383 | 384 | return Parser.takeWhile(function(c) { 385 | return c != end; 386 | }).skip(Parser.string(end)); 387 | }); 388 | 389 | CustomString.apply('%:a string:'); // => {status: true, value: 'a string'} 390 | CustomString.apply('%[a string]'); // => {status: true, value: 'a string'} 391 | CustomString.apply('%{a string}'); // => {status: true, value: 'a string'} 392 | CustomString.apply('%(a string)'); // => {status: true, value: 'a string'} 393 | CustomString.apply('%'); // => {status: true, value: 'a string'} 394 | ``` 395 | **/ 396 | public static function flatMap(parser: ParseObject, fun : A -> ParseObject) : ParseObject { 397 | return function(stream : String, i : Int = 0) : ParseResult { 398 | var result = parser.apply(stream, i); 399 | if (!result.status) return cast(result); 400 | var nextParseObject = fun(result.value); 401 | return ParseUtil.mergeReplies(nextParseObject.apply(stream, result.index), result); 402 | }; 403 | } 404 | 405 | /** 406 | Expects `next` to follow `parser`, and yields the result of `next`. 407 | 408 | ```haxe 409 | var parserA = p1.then(p2); // is equivalent to... 410 | var parserB = Parser.seq([p1, p2]).map(function(results) return results[1]); 411 | ``` 412 | **/ 413 | public static function then(parser: ParseObject, next : ParseObject) : ParseObject { 414 | return parser.flatMap(function(result) return next); 415 | } 416 | 417 | /** 418 | Transforms the output of `parser` with the given function `fun : A -> B`. 
419 | 420 | ```haxe 421 | var pNum = Parser.regexp(~/[0-9]+/).map(Std.applyInt); 422 | 423 | pNum.apply('9'); // => {status: true, value: 9} 424 | pNum.apply('123'); // => {status: true, value: 123} 425 | pNum.apply('3.1'); // => {status: true, value: 3.1} 426 | ``` 427 | **/ 428 | public static function map(parser: ParseObject, fun : A -> B) : ParseObject { 429 | return function(stream : String, i : Int = 0) : ParseResult { 430 | var result = parser.apply(stream, i); 431 | if (!result.status) return cast(result); 432 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(result.index, fun(result.value)), result); 433 | }; 434 | } 435 | 436 | /** 437 | Returns a new `ParseObject` with the same behavior, but which yields `value`. 438 | Equivalent to `Parser.map(parser, function(x) return x)`. 439 | **/ 440 | public static function result(parser: ParseObject, value : B) : ParseObject { 441 | return parser.map(function(_) return value); 442 | } 443 | 444 | /** 445 | Expects `next` after `parser`, but yields the value of `parser`. 446 | 447 | ```haxe 448 | var parserA = p1.skip(p2); // is equivalent to... 449 | var parserB = Parser.seq([p1, p2]).map(function(results) return results[0]); 450 | ``` 451 | **/ 452 | public static function skip(parser: ParseObject, next : ParseObject) : ParseObject { 453 | return parser.flatMap(function(result) return next.result(result)); 454 | }; 455 | 456 | /** 457 | Expects `ParseObject` zero or more times, and yields an array of the results. 
458 | **/ 459 | public static function many(parser: ParseObject) : ParseObject> { 460 | return function(stream : String, i : Int = 0) : ParseResult> { 461 | var accum : Array = []; 462 | var result = null; 463 | 464 | while (true) { 465 | result = ParseUtil.mergeReplies(parser.apply(stream, i), result); 466 | 467 | if (result.status) { 468 | i = result.index; 469 | accum.push(result.value); 470 | } else { 471 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(i, accum), result); 472 | } 473 | } 474 | }; 475 | } 476 | 477 | /** 478 | Expects `ParseObject` one or more times, and yields an array of the results. 479 | **/ 480 | public static inline function many1(parser: ParseObject) : ParseObject> { 481 | return parser.atLeast(1); 482 | } 483 | 484 | /** 485 | Expects `ParseObject` between `min` and `max` times (or exactly `min` times, when 486 | `max` is omitted), and yields an array of the results. 487 | **/ 488 | public static function times(parser: ParseObject, min : Int, ?max : Int) : ParseObject> { 489 | if (max == null) max = min; 490 | 491 | return function(stream : String, i : Int = 0) : ParseResult> { 492 | var accum = []; 493 | var start = i; 494 | var result = null; 495 | var prevParseResult = null; 496 | 497 | for (times in 0...min) { 498 | result = parser.apply(stream, i); 499 | prevParseResult = ParseUtil.mergeReplies(result, prevParseResult); 500 | if (result.status) { 501 | i = result.index; 502 | accum.push(result.value); 503 | } else return cast(prevParseResult); 504 | } 505 | 506 | for (times in 0...max) { 507 | result = parser.apply(stream, i); 508 | prevParseResult = ParseUtil.mergeReplies(result, prevParseResult); 509 | if (result.status) { 510 | i = result.index; 511 | accum.push(result.value); 512 | } else break; 513 | } 514 | 515 | return ParseUtil.mergeReplies(ParseUtil.makeSuccess(i, accum), prevParseResult); 516 | }; 517 | } 518 | 519 | /** 520 | Expects `ParseObject` at most `n` times. Yields an array of the results. 
521 | **/ 522 | public static inline function atMost(parser: ParseObject, n : Int) : ParseObject> { 523 | return parser.times(0, n); 524 | } 525 | 526 | /** 527 | Expects `ParseObject` at least `n` times. Yields an array of the results. 528 | **/ 529 | public static function atLeast(parser: ParseObject, n : Int) : ParseObject> { 530 | return [parser.times(n), parser.many()].seq().map(function(results) { 531 | return results[0].concat(results[1]); 532 | }); 533 | } 534 | 535 | /** 536 | Returns a new `ParseObject` whose failure message is expected parameter. For example, 537 | `string('x').as('the letter x')` will indicate that 'the letter x' was 538 | expected. 539 | **/ 540 | public static function as(parser: ParseObject, expected : String) : ParseObject { 541 | return function(stream : String, i : Int = 0) : ParseResult { 542 | var reply = parser.apply(stream, i); 543 | if (!reply.status) reply.expected = [expected]; 544 | return reply; 545 | }; 546 | } 547 | 548 | /** 549 | Accepts a function that returns a `ParseObject`, which is evaluated the first 550 | time the parser is used. This is useful for referencing parsers that haven't 551 | yet been defined, and for implementing recursive parsers. 552 | 553 | ```haxe 554 | static var Value = Parser.lazy(function() { 555 | return Parser.alt([ 556 | Parser.string('x'), 557 | Parser.string('(') 558 | .then(Value) 559 | .skip(Parser.string(')')) 560 | ]); 561 | }); 562 | 563 | // ... 
564 | Value.apply('X'); // => {status: true, value: 'X'} 565 | Value.apply('(X)'); // => {status: true, value: 'X'} 566 | Value.apply('((X))'); // => {status: true, value: 'X'} 567 | ``` 568 | **/ 569 | public static function lazy(fun : Void -> ParseObject) : ParseObject { 570 | var parser : ParseObject = null; 571 | 572 | return parser = function(stream : String, i : Int = 0) : ParseResult { 573 | return (parser.apply = fun().apply)(stream, i); 574 | }; 575 | } 576 | 577 | } 578 | --------------------------------------------------------------------------------