├── .gitignore ├── pom.xml ├── src └── main │ └── java │ └── dk │ └── ahnfelt │ └── parsercombinator │ ├── examples │ ├── Json.java │ ├── Main.java │ └── JsonParser.java │ ├── Parser.java │ └── Parsers.java └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | target/ 3 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 2 | 5 | 4.0.0 6 | 7 | dk.ahnfelt.parsercombinator 8 | core 9 | 1.0-SNAPSHOT 10 | 11 | 12 | UTF-8 13 | 14 | 15 | 16 | 17 | junit 18 | junit 19 | 4.11 20 | 21 | 22 | 23 | 24 | 25 | 26 | org.apache.maven.plugins 27 | maven-compiler-plugin 28 | 3.1 29 | 30 | 1.8 31 | 1.8 32 | 33 | 34 | 35 | 36 | 37 | 38 | -------------------------------------------------------------------------------- /src/main/java/dk/ahnfelt/parsercombinator/examples/Json.java: -------------------------------------------------------------------------------- 1 | package dk.ahnfelt.parsercombinator.examples; 2 | 3 | import dk.ahnfelt.parsercombinator.Parsers.Pair; 4 | 5 | import java.util.List; 6 | /** Base type of the JSON value tree built by the example JsonParser; each kind of value is a nested static subclass. */ 7 | public class Json { 8 | /** The JSON null value; carries no data. */ 9 | public static class JsonNull extends Json {} 10 | /** A JSON boolean, held in the public final field value. */ 11 | public static class JsonBoolean extends Json { 12 | public final boolean value; 13 | 14 | public JsonBoolean(boolean value) { 15 | this.value = value; 16 | } 17 | } 18 | /** A JSON number, held as a double. */ 19 | public static class JsonNumber extends Json { 20 | public final double value; 21 | 22 | public JsonNumber(double value) { 23 | this.value = value; 24 | } 25 | } 26 | /** A JSON string, held in the public final field value. */ 27 | public static class JsonString extends Json { 28 | public final String value; 29 | 30 | public JsonString(String value) { 31 | this.value = value; 32 | } 33 | } 34 | /** A JSON array; value holds the parsed elements. */ 35 | public static class JsonArray extends Json { 36 | public final List value; 37 | 38 | public JsonArray(List value) { 39 | this.value = value; 40 | } 41 | } 42 | /** A JSON object; value holds its fields as a list of (name, value) pairs. */ 43 | public static class JsonObject extends 
Json { 44 | public final List> value; 45 | 46 | public JsonObject(List> value) { 47 | this.value = value; 48 | } 49 | } 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/dk/ahnfelt/parsercombinator/examples/Main.java: -------------------------------------------------------------------------------- 1 | package dk.ahnfelt.parsercombinator.examples; 2 | 3 | import dk.ahnfelt.parsercombinator.Parser; 4 | 5 | import java.util.Arrays; 6 | import java.util.function.Function; 7 | 8 | import static dk.ahnfelt.parsercombinator.Parsers.*; 9 | /** Small demo program that exercises the parser combinators. */ 10 | public class Main { 11 | /** Parses a sample JSON document with JsonParser.jsonP and prints the resulting Json object. */ 12 | public static void main(String[] arguments) throws Failure { 13 | 14 | String input = "{ \"name\": \"anna\", \"age\": 21, \"interests\": [\"diving\", \"programming\"] }"; 15 | 16 | Json output = JsonParser.jsonP.parse(input); 17 | 18 | System.out.println(output); 19 | 20 | // ifStatement() 21 | } 22 | /** Parses a sample if/elseif/else statement built from keyword and variable tokens and prints the value produced by the parser. */ 23 | public static void ifStatement() throws Failure { 24 | 25 | String test = "if a then b elseif c then d elseif c2 then d2 else e end"; 26 | 27 | Parser> parseElseIf = 28 | skip(keyword("elseif")). 29 | then(variable). 30 | skip(keyword("then")). 31 | then(variable); 32 | 33 | Parser parseIf = 34 | skip(keyword("if")).then(variable). 35 | skip(keyword("then")).then(variable). 36 | then(parseElseIf.zeroOrMore()). 37 | then(skip(keyword("else")).then(variable).optional()). 38 | skip(keyword("end")). 39 | skip(end()). 
40 | map(match((condition, then, elseIfs, otherwise) -> elseIfs.toString())); 41 | 42 | System.out.println(parseIf.parse(test)); 43 | 44 | } 45 | 46 | private static Parser token = regex("([a-z][a-z0-9]*)(\\s+|$)").map(m -> m.group(1)); 47 | private static Parser variable = token.filter(t -> !Arrays.asList("if", "then", "else", "elseif", "end").contains(t)); 48 | private static Parser keyword(String k) { return token.filter(k::equals); } 49 | 50 | 51 | } 52 | -------------------------------------------------------------------------------- /src/main/java/dk/ahnfelt/parsercombinator/examples/JsonParser.java: -------------------------------------------------------------------------------- 1 | package dk.ahnfelt.parsercombinator.examples; 2 | 3 | import java.util.regex.Pattern; 4 | 5 | import static dk.ahnfelt.parsercombinator.Parsers.*; 6 | import dk.ahnfelt.parsercombinator.examples.Json.*; 7 | import dk.ahnfelt.parsercombinator.Parser; 8 | 9 | // An example of a full parser 10 | public class JsonParser { 11 | /** Matches keyword literally (quoted with Pattern.quote) followed by optional whitespace. */ 12 | static Parser token(String keyword) { 13 | return regex(Pattern.quote(keyword) + "[\\s\\r\\n]*"); 14 | } 15 | /** Any JSON value. Alternatives that refer to parsers declared further down are wrapped in suppliers so that they are looked up at parse time. */ 16 | static Parser valueP = 17 | token("null").map(t -> (Json) new JsonNull()). 18 | or(token("true").map(t -> new JsonBoolean(true))). 19 | or(token("false").map(t -> new JsonBoolean(false))). 20 | or(() -> JsonParser.stringP.map(JsonString::new)). 21 | or(() -> JsonParser.numberP.map(JsonNumber::new)). 22 | or(() -> JsonParser.objectP). 23 | or(() -> JsonParser.arrayP); 24 | /** Matches a double-quoted string literal followed by optional whitespace. Group 1 spans the whole text between the quotes (a capturing group placed inside the repetition would only keep its last iteration); escape sequences are kept as written because unescaping is disabled below. Runs of ordinary characters are matched possessively so that malformed input cannot cause exponential backtracking, and hex digits of unicode escapes may be upper or lower case. */ 25 | static Parser stringP = 26 | regex("\"((?:[^\"\\\\]++|\\\\[\"\\\\trnbf\\/]|\\\\u[0-9a-fA-F]{4})*)\"[\\s\\r\\n]*"). 27 | map(m -> /*StringEscapeUtils.unescapeEcmaScript(*/ m.group(1) /*)*/); 28 | /** Matches a JSON number followed by optional whitespace; group 1 is the numeric text, which is converted with Double.parseDouble. An exponent marker must be followed by at least one digit, so a dangling exponent (as in 1e) is left unconsumed instead of being handed to Double.parseDouble, where it would raise NumberFormatException. */ 29 | static Parser numberP = 30 | regex("(-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?)[\\s\\r\\n]*"). 
31 | map(m -> Double.parseDouble(m.group(1))); 32 | 33 | static Parser> fieldP = 34 | stringP.skip(token(":")).then(valueP); 35 | 36 | static Parser objectP = 37 | skip(token("{")).then(fieldP.zeroOrMore(token(","))).skip(token("}")). 38 | map(JsonObject::new); 39 | 40 | static Parser arrayP = 41 | skip(token("[")).then(valueP.zeroOrMore(token(","))).skip(token("]")). 42 | map(JsonArray::new); 43 | 44 | // The final parser skips initial whitespace and requires that the whole input matches. 45 | public static Parser jsonP = 46 | skip(regex("[\\s\\r\\n]*")). 47 | then(valueP). 48 | skip(regex("$")); 49 | 50 | } 51 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # parsercombinator 2 | A parser combinator library for Java 8. Working, but mostly a proof of concept. 3 | 4 | ```java 5 | import static dk.ahnfelt.parsercombinator.Parsers.*; 6 | ``` 7 | 8 | ```java 9 | Parser<String> parseFoo = string("foo"); 10 | parseFoo.parse("foo") // returns "foo" 11 | parseFoo.parse("bar") // throws Parsers.Failure 12 | ``` 13 | 14 | ```java 15 | Parser<Integer> parseInteger = regex("[0-9]+").map(m -> Integer.parseInt(m.group())); 16 | parseInteger.parse("42") // returns 42 17 | parseInteger.parse("bar") // throws Parsers.Failure 18 | ``` 19 | 20 | ```java 21 | Parser<Pair<String, Integer>> parseFooN = parseFoo.then(parseInteger); 22 | parseFooN.parse("foo7") // returns ("foo", 7) 23 | ``` 24 | 25 | ```java 26 | Parser<Integer> parseFooN2 = skip(parseFoo).then(parseInteger); 27 | parseFooN2.parse("foo7") // returns 7 28 | ``` 29 | 30 | ```java 31 | Parser<String> parseFooOrBar = choice(string("foo"), string("bar")); 32 | parseFooOrBar.parse("foo") // returns "foo" 33 | parseFooOrBar.parse("bar") // returns "bar" 34 | parseFooOrBar.parse("quux") // throws Parsers.Failure 35 | ``` 36 | 37 | ```java 38 | // Alternative version of the above 39 | Parser<String> parseFooOrBar2 = 40 | regex("[a-z]+"). 41 | map(m -> m.group()). 
42 | filter(t -> t.equals("foo") || t.equals("bar")); 43 | ``` 44 | 45 | ```java 46 | Parser<Pair<Integer, Integer>> parsePlus = 47 | parseInteger. 48 | skip(string("+")). 49 | then(parseInteger); 50 | Parser<Integer> parseAndCompute = parsePlus.map(match((x, y) -> x + y)); 51 | parseAndCompute.parse("7+3") // returns 10 52 | ``` 53 | 54 | ```java 55 | Parser<List<Integer>> parseList = parseInteger.zeroOrMore(string(",")); 56 | parseList.parse("1,2,4,8,16,32") // returns [1, 2, 4, 8, 16, 32] 57 | ``` 58 | 59 | ```java 60 | Parser<String> parseToken = regex("\\s*([a-z0-9]+)\\s*").map(m -> m.group(1)); 61 | Parser<String> keyword(String name) { return parseToken.filter(t -> t.equals(name)); } 62 | 63 | // The nested pairs can get hairy - use .map(match(...)) to get rid of them before you have to write types like this: 64 | Parser<Pair<Pair<String, String>, Optional<String>>> parseIf = 65 | skip(keyword("if")).then(parseToken). 66 | skip(keyword("then")).then(parseToken). 67 | then(skip(keyword("else")).then(parseToken).optional()). 68 | skip(keyword("end")); 69 | 70 | parseIf.parse("if x then y else z end") // returns (("x", "y"), Optional["z"]) 71 | 72 | Parser<String> parseIfAndCompute = 73 | parseIf.map(match((x, y, z) -> x.equals("true") ? 
y : z.orElse("void"))); 74 | 75 | parseIfAndCompute.parse("if true then y else z end") // returns "y" 76 | parseIfAndCompute.parse("if false then y end") // returns "void" 77 | ``` 78 | -------------------------------------------------------------------------------- /src/main/java/dk/ahnfelt/parsercombinator/Parser.java: -------------------------------------------------------------------------------- 1 | package dk.ahnfelt.parsercombinator; 2 | 3 | import java.util.ArrayList; 4 | import java.util.List; 5 | import java.util.Optional; 6 | import java.util.function.Function; 7 | import java.util.function.Predicate; 8 | import java.util.function.Supplier; 9 | /** A parser that reads from a Parsers.Input and produces a value, or throws Parsers.Failure. The default methods build larger parsers out of this one. */ 10 | public interface Parser { 11 | /** Parses from the current offset of in. Implementations signal a mismatch by throwing Parsers.Failure. */ 12 | public A parse(Parsers.Input in) throws Parsers.Failure; 13 | /** Convenience overload that parses in starting at offset 0. */ 14 | public default A parse(CharSequence in) throws Parsers.Failure { 15 | return parse(new Parsers.Input(in, 0)); 16 | } 17 | /** Convenience overload of tryParse that starts at offset 0. */ 18 | public default Optional tryParse(CharSequence in) throws Parsers.Failure { 19 | return tryParse(new Parsers.Input(in, 0)); 20 | } 21 | /** Attempts to parse; on Parsers.Failure the offset of in is restored to its value before the attempt and an empty Optional is returned. */ 22 | public default Optional tryParse(Parsers.Input in) { 23 | int offset = in.offset; 24 | try { 25 | return Optional.of(parse(in)); 26 | } catch (Parsers.Failure e) { 27 | in.offset = offset; 28 | return Optional.empty(); 29 | } 30 | }; 31 | /** Runs this parser and then that, returning both results as a Parsers.Pair. */ 32 | public default Parser> then(Parser that) { 33 | return in -> new Parsers.Pair<>(parse(in), that.parse(in)); 34 | } 35 | /** Same as then(Parser), but that.get() is evaluated on every parse instead of when the combinator is built. */ 36 | public default Parser> then(Supplier> that) { 37 | return in -> then(that.get()).parse(in); 38 | } 39 | /** Runs this parser and then that; keeps the result of this parser and discards the result of that. */ 40 | public default Parser skip(Parser that) { 41 | return in -> { A result = parse(in); that.parse(in); return result; }; 42 | } 43 | /** Same as skip(Parser), but that.get() is evaluated on every parse instead of when the combinator is built. */ 44 | public default Parser skip(Supplier> that) { 45 | return in -> skip(that.get()).parse(in); 46 | } 47 | /** Tries this parser first; if it throws Parsers.Failure, the input offset is rewound and that is tried instead. */ 48 | public default Parser or(Parser that) { 49 | return new Parser() { 50 | public A parse(Parsers.Input in) throws Parsers.Failure { 51 | int offset = in.offset; 52 | try { 53 | return Parser.this.parse(in); 54 | } catch (Parsers.Failure e) { 
55 | in.offset = offset; 56 | return that.parse(in); 57 | } 58 | } 59 | }; 60 | } 61 | /** Same as or(Parser), but that.get() is evaluated on every parse instead of when the combinator is built. */ 62 | public default Parser or(Supplier> that) { 63 | return in -> or(that.get()).parse(in); 64 | } 65 | /** Makes this parser optional: yields the parsed value wrapped in an Optional, or an empty Optional when this parser fails. On failure the input offset is restored, so a partially matched attempt does not consume input that the following parser still needs. */ 66 | public default Parser> optional() { 67 | return in -> { int offset = in.offset; try { 68 | return Optional.of(parse(in)); 69 | } catch (Parsers.Failure e) { 70 | in.offset = offset; /* rewind partially consumed input, as tryParse does */ return Optional.empty(); 71 | }}; 72 | } 73 | /** Applies this parser repeatedly until it fails and collects the results; never fails itself. */ 74 | public default Parser> zeroOrMore() { 75 | return in -> { 76 | List result = new ArrayList<>(); 77 | Optional element; 78 | while((element = tryParse(in)).isPresent()) { 79 | result.add(element.get()); 80 | } 81 | return result; 82 | }; 83 | } 84 | /** Parses zero or more elements separated by separator. Once a separator has matched, the following element is mandatory and its failure is propagated. */ 85 | public default Parser> zeroOrMore(Parser separator) { 86 | return in -> { 87 | List result = new ArrayList<>(); 88 | Optional element = tryParse(in); 89 | if(element.isPresent()) { 90 | result.add(element.get()); 91 | while(separator.tryParse(in).isPresent()) { 92 | result.add(parse(in)); 93 | } 94 | } 95 | return result; 96 | }; 97 | } 98 | /** Like zeroOrMore(), but the first element is mandatory. */ 99 | public default Parser> oneOrMore() { 100 | return in -> { 101 | List result = new ArrayList<>(); 102 | result.add(parse(in)); 103 | Optional element; 104 | while((element = tryParse(in)).isPresent()) { 105 | result.add(element.get()); 106 | } 107 | return result; 108 | }; 109 | } 110 | /** Like zeroOrMore(Parser), but the first element is mandatory. */ 111 | public default Parser> oneOrMore(Parser separator) { 112 | return in -> { 113 | List result = new ArrayList<>(); 114 | result.add(parse(in)); 115 | while(separator.tryParse(in).isPresent()) { 116 | result.add(parse(in)); 117 | } 118 | return result; 119 | }; 120 | } 121 | /** Fails with Parsers.Failure when the parsed value does not satisfy f. The offset is not rewound here; rewinding is left to enclosing combinators such as or, optional and tryParse. */ 122 | public default Parser filter(Predicate f) { 123 | return in -> { A result = parse(in); if(!f.test(result)) throw Parsers.Failure.exception; return result; }; 124 | } 125 | /** Transforms the parsed value with f. */ 126 | public default Parser map(Function f) { 127 | return in -> f.apply(parse(in)); 128 | } 129 | /** Runs this parser, passes its result to f and then runs the parser that f returns on the remaining input. */ 130 | public default Parser flatMap(Function> f) { 131 | return in -> f.apply(parse(in)).parse(in); 132 | } 133 | 134 | } 135 | 
-------------------------------------------------------------------------------- /src/main/java/dk/ahnfelt/parsercombinator/Parsers.java: -------------------------------------------------------------------------------- 1 | package dk.ahnfelt.parsercombinator; 2 | 3 | import java.util.Arrays; 4 | import java.util.function.BiFunction; 5 | import java.util.function.Function; 6 | import java.util.function.Supplier; 7 | import java.util.regex.MatchResult; 8 | import java.util.regex.Matcher; 9 | import java.util.regex.Pattern; 10 | /** Static factories and helper types for building Parser instances. */ 11 | public class Parsers { 12 | 13 | /** Consumes nothing and yields unit. */ public static Parser empty() { return success(unit); } 14 | /** Consumes nothing and always yields value. */ public static Parser success(B value) { return in -> value; } 15 | /** Always fails; cause becomes the message of the thrown Failure. */ public static Parser failure(String cause) { return in -> { throw new Failure(cause); }; } 16 | /** Tries parser first and then each of parsers in order, combining them with Parser.or. */ @SafeVarargs public static Parser choice(Parser parser, Parser... parsers) { return Arrays.asList(parsers).stream().reduce(parser, Parser::or); } 17 | /** Wraps parser in a SkipParser, so that its result is discarded by the following then(...) or skip(...). */ public static SkipParser skip(Parser parser) { return new SkipParser(parser); } 18 | /** Delegates to the parser returned by parser.get(), which is called on every parse. */ public static Parser then(Supplier> parser) { return in -> parser.get().parse(in); } 19 | /** Matches the regex ^ and yields unit. NOTE(review): RegexParser matches against the sub-sequence that starts at the current offset, so ^ can also match mid-input - confirm that this is intended. */ public static Parser begin() { return regex("^").map(m -> unit); } 20 | /** Matches the regex $ and yields unit. */ public static Parser end() { return regex("$").map(m -> unit); } 21 | /** Matches string literally (quoted with Pattern.quote) and yields the matched text. */ public static Parser string(String string) { return regex(Pattern.quote(string)).map(MatchResult::group); } 22 | /** Compiles regex and returns a parser for it. */ public static Parser regex(String regex) { return regex(Pattern.compile(regex)); } 23 | /** Returns a RegexParser for pattern. */ public static Parser regex(Pattern pattern) { return new RegexParser(pattern); } 24 | /** Thrown when a parser does not match. Instances are created without a stack trace and with suppression disabled: the shared instance below is reused as a control-flow signal, so a captured trace would point at class initialisation rather than the throw site, and suppressed exceptions would otherwise pile up on it. */ 25 | public static class Failure extends Exception { 26 | public static final Failure exception = new Failure(); /** Creates a failure without a message. */ public Failure() { this(null); } /** Creates a failure that carries message. */ public Failure(String message) { super(message, null, false, false); } 27 | } 28 | /** Mutable cursor over the text being parsed: the character sequence plus the current offset into it. */ 29 | public static class Input { 30 | public Input(CharSequence charSequence, int offset) { 31 | this.charSequence = charSequence; 32 | this.offset = offset; 33 | } 34 | 35 | CharSequence charSequence; 36 | int offset; 37 | } 38 | 39 | /** Immutable pair of values, as produced by Parser.then. */ 40 | public static class Pair { 41 | public final X first; 42 | public final Y second; 43 | 
44 | public Pair(X first, Y second) { 45 | this.first = first; 46 | this.second = second; 47 | } 48 | 49 | @Override 50 | public String toString() { 51 | return "(" + first + ", " + second + ")"; 52 | } 53 | } 54 | 55 | // Functions to match on left-nested pairs, like those generated by a chain of .then(...) calls 56 | 57 | /** Match a single value, like the result of string("A") - note that match is mostly useful when dealing with multiple values. */ 58 | public static Function match(Function f) { 59 | return f; 60 | } 61 | 62 | /** Match two values, like the result of string("A").then(string("B")) */ 63 | public static Function, R> match(BiFunction f) { 64 | return p -> f.apply(p.first, p.second); 65 | } 66 | 67 | /** Match three values, like the result of string("A").then(string("B")).then(string("C")) */ 68 | public static Function, C>, R> match(Function3 f) { 69 | return p -> f.apply(p.first.first, p.first.second, p.second); 70 | } 71 | 72 | /** Match four values, like the result of string("A").then(string("B")).then(string("C")).then(string("D")) */ 73 | public static Function, C>, D>, R> match(Function4 f) { 74 | return p -> f.apply(p.first.first.first, p.first.first.second, p.first.second, p.second); 75 | } 76 | 77 | /** Match five values, like the result of string("A").then(string("B")).then(string("C")).then(string("D")).then(string("E")) */ 78 | public static Function, C>, D>, E>, R> match(Function5 f) { 79 | return p -> f.apply(p.first.first.first.first, p.first.first.first.second, p.first.first.second, p.first.second, p.second); 80 | } 81 | 82 | /** Match six values, like the result of string("A").then(string("B")).then(string("C"))...then(string("F")) */ 83 | public static Function, C>, D>, E>, F>, R> match(Function6 f) { 84 | return p -> f.apply(p.first.first.first.first.first, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second); 85 | } 86 | 87 | /** Match seven values, like the result of 
string("A").then(string("B")).then(string("C"))...then(string("G")) */ 88 | public static Function, C>, D>, E>, F>, G>, R> match(Function7 f) { 89 | return p -> f.apply(p.first.first.first.first.first.first, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second); 90 | } 91 | 92 | /** Match eight values, like the result of string("A").then(string("B")).then(string("C"))...then(string("H")) */ 93 | public static Function, C>, D>, E>, F>, G>, H>, R> match(Function8 f) { 94 | return p -> f.apply(p.first.first.first.first.first.first.first, p.first.first.first.first.first.first.second, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second); 95 | } 96 | 97 | /** Match nine values, like the result of string("A").then(string("B")).then(string("C"))...then(string("I")) */ 98 | public static Function, C>, D>, E>, F>, G>, H>, I>, R> match(Function9 f) { 99 | return p -> f.apply(p.first.first.first.first.first.first.first.first, p.first.first.first.first.first.first.first.second, p.first.first.first.first.first.first.second, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second); 100 | } 101 | /* Functional interfaces of arity three to nine, accepted by the match(...) overloads. */ 102 | @FunctionalInterface public interface Function3 { public R apply(A a, B b, C c); } 103 | @FunctionalInterface public interface Function4 { public R apply(A a, B b, C c, D d); } 104 | @FunctionalInterface public interface Function5 { public R apply(A a, B b, C c, D d, E e); } 105 | @FunctionalInterface public interface Function6 { public R apply(A a, B b, C c, D d, E e, F f); } 106 | @FunctionalInterface public interface Function7 { public R apply(A a, B b, C c, D d, E e, F f, G g); } 107 | @FunctionalInterface public interface Function8 { public R apply(A a, B b, C c, D d, E e, F f, G g, H h); } 108 | 
@FunctionalInterface public interface Function9 { public R apply(A a, B b, C c, D d, E e, F f, G g, H h, I i); } 109 | 110 | // A value to use when no result is needed from a parser (eg. for Parser) 111 | public static final Object unit = new Object(); 112 | 113 | // Parser implementations used by the factories above 114 | /** Sequence builder whose own result is discarded: skip(that) runs both parsers and yields unit, then(that) runs both parsers and yields only the result of that. */ 115 | public static class SkipParser { 116 | private final Parser parser; 117 | 118 | public SkipParser(Parser parser) { 119 | this.parser = parser; 120 | } 121 | 122 | public SkipParser skip(Parser that) { 123 | return new SkipParser(in -> { parser.parse(in); that.parse(in); return Parsers.unit; }); 124 | } 125 | 126 | public Parser then(Parser that) { 127 | return in -> { parser.parse(in); return that.parse(in); }; 128 | } 129 | } 130 | /** Matches pattern at the current offset (Matcher.lookingAt on the remaining input), advances the offset past the match and yields the MatchResult; throws Failure when the pattern does not match there. */ 131 | public static class RegexParser implements Parser { 132 | private final Pattern pattern; 133 | 134 | public RegexParser(Pattern pattern) { 135 | this.pattern = pattern; 136 | } 137 | 138 | @Override public MatchResult parse(Input in) throws Failure { 139 | Matcher matcher = pattern.matcher(in.charSequence.subSequence(in.offset, in.charSequence.length())); 140 | if(matcher.lookingAt()) { 141 | in.offset += matcher.end(); 142 | return matcher.toMatchResult(); 143 | } else { 144 | throw Failure.exception; 145 | } 146 | } 147 | } 148 | 149 | } 150 | --------------------------------------------------------------------------------