├── .gitignore
├── pom.xml
├── src
└── main
│ └── java
│ └── dk
│ └── ahnfelt
│ └── parsercombinator
│ ├── examples
│ ├── Json.java
│ ├── Main.java
│ └── JsonParser.java
│ ├── Parser.java
│ └── Parsers.java
└── README.md
/.gitignore:
--------------------------------------------------------------------------------
1 | .idea/
2 | target/
3 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
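1 | <?xml version="1.0" encoding="UTF-8"?>
2 | <project xmlns="http://maven.apache.org/POM/4.0.0"
3 |          xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4 |          xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
5 |     <modelVersion>4.0.0</modelVersion>
6 |
7 |     <groupId>dk.ahnfelt.parsercombinator</groupId>
8 |     <artifactId>core</artifactId>
9 |     <version>1.0-SNAPSHOT</version>
10 |
11 |     <properties>
12 |         <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
13 |     </properties>
14 |
15 |     <dependencies>
16 |         <dependency>
17 |             <groupId>junit</groupId>
18 |             <artifactId>junit</artifactId>
19 |             <version>4.11</version>
20 |         </dependency>
21 |     </dependencies>
22 |
23 |     <build>
24 |         <plugins>
25 |             <plugin>
26 |                 <groupId>org.apache.maven.plugins</groupId>
27 |                 <artifactId>maven-compiler-plugin</artifactId>
28 |                 <version>3.1</version>
29 |                 <configuration>
30 |                     <source>1.8</source>
31 |                     <target>1.8</target>
32 |                 </configuration>
33 |             </plugin>
34 |         </plugins>
35 |     </build>
36 |
37 | </project>
38 |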
--------------------------------------------------------------------------------
/src/main/java/dk/ahnfelt/parsercombinator/examples/Json.java:
--------------------------------------------------------------------------------
1 | package dk.ahnfelt.parsercombinator.examples;
2 |
3 | import dk.ahnfelt.parsercombinator.Parsers.Pair;
4 |
5 | import java.util.List;
6 |
7 | public class Json {
8 |
9 | public static class JsonNull extends Json {}
10 |
11 | public static class JsonBoolean extends Json {
12 | public final boolean value;
13 |
14 | public JsonBoolean(boolean value) {
15 | this.value = value;
16 | }
17 | }
18 |
19 | public static class JsonNumber extends Json {
20 | public final double value;
21 |
22 | public JsonNumber(double value) {
23 | this.value = value;
24 | }
25 | }
26 |
27 | public static class JsonString extends Json {
28 | public final String value;
29 |
30 | public JsonString(String value) {
31 | this.value = value;
32 | }
33 | }
34 |
35 | public static class JsonArray extends Json {
36 | public final List value;
37 |
38 | public JsonArray(List value) {
39 | this.value = value;
40 | }
41 | }
42 |
43 | public static class JsonObject extends Json {
44 | public final List> value;
45 |
46 | public JsonObject(List> value) {
47 | this.value = value;
48 | }
49 | }
50 |
51 | }
52 |
--------------------------------------------------------------------------------
/src/main/java/dk/ahnfelt/parsercombinator/examples/Main.java:
--------------------------------------------------------------------------------
1 | package dk.ahnfelt.parsercombinator.examples;
2 |
3 | import dk.ahnfelt.parsercombinator.Parser;
4 |
5 | import java.util.Arrays;
6 | import java.util.function.Function;
7 |
8 | import static dk.ahnfelt.parsercombinator.Parsers.*;
9 |
10 | /** Runnable demos for the parser combinators. */
11 | public class Main {
12 |
13 |     /**
14 |      * Parses a sample JSON document with {@code JsonParser.jsonP} and prints the result.
15 |      *
16 |      * @param arguments command-line arguments; not used
17 |      * @throws Failure if the sample document does not parse
18 |      */
19 |     public static void main(String[] arguments) throws Failure {
20 |
21 |         String input = "{ \"name\": \"anna\", \"age\": 21, \"interests\": [\"diving\", \"programming\"] }";
22 |
23 |         Json output = JsonParser.jsonP.parse(input);
24 |
25 |         System.out.println(output);
26 |
27 |         // The second demo is not run by default; call ifStatement() here to try it.
28 |     }
29 |
30 |     /**
31 |      * Parses a small if/elseif/else statement and prints the list of parsed elseif branches.
32 |      *
33 |      * @throws Failure if the sample statement does not parse
34 |      */
35 |     public static void ifStatement() throws Failure {
36 |
37 |         String test = "if a then b elseif c then d elseif c2 then d2 else e end";
38 |
39 |         // One "elseif <condition> then <body>" clause, as a (condition, body) pair.
40 |         Parser<Pair<String, String>> parseElseIf =
41 |             skip(keyword("elseif")).
42 |             then(variable).
43 |             skip(keyword("then")).
44 |             then(variable);
45 |
46 |         Parser<String> parseIf =
47 |             skip(keyword("if")).then(variable).
48 |             skip(keyword("then")).then(variable).
49 |             then(parseElseIf.zeroOrMore()).
50 |             then(skip(keyword("else")).then(variable).optional()).
51 |             skip(keyword("end")).
52 |             skip(end()).
53 |             map(match((condition, then, elseIfs, otherwise) -> elseIfs.toString()));
54 |
55 |         System.out.println(parseIf.parse(test));
56 |
57 |     }
58 |
59 |     // A lowercase identifier followed by whitespace or the end of the input; yields the identifier only.
60 |     private static final Parser<String> token = regex("([a-z][a-z0-9]*)(\\s+|$)").map(m -> m.group(1));
61 |     // Any token that is not one of the reserved words.
62 |     private static final Parser<String> variable = token.filter(t -> !Arrays.asList("if", "then", "else", "elseif", "end").contains(t));
63 |     // A token that is exactly the word k.
64 |     private static Parser<String> keyword(String k) { return token.filter(k::equals); }
65 |
66 | }
67 |
--------------------------------------------------------------------------------
/src/main/java/dk/ahnfelt/parsercombinator/examples/JsonParser.java:
--------------------------------------------------------------------------------
1 | package dk.ahnfelt.parsercombinator.examples;
2 |
3 | import java.util.regex.Pattern;
4 |
5 | import static dk.ahnfelt.parsercombinator.Parsers.*;
6 | import dk.ahnfelt.parsercombinator.examples.Json.*;
7 | import dk.ahnfelt.parsercombinator.Parser;
8 |
9 | /**
10 |  * An example of a full parser: JSON, built from the combinators in {@code Parsers}.
11 |  * Each token-level parser also consumes any whitespace that follows the token.
12 |  */
13 | public class JsonParser {
14 |
15 |     /** Matches {@code keyword} literally, plus any whitespace after it. */
16 |     static Parser<?> token(String keyword) {
17 |         return regex(Pattern.quote(keyword) + "[\\s\\r\\n]*");
18 |     }
19 |
20 |     // The later alternatives are wrapped in suppliers because the fields they read are declared further down.
21 |     static Parser<Json> valueP =
22 |         token("null").map(t -> (Json) new JsonNull()).
23 |         or(token("true").map(t -> new JsonBoolean(true))).
24 |         or(token("false").map(t -> new JsonBoolean(false))).
25 |         or(() -> JsonParser.stringP.map(JsonString::new)).
26 |         or(() -> JsonParser.numberP.map(JsonNumber::new)).
27 |         or(() -> JsonParser.objectP).
28 |         or(() -> JsonParser.arrayP);
29 |
30 |     // Group 1 wraps the whole repetition, so m.group(1) is the complete text between the quotes; a capture
31 |     // placed inside the repetition would only hold its last iteration. The possessive quantifiers keep an
32 |     // unterminated string from triggering backtracking. Escape sequences are returned undecoded.
33 |     static Parser<String> stringP =
34 |         regex("\"((?:[^\"\\\\]++|\\\\[\"\\\\trnbf/]|\\\\u[0-9a-fA-F]{4})*+)\"[\\s\\r\\n]*").
35 |         map(m -> /*StringEscapeUtils.unescapeEcmaScript(*/ m.group(1) /*)*/);
36 |
37 |     // The exponent needs at least one digit; otherwise input like "1e" would match and then make
38 |     // Double.parseDouble throw NumberFormatException.
39 |     static Parser<Double> numberP =
40 |         regex("(-?(0|[1-9][0-9]*)([.][0-9]+)?([eE][+-]?[0-9]+)?)[\\s\\r\\n]*").
41 |         map(m -> Double.parseDouble(m.group(1)));
42 |
43 |     // One object member: a string key, a colon, then any value.
44 |     static Parser<Pair<String, Json>> fieldP =
45 |         stringP.skip(token(":")).then(valueP);
46 |
47 |     static Parser<Json> objectP =
48 |         skip(token("{")).then(fieldP.zeroOrMore(token(","))).skip(token("}")).
49 |         map(JsonObject::new);
50 |
51 |     static Parser<Json> arrayP =
52 |         skip(token("[")).then(valueP.zeroOrMore(token(","))).skip(token("]")).
53 |         map(JsonArray::new);
54 |
55 |     // The final parser skips initial whitespace and requires that the whole input matches.
56 |     public static Parser<Json> jsonP =
57 |         skip(regex("[\\s\\r\\n]*")).
58 |         then(valueP).
59 |         skip(regex("$"));
60 |
61 | }
62 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # parsercombinator
2 | A parser combinator for Java 8. Working, but mostly a proof of concept.
3 |
4 | ```java
5 | import static dk.ahnfelt.parsercombinator.Parsers.*;
6 | ```
7 |
8 | ```java
9 | Parser<String> parseFoo = string("foo");
10 | parseFoo.parse("foo") // returns "foo"
11 | parseFoo.parse("bar") // throws Parsers.Failure
12 | ```
13 |
14 | ```java
15 | Parser<Integer> parseInteger = regex("[0-9]+").map(m -> Integer.parseInt(m.group()));
16 | parseInteger.parse("42") // returns 42
17 | parseInteger.parse("bar") // throws Parsers.Failure
18 | ```
19 |
20 | ```java
21 | Parser<Pair<String, Integer>> parseFooN = parseFoo.then(parseInteger);
22 | parseFooN.parse("foo7") // returns ("foo", 7)
23 | ```
24 |
25 | ```java
26 | Parser<Integer> parseFooN2 = skip(parseFoo).then(parseInteger);
27 | parseFooN2.parse("foo7") // returns 7
28 | ```
29 |
30 | ```java
31 | Parser<String> parseFooOrBar = choice(string("foo"), string("bar"));
32 | parseFooOrBar.parse("foo") // returns "foo"
33 | parseFooOrBar.parse("bar") // returns "bar"
34 | parseFooOrBar.parse("quux") // throws Parsers.Failure
35 | ```
36 |
37 | ```java
38 | // Alternative version of the above
39 | Parser<String> parseFooOrBar2 =
40 | regex("[a-z]+").
41 | map(m -> m.group()).
42 | filter(t -> t.equals("foo") || t.equals("bar"));
43 | ```
44 |
45 | ```java
46 | Parser<Pair<Integer, Integer>> parsePlus =
47 | parseInteger.
48 | skip(string("+")).
49 | then(parseInteger);
50 | Parser<Integer> parseAndCompute = parsePlus.map(match((x, y) -> x + y));
51 | parseAndCompute.parse("7+3") // returns 10
52 | ```
53 |
54 | ```java
55 | Parser<List<Integer>> parseList = parseInteger.zeroOrMore(string(","));
56 | parseList.parse("1,2,4,8,16,32") // returns [1, 2, 4, 8, 16, 32]
57 | ```
58 |
59 | ```java
60 | Parser<String> parseToken = regex("\\s*([a-z0-9]+)\\s*").map(m -> m.group(1));
61 | Parser<String> keyword(String name) { return parseToken.filter(t -> t.equals(name)); }
62 |
63 | // The nested pairs can get hairy - use .map(match(...)) to get rid of them before you have to write types like this:
64 | Parser<Pair<Pair<String, String>, Optional<String>>> parseIf =
65 | skip(keyword("if")).then(parseToken).
66 | skip(keyword("then")).then(parseToken).
67 | then(skip(keyword("else")).then(parseToken).optional()).
68 | skip(keyword("end"));
69 |
70 | parseIf.parse("if x then y else z end") // returns (("x", "y"), Optional["z"])
71 |
72 | Parser<String> parseIfAndCompute =
73 | parseIf.map(match((x, y, z) -> x.equals("true") ? y : z.orElse("void")));
74 |
75 | parseIfAndCompute.parse("if true then y else z end") // returns "y"
76 | parseIfAndCompute.parse("if false then y end") // returns "void"
77 | ```
78 |
--------------------------------------------------------------------------------
/src/main/java/dk/ahnfelt/parsercombinator/Parser.java:
--------------------------------------------------------------------------------
1 | package dk.ahnfelt.parsercombinator;
2 |
3 | import java.util.ArrayList;
4 | import java.util.List;
5 | import java.util.Optional;
6 | import java.util.function.Function;
7 | import java.util.function.Predicate;
8 | import java.util.function.Supplier;
9 |
10 | public interface Parser {
11 |
12 | public A parse(Parsers.Input in) throws Parsers.Failure;
13 |
14 | public default A parse(CharSequence in) throws Parsers.Failure {
15 | return parse(new Parsers.Input(in, 0));
16 | }
17 |
18 | public default Optional tryParse(CharSequence in) throws Parsers.Failure {
19 | return tryParse(new Parsers.Input(in, 0));
20 | }
21 |
22 | public default Optional tryParse(Parsers.Input in) {
23 | int offset = in.offset;
24 | try {
25 | return Optional.of(parse(in));
26 | } catch (Parsers.Failure e) {
27 | in.offset = offset;
28 | return Optional.empty();
29 | }
30 | };
31 |
32 | public default Parser> then(Parser that) {
33 | return in -> new Parsers.Pair<>(parse(in), that.parse(in));
34 | }
35 |
36 | public default Parser> then(Supplier> that) {
37 | return in -> then(that.get()).parse(in);
38 | }
39 |
40 | public default Parser skip(Parser> that) {
41 | return in -> { A result = parse(in); that.parse(in); return result; };
42 | }
43 |
44 | public default Parser skip(Supplier> that) {
45 | return in -> skip(that.get()).parse(in);
46 | }
47 |
48 | public default Parser or(Parser that) {
49 | return new Parser() {
50 | public A parse(Parsers.Input in) throws Parsers.Failure {
51 | int offset = in.offset;
52 | try {
53 | return Parser.this.parse(in);
54 | } catch (Parsers.Failure e) {
55 | in.offset = offset;
56 | return that.parse(in);
57 | }
58 | }
59 | };
60 | }
61 |
62 | public default Parser or(Supplier> that) {
63 | return in -> or(that.get()).parse(in);
64 | }
65 |
66 | public default Parser> optional() {
67 | return in -> {try {
68 | return Optional.of(parse(in));
69 | } catch (Parsers.Failure e) {
70 | return Optional.empty();
71 | }};
72 | }
73 |
74 | public default Parser> zeroOrMore() {
75 | return in -> {
76 | List result = new ArrayList();
77 | Optional element;
78 | while((element = tryParse(in)).isPresent()) {
79 | result.add(element.get());
80 | }
81 | return result;
82 | };
83 | }
84 |
85 | public default Parser> zeroOrMore(Parser> separator) {
86 | return in -> {
87 | List result = new ArrayList<>();
88 | Optional element = tryParse(in);
89 | if(element.isPresent()) {
90 | result.add(element.get());
91 | while(separator.tryParse(in).isPresent()) {
92 | result.add(parse(in));
93 | }
94 | }
95 | return result;
96 | };
97 | }
98 |
99 | public default Parser> oneOrMore() {
100 | return in -> {
101 | List result = new ArrayList();
102 | result.add(parse(in));
103 | Optional element;
104 | while((element = tryParse(in)).isPresent()) {
105 | result.add(element.get());
106 | }
107 | return result;
108 | };
109 | }
110 |
111 | public default Parser> oneOrMore(Parser> separator) {
112 | return in -> {
113 | List result = new ArrayList<>();
114 | result.add(parse(in));
115 | while(separator.tryParse(in).isPresent()) {
116 | result.add(parse(in));
117 | }
118 | return result;
119 | };
120 | }
121 |
122 | public default Parser filter(Predicate f) {
123 | return in -> { A result = parse(in); if(!f.test(result)) throw Parsers.Failure.exception; return result; };
124 | }
125 |
126 | public default Parser map(Function f) {
127 | return in -> f.apply(parse(in));
128 | }
129 |
130 | public default Parser flatMap(Function> f) {
131 | return in -> f.apply(parse(in)).parse(in);
132 | }
133 |
134 | }
135 |
--------------------------------------------------------------------------------
/src/main/java/dk/ahnfelt/parsercombinator/Parsers.java:
--------------------------------------------------------------------------------
1 | package dk.ahnfelt.parsercombinator;
2 |
3 | import java.util.Arrays;
4 | import java.util.function.BiFunction;
5 | import java.util.function.Function;
6 | import java.util.function.Supplier;
7 | import java.util.regex.MatchResult;
8 | import java.util.regex.Matcher;
9 | import java.util.regex.Pattern;
10 |
11 | /** Factory methods and supporting types for building {@link Parser}s. */
12 | public class Parsers {
13 |
14 |     /** A parser that consumes nothing and yields {@link #unit}. */
15 |     public static Parser<?> empty() { return success(unit); }
16 |
17 |     /** A parser that consumes nothing and always yields {@code value}. */
18 |     public static <B> Parser<B> success(B value) { return in -> value; }
19 |
20 |     /** A parser that always fails; {@code cause} becomes the message of the thrown {@link Failure}. */
21 |     public static <B> Parser<B> failure(String cause) { return in -> { throw new Failure(cause); }; }
22 |
23 |     /** Tries {@code parser} and then each of {@code parsers} in order, yielding the first that succeeds. */
24 |     @SafeVarargs
25 |     public static <A> Parser<A> choice(Parser<A> parser, Parser<A>... parsers) {
26 |         return Arrays.stream(parsers).reduce(parser, Parser::or);
27 |     }
28 |
29 |     /** Starts a chain with a parser whose result is thrown away. */
30 |     public static SkipParser skip(Parser<?> parser) { return new SkipParser(parser); }
31 |
32 |     /** Wraps a supplier so the actual parser is only looked up when parsing happens. */
33 |     public static <A> Parser<A> then(Supplier<Parser<A>> parser) { return in -> parser.get().parse(in); }
34 |
35 |     /** Matches the regex {@code ^}: succeeds only at the very start of the input and consumes nothing. */
36 |     public static Parser<?> begin() { return regex("^").map(m -> unit); }
37 |
38 |     /** Matches the regex {@code $} (the end of the input) and consumes nothing. */
39 |     public static Parser<?> end() { return regex("$").map(m -> unit); }
40 |
41 |     /** Matches {@code string} literally and yields the matched text. */
42 |     public static Parser<String> string(String string) { return regex(Pattern.quote(string)).map(MatchResult::group); }
43 |
44 |     /** Compiles {@code regex} and matches it at the current offset. */
45 |     public static Parser<MatchResult> regex(String regex) { return regex(Pattern.compile(regex)); }
46 |
47 |     /** Matches {@code pattern} at the current offset and yields the match. */
48 |     public static Parser<MatchResult> regex(Pattern pattern) { return new RegexParser(pattern); }
49 |
50 |     /**
51 |      * Thrown when a parser does not match. Failures are raised constantly while alternatives are being
52 |      * tried, so instances carry no stack trace and cannot collect suppressed exceptions.
53 |      */
54 |     public static class Failure extends Exception {
55 |         private static final long serialVersionUID = 1L;
56 |
57 |         /** Shared message-less instance for the common case. */
58 |         public static final Failure exception = new Failure();
59 |
60 |         public Failure() {
61 |             this(null);
62 |         }
63 |
64 |         public Failure(String message) {
65 |             // No cause; suppression and stack-trace capture are both switched off.
66 |             super(message, null, false, false);
67 |         }
68 |     }
69 |
70 |     /** The text being parsed together with the current read position. */
71 |     public static class Input {
72 |         public Input(CharSequence charSequence, int offset) {
73 |             this.charSequence = charSequence;
74 |             this.offset = offset;
75 |         }
76 |
77 |         CharSequence charSequence;
78 |         int offset;
79 |     }
80 |
81 |
82 |     /** A pair of values. */
83 |     public static class Pair<X, Y> {
84 |         public final X first;
85 |         public final Y second;
86 |
87 |         public Pair(X first, Y second) {
88 |             this.first = first;
89 |             this.second = second;
90 |         }
91 |
92 |         @Override
93 |         public String toString() {
94 |             return "(" + first + ", " + second + ")";
95 |         }
96 |     }
97 |
98 |     // Functions to match on left-nested pairs, like those generated by a chain of .then(...) calls
99 |
100 |     /** Match a single value, like the result of string("A") - note that match is mostly useful when dealing with multiple values. */
101 |     public static <A, R> Function<A, R> match(Function<A, R> f) {
102 |         return f;
103 |     }
104 |
105 |     /** Match two values, like the result of string("A").then(string("B")) */
106 |     public static <A, B, R> Function<Pair<A, B>, R> match(BiFunction<A, B, R> f) {
107 |         return p -> f.apply(p.first, p.second);
108 |     }
109 |
110 |     /** Match three values, like the result of string("A").then(string("B")).then(string("C")) */
111 |     public static <A, B, C, R> Function<Pair<Pair<A, B>, C>, R> match(Function3<A, B, C, R> f) {
112 |         return p -> f.apply(p.first.first, p.first.second, p.second);
113 |     }
114 |
115 |     /** Match four values, like the result of string("A").then(string("B")).then(string("C")).then(string("D")) */
116 |     public static <A, B, C, D, R> Function<Pair<Pair<Pair<A, B>, C>, D>, R> match(Function4<A, B, C, D, R> f) {
117 |         return p -> f.apply(p.first.first.first, p.first.first.second, p.first.second, p.second);
118 |     }
119 |
120 |     /** Match five values, like the result of string("A").then(string("B")).then(string("C")).then(string("D")).then(string("E")) */
121 |     public static <A, B, C, D, E, R> Function<Pair<Pair<Pair<Pair<A, B>, C>, D>, E>, R> match(Function5<A, B, C, D, E, R> f) {
122 |         return p -> f.apply(p.first.first.first.first, p.first.first.first.second, p.first.first.second, p.first.second, p.second);
123 |     }
124 |
125 |     /** Match six values, like the result of string("A").then(string("B")).then(string("C"))...then(string("F")) */
126 |     public static <A, B, C, D, E, F, R> Function<Pair<Pair<Pair<Pair<Pair<A, B>, C>, D>, E>, F>, R> match(Function6<A, B, C, D, E, F, R> f) {
127 |         return p -> f.apply(p.first.first.first.first.first, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second);
128 |     }
129 |
130 |     /** Match seven values, like the result of string("A").then(string("B")).then(string("C"))...then(string("G")) */
131 |     public static <A, B, C, D, E, F, G, R> Function<Pair<Pair<Pair<Pair<Pair<Pair<A, B>, C>, D>, E>, F>, G>, R> match(Function7<A, B, C, D, E, F, G, R> f) {
132 |         return p -> f.apply(p.first.first.first.first.first.first, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second);
133 |     }
134 |
135 |     /** Match eight values, like the result of string("A").then(string("B")).then(string("C"))...then(string("H")) */
136 |     public static <A, B, C, D, E, F, G, H, R> Function<Pair<Pair<Pair<Pair<Pair<Pair<Pair<A, B>, C>, D>, E>, F>, G>, H>, R> match(Function8<A, B, C, D, E, F, G, H, R> f) {
137 |         return p -> f.apply(p.first.first.first.first.first.first.first, p.first.first.first.first.first.first.second, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second);
138 |     }
139 |
140 |     /** Match nine values, like the result of string("A").then(string("B")).then(string("C"))...then(string("I")) */
141 |     public static <A, B, C, D, E, F, G, H, I, R> Function<Pair<Pair<Pair<Pair<Pair<Pair<Pair<Pair<A, B>, C>, D>, E>, F>, G>, H>, I>, R> match(Function9<A, B, C, D, E, F, G, H, I, R> f) {
142 |         return p -> f.apply(p.first.first.first.first.first.first.first.first, p.first.first.first.first.first.first.first.second, p.first.first.first.first.first.first.second, p.first.first.first.first.first.second, p.first.first.first.first.second, p.first.first.first.second, p.first.first.second, p.first.second, p.second);
143 |     }
144 |
145 |     @FunctionalInterface public interface Function3<A, B, C, R> { public R apply(A a, B b, C c); }
146 |     @FunctionalInterface public interface Function4<A, B, C, D, R> { public R apply(A a, B b, C c, D d); }
147 |     @FunctionalInterface public interface Function5<A, B, C, D, E, R> { public R apply(A a, B b, C c, D d, E e); }
148 |     @FunctionalInterface public interface Function6<A, B, C, D, E, F, R> { public R apply(A a, B b, C c, D d, E e, F f); }
149 |     @FunctionalInterface public interface Function7<A, B, C, D, E, F, G, R> { public R apply(A a, B b, C c, D d, E e, F f, G g); }
150 |     @FunctionalInterface public interface Function8<A, B, C, D, E, F, G, H, R> { public R apply(A a, B b, C c, D d, E e, F f, G g, H h); }
151 |     @FunctionalInterface public interface Function9<A, B, C, D, E, F, G, H, I, R> { public R apply(A a, B b, C c, D d, E e, F f, G g, H h, I i); }
152 |
153 |     // A value to use when no result is needed from a parser (eg. for Parser<?>)
154 |     public static final Object unit = new Object();
155 |
156 |     // Supporting parser implementations
157 |
158 |     /** A parser whose own result is discarded; returned by {@link #skip(Parser)} to begin a chain. */
159 |     public static class SkipParser {
160 |         private final Parser<?> parser;
161 |
162 |         public SkipParser(Parser<?> parser) {
163 |             this.parser = parser;
164 |         }
165 |
166 |         /** Also runs and discards {@code that}. */
167 |         public SkipParser skip(Parser<?> that) {
168 |             return new SkipParser(in -> { parser.parse(in); that.parse(in); return Parsers.unit; });
169 |         }
170 |
171 |         /** Runs the skipped parser(s) and then {@code that}, yielding only the result of {@code that}. */
172 |         public <A> Parser<A> then(Parser<A> that) {
173 |             return in -> { parser.parse(in); return that.parse(in); };
174 |         }
175 |     }
176 |
177 |     /** Matches a regular expression starting exactly at the current offset. */
178 |     public static class RegexParser implements Parser<MatchResult> {
179 |         private final Pattern pattern;
180 |
181 |         public RegexParser(Pattern pattern) {
182 |             this.pattern = pattern;
183 |         }
184 |
185 |         /**
186 |          * Matches the pattern at {@code in}'s current offset and moves the offset to the end of the match.
187 |          * The matcher runs over the whole input with a region that starts at the offset, using
188 |          * non-anchoring, transparent bounds, so {@code ^} only matches at the real start of the input
189 |          * and lookbehind can see text before the offset. Positions in the returned match are relative
190 |          * to the whole input.
191 |          *
192 |          * @throws Failure if the pattern does not match at the current offset
193 |          */
194 |         @Override
195 |         public MatchResult parse(Input in) throws Failure {
196 |             Matcher matcher = pattern.matcher(in.charSequence)
197 |                 .region(in.offset, in.charSequence.length())
198 |                 .useAnchoringBounds(false)
199 |                 .useTransparentBounds(true);
200 |             if (!matcher.lookingAt()) {
201 |                 throw Failure.exception;
202 |             }
203 |             in.offset = matcher.end();
204 |             return matcher.toMatchResult();
205 |         }
206 |     }
207 |
208 | }
209 |
--------------------------------------------------------------------------------