├── .gitignore
├── .travis.yml
├── LICENSE.txt
├── README.md
├── pom.xml
└── src
    ├── main
    │   └── kotlin
    │       └── net
    │           └── raboof
    │               └── parsekt
    │                   ├── CharParsers.kt
    │                   ├── Parser.kt
    │                   ├── Parsers.kt
    │                   ├── Reference.kt
    │                   ├── Result.kt
    │                   ├── StringParser.kt
    │                   └── samples
    │                       ├── MiniML.kt
    │                       └── PrefixCalc.kt
    └── test
        └── kotlin
            └── net
                └── raboof
                    └── parsekt
                        ├── CharParsersTest.kt
                        ├── ReferenceTest.kt
                        └── samples
                            ├── MiniMLTest.kt
                            └── PrefixCalcTest.kt
/.gitignore:
--------------------------------------------------------------------------------
1 | /target
--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
1 | # allows docker containers to be used for builds
2 | sudo: false
3 |
4 | language: java
5 |
6 | jdk:
7 | - oraclejdk8
8 |
9 | cache:
10 | directories:
11 | - $HOME/.m2
12 |
13 | # code coverage
14 | after_success:
15 | - bash <(curl -s https://codecov.io/bash)
--------------------------------------------------------------------------------
/LICENSE.txt:
--------------------------------------------------------------------------------
1 |
2 | Apache License
3 | Version 2.0, January 2004
4 | http://www.apache.org/licenses/
5 |
6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
7 |
8 | 1. Definitions.
9 |
10 | "License" shall mean the terms and conditions for use, reproduction,
11 | and distribution as defined by Sections 1 through 9 of this document.
12 |
13 | "Licensor" shall mean the copyright owner or entity authorized by
14 | the copyright owner that is granting the License.
15 |
16 | "Legal Entity" shall mean the union of the acting entity and all
17 | other entities that control, are controlled by, or are under common
18 | control with that entity. For the purposes of this definition,
19 | "control" means (i) the power, direct or indirect, to cause the
20 | direction or management of such entity, whether by contract or
21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
22 | outstanding shares, or (iii) beneficial ownership of such entity.
23 |
24 | "You" (or "Your") shall mean an individual or Legal Entity
25 | exercising permissions granted by this License.
26 |
27 | "Source" form shall mean the preferred form for making modifications,
28 | including but not limited to software source code, documentation
29 | source, and configuration files.
30 |
31 | "Object" form shall mean any form resulting from mechanical
32 | transformation or translation of a Source form, including but
33 | not limited to compiled object code, generated documentation,
34 | and conversions to other media types.
35 |
36 | "Work" shall mean the work of authorship, whether in Source or
37 | Object form, made available under the License, as indicated by a
38 | copyright notice that is included in or attached to the work
39 | (an example is provided in the Appendix below).
40 |
41 | "Derivative Works" shall mean any work, whether in Source or Object
42 | form, that is based on (or derived from) the Work and for which the
43 | editorial revisions, annotations, elaborations, or other modifications
44 | represent, as a whole, an original work of authorship. For the purposes
45 | of this License, Derivative Works shall not include works that remain
46 | separable from, or merely link (or bind by name) to the interfaces of,
47 | the Work and Derivative Works thereof.
48 |
49 | "Contribution" shall mean any work of authorship, including
50 | the original version of the Work and any modifications or additions
51 | to that Work or Derivative Works thereof, that is intentionally
52 | submitted to Licensor for inclusion in the Work by the copyright owner
53 | or by an individual or Legal Entity authorized to submit on behalf of
54 | the copyright owner. For the purposes of this definition, "submitted"
55 | means any form of electronic, verbal, or written communication sent
56 | to the Licensor or its representatives, including but not limited to
57 | communication on electronic mailing lists, source code control systems,
58 | and issue tracking systems that are managed by, or on behalf of, the
59 | Licensor for the purpose of discussing and improving the Work, but
60 | excluding communication that is conspicuously marked or otherwise
61 | designated in writing by the copyright owner as "Not a Contribution."
62 |
63 | "Contributor" shall mean Licensor and any individual or Legal Entity
64 | on behalf of whom a Contribution has been received by Licensor and
65 | subsequently incorporated within the Work.
66 |
67 | 2. Grant of Copyright License. Subject to the terms and conditions of
68 | this License, each Contributor hereby grants to You a perpetual,
69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
70 | copyright license to reproduce, prepare Derivative Works of,
71 | publicly display, publicly perform, sublicense, and distribute the
72 | Work and such Derivative Works in Source or Object form.
73 |
74 | 3. Grant of Patent License. Subject to the terms and conditions of
75 | this License, each Contributor hereby grants to You a perpetual,
76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
77 | (except as stated in this section) patent license to make, have made,
78 | use, offer to sell, sell, import, and otherwise transfer the Work,
79 | where such license applies only to those patent claims licensable
80 | by such Contributor that are necessarily infringed by their
81 | Contribution(s) alone or by combination of their Contribution(s)
82 | with the Work to which such Contribution(s) was submitted. If You
83 | institute patent litigation against any entity (including a
84 | cross-claim or counterclaim in a lawsuit) alleging that the Work
85 | or a Contribution incorporated within the Work constitutes direct
86 | or contributory patent infringement, then any patent licenses
87 | granted to You under this License for that Work shall terminate
88 | as of the date such litigation is filed.
89 |
90 | 4. Redistribution. You may reproduce and distribute copies of the
91 | Work or Derivative Works thereof in any medium, with or without
92 | modifications, and in Source or Object form, provided that You
93 | meet the following conditions:
94 |
95 | (a) You must give any other recipients of the Work or
96 | Derivative Works a copy of this License; and
97 |
98 | (b) You must cause any modified files to carry prominent notices
99 | stating that You changed the files; and
100 |
101 | (c) You must retain, in the Source form of any Derivative Works
102 | that You distribute, all copyright, patent, trademark, and
103 | attribution notices from the Source form of the Work,
104 | excluding those notices that do not pertain to any part of
105 | the Derivative Works; and
106 |
107 | (d) If the Work includes a "NOTICE" text file as part of its
108 | distribution, then any Derivative Works that You distribute must
109 | include a readable copy of the attribution notices contained
110 | within such NOTICE file, excluding those notices that do not
111 | pertain to any part of the Derivative Works, in at least one
112 | of the following places: within a NOTICE text file distributed
113 | as part of the Derivative Works; within the Source form or
114 | documentation, if provided along with the Derivative Works; or,
115 | within a display generated by the Derivative Works, if and
116 | wherever such third-party notices normally appear. The contents
117 | of the NOTICE file are for informational purposes only and
118 | do not modify the License. You may add Your own attribution
119 | notices within Derivative Works that You distribute, alongside
120 | or as an addendum to the NOTICE text from the Work, provided
121 | that such additional attribution notices cannot be construed
122 | as modifying the License.
123 |
124 | You may add Your own copyright statement to Your modifications and
125 | may provide additional or different license terms and conditions
126 | for use, reproduction, or distribution of Your modifications, or
127 | for any such Derivative Works as a whole, provided Your use,
128 | reproduction, and distribution of the Work otherwise complies with
129 | the conditions stated in this License.
130 |
131 | 5. Submission of Contributions. Unless You explicitly state otherwise,
132 | any Contribution intentionally submitted for inclusion in the Work
133 | by You to the Licensor shall be under the terms and conditions of
134 | this License, without any additional terms or conditions.
135 | Notwithstanding the above, nothing herein shall supersede or modify
136 | the terms of any separate license agreement you may have executed
137 | with Licensor regarding such Contributions.
138 |
139 | 6. Trademarks. This License does not grant permission to use the trade
140 | names, trademarks, service marks, or product names of the Licensor,
141 | except as required for reasonable and customary use in describing the
142 | origin of the Work and reproducing the content of the NOTICE file.
143 |
144 | 7. Disclaimer of Warranty. Unless required by applicable law or
145 | agreed to in writing, Licensor provides the Work (and each
146 | Contributor provides its Contributions) on an "AS IS" BASIS,
147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
148 | implied, including, without limitation, any warranties or conditions
149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
150 | PARTICULAR PURPOSE. You are solely responsible for determining the
151 | appropriateness of using or redistributing the Work and assume any
152 | risks associated with Your exercise of permissions under this License.
153 |
154 | 8. Limitation of Liability. In no event and under no legal theory,
155 | whether in tort (including negligence), contract, or otherwise,
156 | unless required by applicable law (such as deliberate and grossly
157 | negligent acts) or agreed to in writing, shall any Contributor be
158 | liable to You for damages, including any direct, indirect, special,
159 | incidental, or consequential damages of any character arising as a
160 | result of this License or out of the use or inability to use the
161 | Work (including but not limited to damages for loss of goodwill,
162 | work stoppage, computer failure or malfunction, or any and all
163 | other commercial damages or losses), even if such Contributor
164 | has been advised of the possibility of such damages.
165 |
166 | 9. Accepting Warranty or Additional Liability. While redistributing
167 | the Work or Derivative Works thereof, You may choose to offer,
168 | and charge a fee for, acceptance of support, warranty, indemnity,
169 | or other liability obligations and/or rights consistent with this
170 | License. However, in accepting such obligations, You may act only
171 | on Your own behalf and on Your sole responsibility, not on behalf
172 | of any other Contributor, and only if You agree to indemnify,
173 | defend, and hold each Contributor harmless for any liability
174 | incurred by, or claims asserted against, such Contributor by reason
175 | of your accepting any such warranty or additional liability.
176 |
177 | END OF TERMS AND CONDITIONS
178 |
179 | APPENDIX: How to apply the Apache License to your work.
180 |
181 | To apply the Apache License to your work, attach the following
182 | boilerplate notice, with the fields enclosed by brackets "[]"
183 | replaced with your own identifying information. (Don't include
184 | the brackets!) The text should be enclosed in the appropriate
185 | comment syntax for the file format. We also recommend that a
186 | file or class name and description of purpose be included on the
187 | same "printed page" as the copyright notice for easier
188 | identification within third-party archives.
189 |
190 | Copyright [yyyy] [name of copyright owner]
191 |
192 | Licensed under the Apache License, Version 2.0 (the "License");
193 | you may not use this file except in compliance with the License.
194 | You may obtain a copy of the License at
195 |
196 | http://www.apache.org/licenses/LICENSE-2.0
197 |
198 | Unless required by applicable law or agreed to in writing, software
199 | distributed under the License is distributed on an "AS IS" BASIS,
200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
201 | See the License for the specific language governing permissions and
202 | limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # ParSekt 🍷 [Build Status](https://travis-ci.org/absurdhero/parsekt) [Code Coverage](https://codecov.io/github/absurdhero/parsekt?branch=master)
2 |
3 | Parser Combinator library for [Kotlin](http://kotlinlang.org) based on
4 | [Luke Hoban's blog post](http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx)
5 | and the [Parsec Paper](http://research.microsoft.com/apps/pubs/default.aspx?id=65201).
6 |
7 | Included are two sample parsers. The [prefix calculator](src/main/kotlin/net/raboof/parsekt/samples/PrefixCalc.kt) is a good place to start.
8 | The guts of the parser live in the Parser, Parsers, and CharParsers classes.
9 |
10 | This library is primarily built to demonstrate
11 | Kotlin's type system and features with a minimum of code.
12 | Its error reporting is incomplete and it is missing several convenience features.
13 | For a more mature Kotlin/Java alternative for production use, look at
14 | [JParsec](https://github.com/jparsec/jparsec) or contribute to this project.
15 | I translated the JParsec Tutorial calculator into Kotlin
16 | on [my blog](http://tumblr.raboof.net/post/135542198863/jparsec-tutorial-in-kotlin)
17 | to get you started with JParsec.
18 |
19 | ## Concepts
20 |
21 | For those who are not knee-deep in functional programming, the terminology
22 | alone can be confusing. However, the concepts are simple and the library is not
23 | terribly difficult to use or understand. Here is a small overview.
24 | Consult the many links above for even more information.
25 |
26 | A parser combinator works by providing the programmer with a set of
27 | functions that take an input and return a result.
28 | For example, a parser could read characters and output tokens,
29 | or read tokens and output an AST, or read characters and output
30 | just a single number. Any combination of input or output is possible.
31 | Most of the library code does not care.
32 | As a consequence, you can write both one-pass and two-pass parsers
33 | for a compiler. DSLs are also a good fit for this type of parser.
34 |
35 | The magical thing about a parser *combinator* library is that a small
36 | list of primitive parser functions can be composed together by other functions
37 | that describe how parsers should be combined. These functions are
38 | called parser combinators.
39 |
40 | Some of the most basic combinators are `repeat()` and `repeat1()`
41 | which are equivalent to `*` and `+` in regex and BNF. The library also provides
42 | other useful combinators like `or()` which will return the result of whichever
43 | parser matched and more specialized ones like `concat()` which takes the
44 | characters matched from two parsers and combines them into a single character
45 | string.
46 |
47 | ## To Do
48 |
49 | These are some improvements that I think would turn this into a generally
50 | useful parser library.
51 |
52 | - track character numbers in error reporting
53 | - for character inputs, make it possible to track line numbers
54 | - infix calculator example with generic infix parsers
55 | - find a more readable way to express monadic operations (Parser::mapJoin could be nicer)
56 | - optimize "or" combinator
57 |
--------------------------------------------------------------------------------
/pom.xml:
--------------------------------------------------------------------------------
1 |
4 |
5 | 4.0.0
6 |
7 | net.raboof
8 | parsekt
9 | 1.0-SNAPSHOT
10 | jar
11 |
12 | Parsekt - Parser Combinator
13 | http://github.com/absurdhero/parsekt
14 | Parser combinator library for Kotlin which makes writing parsers easier
15 |
16 |
17 |
18 | The Apache License, Version 2.0
19 | http://www.apache.org/licenses/LICENSE-2.0.txt
20 |
21 |
22 |
23 |
24 |
25 | absurdhero
26 | None
27 | https://github.com/absurdhero
28 |
29 |
30 |
31 |
32 | scm:git:git@github.com:absurdhero/parsekt.git
33 | scm:git:git@github.com:absurdhero/parsekt.git
34 | git:git@github.com:absurdhero/parsekt.git
35 |
36 |
37 |
38 | UTF-8
39 | 1.2.61
40 | 4.12
41 | true
42 |
43 |
44 |
45 |
46 | org.jetbrains.kotlin
47 | kotlin-stdlib
48 | ${kotlin.version}
49 |
50 |
51 |
52 | org.jetbrains.kotlin
53 | kotlin-test
54 | ${kotlin.version}
55 | test
56 |
57 |
58 |
59 | junit
60 | junit
61 | ${junit.version}
62 | test
63 |
64 |
65 |
66 |
67 | src/main/kotlin
68 | src/test/kotlin
69 |
70 |
71 | org.jetbrains.kotlin
72 | kotlin-maven-plugin
73 | ${kotlin.version}
74 |
75 |
76 | compile
77 | compile
78 |
79 | compile
80 |
81 |
82 |
83 | test-compile
84 | test-compile
85 |
86 | test-compile
87 |
88 |
89 |
90 |
91 |
92 |
93 | org.jacoco
94 | jacoco-maven-plugin
95 | 0.8.2
96 |
97 |
98 |
99 | prepare-agent
100 |
101 |
102 |
103 | report
104 | test
105 |
106 | report
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 | release
117 |
118 |
119 |
120 |
121 | org.apache.maven.plugins
122 | maven-release-plugin
123 | 2.5.3
124 |
125 | -Dgpg.passphrase=${gpg.passphrase}
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 | release-sign-artifacts
134 |
135 |
136 | performRelease
137 | true
138 |
139 |
140 |
141 |
142 |
143 | org.apache.maven.plugins
144 | maven-gpg-plugin
145 | 1.6
146 |
147 | ${gpg.passphrase}
148 |
149 |
150 |
151 | sign-artifacts
152 | verify
153 |
154 | sign
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 | ossrh
167 | https://oss.sonatype.org/content/repositories/snapshots
168 |
169 |
170 |
171 |
172 |
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/CharParsers.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | import kotlin.collections.arrayListOf
4 | import kotlin.collections.joinToString
5 | import kotlin.collections.plus
6 | import kotlin.text.Regex
7 |
8 | /** Extends the basic combinators of [Parsers] with character-specific parsers; subclasses supply [anyChar] for their input type. */
9 | abstract class CharParsers : Parsers() {
10 | // implement anyChar to read one character from the input and return it together with the remaining input
11 | abstract val anyChar: Parser
12 |
13 | fun char(ch: Char): Parser { // accepts only the character ch
14 | return anyChar.filter { c -> c == ch }.withErrorLabel("char($ch)")
15 | }
16 |
17 | fun char(predicate: (Char) -> Boolean): Parser { // accepts one character for which predicate returns true
18 | return anyChar.filter(predicate).withErrorLabel("char(predicate)")
19 | }
20 |
21 | fun char(regex: Regex): Parser { // accepts one character whose single-character string fully matches regex
22 | return anyChar.filter { ch: Char -> regex.matches(ch.toString()) }.withErrorLabel("char(/$regex/)")
23 | }
24 |
25 | //public val whitespace: Parser> = repeat(char(' ') or char('\t') or char('\n') or char('\r'));
26 | val whitespace = repeat(char(Regex("""\s"""))).withErrorLabel("whitespace") // zero or more \s characters
27 | val wordChar = char(Regex("""\w""")) // exactly one \w character
28 | fun wsChar(ch: Char) = whitespace and char(ch) // skips leading whitespace, then matches ch and yields it
29 | val token = repeat1(wordChar).between(whitespace) // one or more word characters; surrounding whitespace is discarded
30 |
31 | fun concat(p1: Parser, p2: Parser>): Parser> { // runs p1 then p2 and prepends p1's character to p2's list
32 | return p1.project({v: Char, l: List -> arrayListOf(v) + l })({p2})
33 | }
34 |
35 | fun concat(vararg charParsers: Parser): Parser> { // runs the given parsers in order, collecting their characters into one list
36 | var parser : Parser> = succeed(emptyList())
37 |
38 | for (p in charParsers) {
39 | parser = parser.project({l: List, v: Char -> l + v })({p}) // chain p after everything accumulated so far
40 | }
41 |
42 | return parser
43 | }
44 | fun charPrefix(prefix: Char, parser: Parser>): Parser> { // parser optionally preceded by prefix; the prefix is kept in the result when present
45 | return concat(char(prefix), parser) or parser
46 | }
47 |
48 | /** Greedy regex matcher: appends characters from [anyChar] to a buffer one at a time and tests the whole buffer against [regex]; after the first match it stops at the first character that breaks the match. Fails if the buffer never matches. */
49 | fun substring(regex: Regex): Parser> {
50 | return Parser { input ->
51 | var result = anyChar(input)
52 | when (result) {
53 | is Result.ParseError -> Result.ParseError(result) // no character available: pass on a copy of anyChar's error
54 | is Result.Value -> {
55 | val temp = StringBuilder()
56 | var lastRest: TInput = result.rest // input remaining after the last character kept in temp
57 | var everMatched = false
58 |
59 | while (result !is Result.ParseError) { // keep reading until anyChar fails or the match is broken
60 | result as Result.Value
61 |
62 | temp.append(result.value)
63 | if (regex.matches(temp)) {
64 | everMatched = true
65 | } else if (everMatched) {
66 | temp.deleteCharAt(temp.length-1) // the newest character broke the match: drop it and stop
67 | break
68 | }
69 |
70 | lastRest = result.rest
71 | result = anyChar(result.rest)
72 | }
73 |
74 | if (everMatched) {
75 | Result.Value(temp.toList(), lastRest)
76 | } else {
77 | Result.ParseError("/$regex/", lastRest)
78 | }
79 | }
80 | }
81 | }
82 | }
83 |
84 | }
85 |
86 | fun Parser>.string(): Parser { // joins the parsed elements into one String with no separator
87 | return this.mapResult { Result.Value(it.value.joinToString(""), it.rest) }
88 | }
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/Parser.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | // based on http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx
4 |
5 | /** A Parser wraps a function from an input to a [Result]; it can be invoked like a function and has methods that return derivative parsers */
6 | open class Parser(val f: (TInput) -> Result) {
7 |
8 | /** A parser can be invoked as a function of an input that returns a result */
9 | operator fun invoke(input: TInput): Result = f(input)
10 |
11 | /* the following filter and map functions are the building blocks used to derive new parsers */
12 |
13 | fun filter(pred: (TValue) -> Boolean): Parser { // keeps this parser's success only when pred accepts the value
14 | return Parser { input ->
15 | val result = this(input)
16 | when (result) {
17 | is Result.Value -> if (pred(result.value)) {
18 | result
19 | } else {
20 | Result.ParseError("filter", null, result.rest) // value rejected by pred
21 | }
22 | is Result.ParseError -> result
23 | }
24 | }
25 | }
26 |
27 | fun mapResult(selector: (Result.Value) -> Result): Parser { // transforms a successful result with selector; an error is passed on as a copy
28 | return Parser { input ->
29 | val result = this(input)
30 | when (result) {
31 | is Result.Value -> selector(result)
32 | is Result.ParseError -> Result.ParseError(result)
33 | }
34 | }
35 | }
36 |
37 | fun map(selector: (TValue) -> TValue2): Parser // transforms only the parsed value; the remaining input is kept
38 | = mapResult { result -> Result.Value(selector(result.value), result.rest) }
39 |
40 | /** This function is a convenient way to build parsers that act on more than one input parser.
41 | *
42 | * It invokes "this" followed by the parser returned from the selector function.
43 | * It then passes the two resulting values to the projector which returns one result.
44 | *
45 | * The selector "maps" the value from "this" to an intermediate parser.
46 | * Then the projector "joins" the original value and the mapped value into a new value.
47 | *
48 | * See usages of this function in this library for examples of how to make use of it.
49 | */
50 | fun mapJoin(
51 | selector: (TValue) -> Parser,
52 | projector: (TValue, TIntermediate) -> TValue2
53 | ): Parser {
54 | return Parser { input ->
55 | val res = this(input)
56 | when (res) {
57 | is Result.ParseError -> Result.ParseError(res)
58 | is Result.Value -> {
59 | val v = res.value
60 | val res2 = selector(v)(res.rest) // the second parser continues from where this one stopped
61 | when (res2) {
62 | is Result.ParseError -> Result.ParseError(res2)
63 | is Result.Value -> Result.Value(projector(v, res2.value), res2.rest)
64 | }
65 | }
66 | }
67 | }
68 | }
69 |
70 | /* These are some essential combinators which are
71 | functions that take parsers as arguments and return a new parser
72 | */
73 |
74 | infix fun or(other: Parser): Parser { // tries this parser first; if it fails, runs other on the same input
75 | return Parser { input ->
76 | val result = this(input)
77 | when (result) {
78 | is Result.Value -> result
79 | is Result.ParseError -> other(input)
80 | }
81 | }
82 | }
83 |
84 | infix fun and(other: Parser): Parser = // runs this then other and returns the value of the second parser
85 | this.mapJoin({ other }, { _, i -> i })
86 |
87 | // like "and" but returns the value of the first parser
88 | infix fun before(other: Parser): Parser =
89 | this.mapJoin({ other }, { v, _ -> v })
90 |
91 |
92 | /* error tracking */
93 |
94 | /** Allows a reported error from a parser to be modified.
95 | *
96 | * This is useful when the combinator knows more about why an error happened.
97 | */
98 | fun mapError(errorFunc: (Result.ParseError) -> Result.ParseError): Parser {
99 | return Parser { input ->
100 | val result = this(input)
101 | when (result) {
102 | is Result.Value -> result
103 | is Result.ParseError -> errorFunc(result)
104 | }
105 | }
106 | }
107 |
108 | fun withErrorLabel(label: String) : Parser { // replaces the error's label, keeping its child and remaining input
109 | return mapError { Result.ParseError(label, it.child, it.rest) }
110 | }
111 |
112 | fun wrapError(label: String) : Parser { // makes the original error the child of a new error carrying label
113 | return mapError { Result.ParseError(label, it) }
114 | }
115 |
116 | /* Generally useful functions */
117 |
118 | /** curry the projector function in mapJoin
119 | *
120 | * @see mapJoin
121 | */
122 | fun project(projector: (TValue, TIntermediate) -> TValue2)
123 | : ((TValue) -> Parser) -> Parser {
124 | return { selector: (TValue) -> Parser ->
125 | mapJoin(selector, projector)
126 | }
127 | }
128 |
129 | // extract the result of this parser from the input between two other parsers; end defaults to start
130 | fun between(start: Parser, end: Parser = start): Parser {
131 | return (start and this before end).wrapError("between")
132 | }
133 |
134 | fun asList(): Parser> { // wraps the parsed value in a single-element list
135 | return mapResult { Result.Value(listOf(it.value), it.rest) }
136 | }
137 |
138 | // sometimes useful for working around covariance problems (or from T to T?); the cast is unchecked
139 | fun cast() : Parser {
140 | @Suppress("UNCHECKED_CAST")
141 | return this as Parser
142 | }
143 | }
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/Parsers.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | import kotlin.collections.arrayListOf
4 | import kotlin.collections.emptyList
5 | import kotlin.collections.plus
6 |
7 | /** Base parser combinator class which contains the core combinators: succeed, repeat and repeat1 */
8 | abstract class Parsers {
9 | fun succeed(value: TValue): Parser { // always succeeds with value and leaves the input untouched
10 | return Parser { input -> Result.Value(value, input) }
11 | }
12 |
13 | fun repeat(parser: Parser): Parser> { // zero or more occurrences of parser; yields an empty list when there are none
14 | return repeat1(parser) or succeed(emptyList())
15 | }
16 |
17 | fun repeat1(parser: Parser): Parser> { // one or more occurrences of parser
18 | return parser.mapJoin({ repeat(parser) }, { v: TValue, l: List -> arrayListOf(v) + l })
19 | .wrapError("repeat1")
20 | }
21 | }
22 |
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/Reference.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | /** useful when declaring mutually recursive parsers: obtain a parser with [get] first and supply the real one later with [set] */
4 | class Reference {
5 | private var parser: Parser = Parser {throw NullPointerException("parser reference not set") } // placeholder used until set() is called
6 |
7 | fun set(to : Parser) {
8 | parser = to
9 | }
10 |
11 | fun get() : Parser { // the returned parser reads the current target each time it is invoked, so set() may be called after get()
12 | return Parser { input -> parser.invoke(input)}
13 | }
14 | }
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/Result.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | /** A parser can return one of two Results: [Value] on success (parsed value plus remaining input) or [ParseError] with error information. */
4 | sealed class Result {
5 |
6 | class Value(val value: TValue, val rest: TInput) : Result() {
7 | override fun toString(): String {
8 | return "Value{value=$value, rest=$rest}"
9 | }
10 |
11 | override fun equals(other: Any?): Boolean{ // two Values are equal when both value and rest are equal
12 | if (this === other) return true
13 | if (other?.javaClass != javaClass) return false
14 |
15 | other as Value<*, *>
16 |
17 | if (value != other.value) return false
18 | if (rest != other.rest) return false
19 |
20 | return true
21 | }
22 |
23 | override fun hashCode(): Int{ // combines the same two fields that equals compares
24 | var result = value?.hashCode() ?: 0
25 | result = 31 * result + (rest?.hashCode() ?: 0) // assign, not +=, so the multiplier really is 31
26 | return result
27 | }
28 | }
29 |
30 | class ParseError(val productionLabel: String, // name of the production that failed
31 | val child: ParseError?, // more specific underlying error, if any
32 | val rest: TInput) : Result() {
33 |
34 | /** make a parent of another error; the parent takes the child's remaining input */
35 | constructor(production: String, error: ParseError) : this(production, error, error.rest)
36 |
37 | /** no child */
38 | constructor(production: String, rest: TInput) : this(production, null, rest)
39 |
40 | /** copy constructor */
41 | constructor(error: ParseError) : this(error.productionLabel, error.child, error.rest)
42 |
43 | override fun toString(): String {
44 | return "Error{production=$productionLabel, child=${child?.innerToString()}, rest=$rest}"
45 | }
46 |
47 | fun innerToString(): String { // like toString but without rest; used for nested child errors
48 | return "Error{production=$productionLabel, child=${child?.innerToString()}}"
49 | }
50 | }
51 |
52 | /** convenience method for tests and other error tolerant usage: returns the value, or throws RuntimeException on a parse error */
53 | fun valueOrFail() : TValue {
54 | return when (this) {
55 | is ParseError -> throw RuntimeException("parse error: $this")
56 | is Value -> this.value
57 | }
58 | }
59 | }
60 |
61 |
62 |
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/StringParser.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | import kotlin.text.substring
4 |
5 | /** The class of parsers that takes a String as input; the unparsed remainder is also a String */
6 | open class StringParser : CharParsers() {
7 | override val anyChar: Parser // defined as a getter; CharParsers reads anyChar while initializing whitespace, wordChar and token
8 | get() = Parser { input: String ->
9 | when (input.length) {
10 | 0 -> Result.ParseError("EOF", null, "") // empty input: nothing left to read
11 | 1 -> Result.Value(input[0], "") // last character: the remaining input is empty
12 | else -> Result.Value(input[0], input.substring(1)) // first character, with everything after it as the remaining input
13 | }
14 | }
15 | }
16 |
17 |
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/samples/MiniML.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt.samples
2 |
3 | import net.raboof.parsekt.CharParsers
4 | import net.raboof.parsekt.Parser
5 | import net.raboof.parsekt.Reference
6 | import net.raboof.parsekt.string
7 | import kotlin.collections.emptyList
8 | import kotlin.text.isLetter
9 | import kotlin.text.isLetterOrDigit
10 |
11 | /* Translated from MiniML on Luke Hoban's Blog
12 | http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx?PageIndex=2#comments
13 | */
14 |
15 | // AST for the MiniML language; every node type implements Terminal
16 | interface Terminal
17 | data class LambdaTerm(val ident: String, val term: Terminal) : Terminal // lambda: parameter name and body term
18 | data class LetTerm(val ident: String, val rhs: Terminal, val body: Terminal) : Terminal // let binding: name, bound term, and the term after "in"
19 | data class AppTerm(val func: Terminal, val args: List = emptyList()) : Terminal // application: a term followed by zero or more argument terms
20 | data class VarTerm(val ident: String) : Terminal // a bare identifier
21 |
22 | abstract class MiniML : CharParsers() {
23 |
24 | val Id = whitespace and concat(char(Char::isLetter), repeat(char(Char::isLetterOrDigit))).string() // optional whitespace, then a letter followed by letters or digits
25 | val Ident = Id.filter { it != "let" && it != "in" } // an Id that is not one of the keywords
26 | val LetId = Id.filter { it == "let" } // the keyword "let"
27 | val InId = Id.filter { it == "in" } // the keyword "in"
28 |
29 | val Lambda: Parser = Ident.between(wsChar('\\'), wsChar('.')) // '\' Ident '.' Term
30 | .mapJoin({ Term }, { x, t -> LambdaTerm(x,t)})
31 |
32 | val Let : Parser = Ident.between(LetId, wsChar('=')) // "let" Ident '=' Term "in" Term
33 | .mapJoin(
34 | {(Term before InId).mapJoin({Term}, {v, s -> Pair(v,s)})},
35 | { v, s -> LetTerm(v, s.first, s.second)})
36 |
37 | private val Term1Ref : Reference = Reference() // Term1 and Term refer to each other, so Term1 goes through a Reference
38 | val Term1 = Term1Ref.get()
39 |
40 | val App : Parser = Term1.mapJoin({repeat(Term1)}, {t, ts -> AppTerm(t, ts)}) // a Term1 followed by zero or more Term1 arguments
41 |
42 | val Term = Lambda or Let or App // alternatives are tried in this order
43 |
44 | init { Term1Ref.set(Ident.map { VarTerm(it) as Terminal } or Term.between(char('('), char(')'))) } // Term1 is an identifier or a parenthesized Term
45 |
46 | val All : Parser = Term before wsChar(';') // a Term terminated by ';'
47 | }
48 |
49 |
--------------------------------------------------------------------------------
/src/main/kotlin/net/raboof/parsekt/samples/PrefixCalc.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt.samples
2 |
3 | import net.raboof.parsekt.*
4 | import kotlin.collections.map
5 | import kotlin.collections.reduce
6 | import kotlin.text.isDigit
7 | import kotlin.text.toLong
8 |
9 |
10 | /** A 4-function calculator for integers that uses prefix notation, e.g. `- (* 10 10) (+ 1 1 1)`. */
11 | abstract class PrefixCalc : CharParsers() {
12 | /** Parses [input] with [operation] and evaluates the result. Never returns null: a failed parse is left to valueOrFail(), which presumably throws - confirm in Result. */
13 | fun evaluate(input: TInput) : Long { // NOTE(review): TInput is not declared in this listing; the class header has probably lost its type parameter
14 | return operation(input).valueOrFail().evaluate()
15 | }
16 | // Expression tree produced by the parsers below.
17 | interface Expr {
18 | fun evaluate() : Long
19 | }
20 | // An operator character applied to a list of operand expressions.
21 | data class Operation(val operator: Char, val exprs: List) : Expr {
22 | override fun evaluate() : Long {
23 | val terms = exprs.map { it.evaluate() } // reduce below needs at least one operand
24 | return when (operator) { // operands are folded left to right, so `- 20 8 2` is (20 - 8) - 2
25 | '+' -> terms.reduce {acc, next -> acc + next}
26 | '-' -> terms.reduce {acc, next -> acc - next}
27 | '*' -> terms.reduce {acc, next -> acc * next}
28 | '/' -> terms.reduce {acc, next -> acc / next}
29 | else -> throw IllegalArgumentException("Unsupported operator: $operator")
30 | }
31 | }
32 | }
33 | // An integer literal kept as text; it is converted with toLong() when evaluated.
34 | data class Number(val value: String) : Expr {
35 | override fun evaluate() : Long {
36 | return value.toLong()
37 | }
38 | }
39 | // expr is needed by prefixOp before its parser can be built (it needs operation), so it goes through a Reference that the init block fills in.
40 | val exprRef: Reference = Reference()
41 | val expr = exprRef.get()
42 | // number: digits with a '-' prefix handled by charPrefix, wrapped in a Number.
43 | val number: Parser = whitespace and charPrefix('-', repeat1(char(Char::isDigit))).string().map { Number(it) as Expr }
44 | val plusOp = prefixOp('+')
45 | val minusOp = prefixOp('-')
46 | val multiplyOp = prefixOp('*')
47 | val divideOp = prefixOp('/')
48 | // operation: any one of the four prefix operators with its operands.
49 | val operation = plusOp or minusOp or multiplyOp or divideOp
50 | // expr: a number, or an operation wrapped in parentheses.
51 | init { exprRef.set(number or operation.between(wsChar('('), wsChar(')'))) }
52 | // Builds the parser for one operator: the operator character followed by repeat1(expr), collected into an Operation.
53 | private fun prefixOp(opChar: Char): Parser = wsChar(opChar) and repeat1(expr).map { Operation(opChar, it) as Expr }
54 | }
--------------------------------------------------------------------------------
/src/test/kotlin/net/raboof/parsekt/CharParsersTest.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | import org.junit.Test
4 | import kotlin.collections.*
5 | import kotlin.test.assertEquals
6 | import kotlin.test.assertTrue
7 | import kotlin.test.fail
8 |
9 | class CharParsersTest {
10 | // Exercises the character-level parsers through a StringParser instance.
11 | private val parser = StringParser()
12 | // anyChar yields the first character and leaves the remainder as the rest
13 | @Test
14 | fun firstChar() {
15 | assertEquals(Result.Value('t', "est"), parser.anyChar("test"))
16 | }
17 | // whitespace succeeds with an empty list when there is nothing to skip
18 | @Test
19 | fun whitespace() {
20 | assertEquals(Result.Value(emptyList(), "test"), parser.whitespace("test"))
21 | assertEquals(Result.Value(listOf(' ', ' '), "test"), parser.whitespace("  test")) // two leading spaces in, two spaces out
22 | }
23 | // char(c) matches exactly that character; asList() wraps the single value in a list
24 | @Test
25 | fun chars() {
26 | assertTrue(parser.char('(')("x") is Result.ParseError)
27 | assertEquals(Result.Value('(', "test)"), parser.char('(')("(test)"))
28 | assertEquals(Result.Value(listOf('('), "test)"), parser.char('(').asList()("(test)"))
29 | }
30 |
31 | @Test
32 | fun tokens() {
33 | assertEquals(Result.Value(listOf('a', 'b', 'c'), ""), parser.token("abc"))
34 |
35 | // consumes whitespace both before and after
36 | assertEquals(Result.Value(listOf('a', 'b', 'c'), ""), parser.token(" abc "))
37 | assertEquals(Result.Value("test", ""), parser.token.string()(" test "))
38 |
39 | // does not match plain whitespace
40 | assertTrue(parser.token(" ") is Result.ParseError)
41 | }
42 | // between(open, close) keeps only the value of the receiver parser
43 | @Test
44 | fun parenWrappedToken() {
45 | val parenWrappedToken = parser.token.between(
46 | parser.char('(') and parser.whitespace,
47 | parser.whitespace and parser.char(')'))
48 |
49 | assertEquals(Result.Value(listOf('x'), ""), parenWrappedToken("(x)"))
50 | assertEquals(Result.Value("test", ""), parenWrappedToken.string()("(test)"))
51 | assertEquals(Result.Value("test", " "), parenWrappedToken.string()("( test ) "))
52 | }
53 | // substring(regex) matches the regex at the start of the input and leaves the remainder as the rest
54 | @Test
55 | fun substring() {
56 | assertTrue(parser.substring(Regex("a"))("x") is Result.ParseError)
57 | assertEquals(Result.Value("(", "test)"), parser.substring(Regex("\\(")).string()("(test)"))
58 | assertEquals(Result.Value("(test", ")"), parser.substring(Regex("\\([^)]*")).string()("(test)"))
59 | assertEquals(Result.Value("(test)", ""), parser.substring(Regex("\\([^)]+\\)")).string()("(test)"))
60 | assertEquals(Result.Value("(test)", ""), parser.substring(Regex(".*")).string()("(test)"))
61 |
62 | assertEquals(Result.Value("\"\\\"foo\"", " abc"), parser.substring(Regex(""""(\\.|[^\\"])*"""")).string()("\"\\\"foo\" abc"))
63 | }
64 |
65 | // A failing concat reports the production that failed (char(p)) and the input left at that point
66 | @Test
67 | fun errorInformation() {
68 | val result = parser.concat(parser.char('b'), parser.char('o'), parser.char('p')).string()("bolt")
69 | when (result) {
70 | is Result.ParseError -> {
71 | assertEquals("char(p)", result.productionLabel)
72 | assertEquals("t", result.rest)
73 | assertEquals("Error{production=char(p), child=null, rest=t}", result.toString())
74 | assertEquals("Error{production=char(p), child=null}", result.innerToString())
75 | }
76 | else -> fail()
77 | }
78 | }
79 |
80 | }
--------------------------------------------------------------------------------
/src/test/kotlin/net/raboof/parsekt/ReferenceTest.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt
2 |
3 | import org.junit.Test
4 |
5 | import kotlin.test.assertEquals
6 | import kotlin.test.fail
7 |
8 | class ReferenceTest { // NOTE(review): Reference carries no type arguments in this listing - confirm against the original file
9 | @Test
10 | fun setAndGet() {
11 | val exprRef: Reference = Reference()
12 | val parser = StringParser().char('x')
13 | exprRef.set(parser)
14 |
15 | // use the error production label to figure out that the wrapped
16 | // parser is still calling the original char parser underneath
17 | // ("y" cannot match char('x'), so a ParseError labelled char(x) is expected)
18 | val result = exprRef.get()("y")
19 | when (result) {
20 | is Result.ParseError -> assertEquals("char(x)", result.productionLabel)
21 | else -> fail()
22 | }
23 | }
24 | // Invoking the parser obtained from get() before set() was called is expected to throw NullPointerException.
25 | @Test
26 | fun throwWhenInvokedIfNotSet() {
27 | val exprRef: Reference = Reference()
28 |
29 | try {
30 | exprRef.get()("y")
31 | } catch(ignored: NullPointerException) {
32 | return // expected path: the test passes here
33 | }
34 | // reached only when no NullPointerException was thrown
35 | fail()
36 | }
37 |
38 | }
--------------------------------------------------------------------------------
/src/test/kotlin/net/raboof/parsekt/samples/MiniMLTest.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt.samples
2 |
3 | import net.raboof.parsekt.Parser
4 | import net.raboof.parsekt.Result
5 | import org.junit.Test
6 | import kotlin.test.assertEquals
7 | import kotlin.test.assertNotNull
8 | import kotlin.test.assertTrue
9 | import kotlin.text.substring
10 |
11 |
12 | class MiniMLTest {
13 | class MiniMLStringParser : MiniML() { // MiniML over String input: anyChar yields the first character and the remaining text
14 | override val anyChar: Parser
15 | get() = Parser { input: String ->
16 | when (input.length) {
17 | 0 -> Result.ParseError("EOF", null, "")
18 | 1 -> Result.Value(input[0], "")
19 | else -> Result.Value(input[0], input.substring(1))
20 | }
21 | }
22 | }
23 |
24 | val parser = MiniMLStringParser()
25 |
26 | @Test fun ident() {
27 | assertEquals("A123", parser.Ident("""A123""").valueOrFail())
28 | }
29 | // the innermost body is an App, so a bare identifier shows up as AppTerm(VarTerm(..)) with no arguments
30 | @Test fun lambda() {
31 | assertEquals(
32 | LambdaTerm("x", LambdaTerm("y", AppTerm(VarTerm("z")))),
33 | parser.Lambda("""\x.\y.z""").valueOrFail())
34 | }
35 | // Term1 is either a plain identifier or a parenthesised Term
36 | @Test fun term1() {
37 | assertEquals(VarTerm("A123"), parser.Term1("""A123""").valueOrFail())
38 | assertEquals(AppTerm(VarTerm("x")), parser.Term1("""(x)""").valueOrFail())
39 | }
40 |
41 | @Test fun term() {
42 | // lambda
43 | assertEquals(
44 | LambdaTerm("x", LambdaTerm("y", AppTerm(VarTerm("z")))),
45 | parser.Term("""\x.\y.z""").valueOrFail())
46 | // app
47 | assertEquals((AppTerm(VarTerm("A123"))), parser.Term("""A123""").valueOrFail())
48 | }
49 |
50 | @Test fun let() {
51 | assertEquals(
52 | LetTerm("x", AppTerm(VarTerm("y")), AppTerm(VarTerm("z"))) as Terminal,
53 | parser.Let("""let x = y in z""").valueOrFail())
54 | }
55 | // All returns a non-null Result even for a ParseError, so the positive cases unwrap with valueOrFail() to really require a successful parse
56 | @Test fun program() {
57 | assertNotNull(parser.All("\\x.y;").valueOrFail())
58 | assertTrue(parser.All("\\x.y") is Result.ParseError, "do not match if semicolon missing")
59 |
60 | assertNotNull(parser.All("""
61 | let true = \x.\y.x in
62 | let false = \x.\y.y in
63 | let if = \b.\l.\r.(b l) r in
64 | if true then false else true;""").valueOrFail())
65 | }
66 | }
--------------------------------------------------------------------------------
/src/test/kotlin/net/raboof/parsekt/samples/PrefixCalcTest.kt:
--------------------------------------------------------------------------------
1 | package net.raboof.parsekt.samples
2 |
3 | import net.raboof.parsekt.Parser
4 | import net.raboof.parsekt.Result
5 | import org.junit.Test
6 | import kotlin.collections.listOf
7 | import kotlin.test.*
8 | import kotlin.text.substring
9 |
10 | class PrefixCalcTest {
11 | // PrefixCalc over String input: anyChar yields the first character and the remaining text, or an EOF error on empty input
12 | class PrefixCalcStringParser : PrefixCalc() {
13 | override val anyChar: Parser
14 | get() = Parser { input: String ->
15 | when (input.length) {
16 | 0 -> Result.ParseError("EOF", null, "")
17 | 1 -> Result.Value(input[0], "")
18 | else -> Result.Value(input[0], input.substring(1))
19 | }
20 | }
21 | }
22 |
23 | val parser = PrefixCalcStringParser()
24 |
25 | // These tests show the calculator in action
26 |
27 | @Test fun evaluate() {
28 | check("+ 5 6", 11)
29 | check("- 20 8", 12)
30 | check("* 4 6", 24)
31 | check("/ 6 2", 3)
32 | check("- (* 10 10) (+ 1 1 1)", 97)
33 | }
34 | // whitespace around operators, operands and parentheses does not change the result
35 | @Test fun extraSpaces() {
36 | check(" - ( * 10 10 ) ( + 1 1 1 ) ", 97)
37 | }
38 | // helper: evaluates input and compares the result with the expected value
39 | private fun check(input: String, value: Long) {
40 | assertEquals(value, parser.evaluate(input))
41 | }
42 |
43 | // These tests show how text is parsed into a tree structure
44 |
45 | @Test fun number() {
46 | assertEquals(PrefixCalc.Number("123"), (parser.number("123").valueOrFail()))
47 | }
48 | // a '-' directly in front of digits belongs to the number literal
49 | @Test fun plusNegatives() {
50 | assertEquals(PrefixCalc.Operation('+', listOf(PrefixCalc.Number("-1"), PrefixCalc.Number("-123"))), parser.operation("+ -1 -123").valueOrFail())
51 | }
52 | // every operator produces the same tree shape
53 | @Test fun ops() {
54 | for(op in listOf('+', '*', '/', '-')) {
55 | assertEquals(PrefixCalc.Operation(op, listOf(PrefixCalc.Number("1"), PrefixCalc.Number("-123"))), parser.operation("$op 1 -123").valueOrFail())
56 | }
57 | }
58 | // the leading '-' is the operator; the following '-' signs are parsed as part of the operands
59 | @Test fun minusNegatives() {
60 | assertEquals(PrefixCalc.Operation('-', listOf(PrefixCalc.Number("-1"), PrefixCalc.Number("-123"))), parser.operation("- -1 -123").valueOrFail())
61 | }
62 | // a parenthesised operation can appear as an operand
63 | @Test fun nestedExpression() {
64 | assertEquals(PrefixCalc.Operation(
65 | '+',
66 | listOf(PrefixCalc.Number("-1"),
67 | PrefixCalc.Operation('+', listOf(PrefixCalc.Number("2"), PrefixCalc.Number("123"))))),
68 | parser.operation("+ -1 (+ 2 123)").valueOrFail())
69 | }
70 | }
--------------------------------------------------------------------------------