├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── README.md ├── pom.xml └── src ├── main └── kotlin │ └── net │ └── raboof │ └── parsekt │ ├── CharParsers.kt │ ├── Parser.kt │ ├── Parsers.kt │ ├── Reference.kt │ ├── Result.kt │ ├── StringParser.kt │ └── samples │ ├── MiniML.kt │ └── PrefixCalc.kt └── test └── kotlin └── net └── raboof └── parsekt ├── CharParsersTest.kt ├── ReferenceTest.kt └── samples ├── MiniMLTest.kt └── PrefixCalcTest.kt /.gitignore: -------------------------------------------------------------------------------- 1 | /target -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | # allows docker containers to be used for builds 2 | sudo: false 3 | 4 | language: java 5 | 6 | jdk: 7 | - oraclejdk8 8 | 9 | cache: 10 | directories: 11 | - $HOME/.m2 12 | 13 | # code coverage 14 | after_success: 15 | - bash <(curl -s https://codecov.io/bash) -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | 2 | Apache License 3 | Version 2.0, January 2004 4 | http://www.apache.org/licenses/ 5 | 6 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 7 | 8 | 1. Definitions. 9 | 10 | "License" shall mean the terms and conditions for use, reproduction, 11 | and distribution as defined by Sections 1 through 9 of this document. 12 | 13 | "Licensor" shall mean the copyright owner or entity authorized by 14 | the copyright owner that is granting the License. 15 | 16 | "Legal Entity" shall mean the union of the acting entity and all 17 | other entities that control, are controlled by, or are under common 18 | control with that entity. 
For the purposes of this definition, 19 | "control" means (i) the power, direct or indirect, to cause the 20 | direction or management of such entity, whether by contract or 21 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 22 | outstanding shares, or (iii) beneficial ownership of such entity. 23 | 24 | "You" (or "Your") shall mean an individual or Legal Entity 25 | exercising permissions granted by this License. 26 | 27 | "Source" form shall mean the preferred form for making modifications, 28 | including but not limited to software source code, documentation 29 | source, and configuration files. 30 | 31 | "Object" form shall mean any form resulting from mechanical 32 | transformation or translation of a Source form, including but 33 | not limited to compiled object code, generated documentation, 34 | and conversions to other media types. 35 | 36 | "Work" shall mean the work of authorship, whether in Source or 37 | Object form, made available under the License, as indicated by a 38 | copyright notice that is included in or attached to the work 39 | (an example is provided in the Appendix below). 40 | 41 | "Derivative Works" shall mean any work, whether in Source or Object 42 | form, that is based on (or derived from) the Work and for which the 43 | editorial revisions, annotations, elaborations, or other modifications 44 | represent, as a whole, an original work of authorship. For the purposes 45 | of this License, Derivative Works shall not include works that remain 46 | separable from, or merely link (or bind by name) to the interfaces of, 47 | the Work and Derivative Works thereof. 
48 | 49 | "Contribution" shall mean any work of authorship, including 50 | the original version of the Work and any modifications or additions 51 | to that Work or Derivative Works thereof, that is intentionally 52 | submitted to Licensor for inclusion in the Work by the copyright owner 53 | or by an individual or Legal Entity authorized to submit on behalf of 54 | the copyright owner. For the purposes of this definition, "submitted" 55 | means any form of electronic, verbal, or written communication sent 56 | to the Licensor or its representatives, including but not limited to 57 | communication on electronic mailing lists, source code control systems, 58 | and issue tracking systems that are managed by, or on behalf of, the 59 | Licensor for the purpose of discussing and improving the Work, but 60 | excluding communication that is conspicuously marked or otherwise 61 | designated in writing by the copyright owner as "Not a Contribution." 62 | 63 | "Contributor" shall mean Licensor and any individual or Legal Entity 64 | on behalf of whom a Contribution has been received by Licensor and 65 | subsequently incorporated within the Work. 66 | 67 | 2. Grant of Copyright License. Subject to the terms and conditions of 68 | this License, each Contributor hereby grants to You a perpetual, 69 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 70 | copyright license to reproduce, prepare Derivative Works of, 71 | publicly display, publicly perform, sublicense, and distribute the 72 | Work and such Derivative Works in Source or Object form. 73 | 74 | 3. Grant of Patent License. 
Subject to the terms and conditions of 75 | this License, each Contributor hereby grants to You a perpetual, 76 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 77 | (except as stated in this section) patent license to make, have made, 78 | use, offer to sell, sell, import, and otherwise transfer the Work, 79 | where such license applies only to those patent claims licensable 80 | by such Contributor that are necessarily infringed by their 81 | Contribution(s) alone or by combination of their Contribution(s) 82 | with the Work to which such Contribution(s) was submitted. If You 83 | institute patent litigation against any entity (including a 84 | cross-claim or counterclaim in a lawsuit) alleging that the Work 85 | or a Contribution incorporated within the Work constitutes direct 86 | or contributory patent infringement, then any patent licenses 87 | granted to You under this License for that Work shall terminate 88 | as of the date such litigation is filed. 89 | 90 | 4. Redistribution. 
You may reproduce and distribute copies of the 91 | Work or Derivative Works thereof in any medium, with or without 92 | modifications, and in Source or Object form, provided that You 93 | meet the following conditions: 94 | 95 | (a) You must give any other recipients of the Work or 96 | Derivative Works a copy of this License; and 97 | 98 | (b) You must cause any modified files to carry prominent notices 99 | stating that You changed the files; and 100 | 101 | (c) You must retain, in the Source form of any Derivative Works 102 | that You distribute, all copyright, patent, trademark, and 103 | attribution notices from the Source form of the Work, 104 | excluding those notices that do not pertain to any part of 105 | the Derivative Works; and 106 | 107 | (d) If the Work includes a "NOTICE" text file as part of its 108 | distribution, then any Derivative Works that You distribute must 109 | include a readable copy of the attribution notices contained 110 | within such NOTICE file, excluding those notices that do not 111 | pertain to any part of the Derivative Works, in at least one 112 | of the following places: within a NOTICE text file distributed 113 | as part of the Derivative Works; within the Source form or 114 | documentation, if provided along with the Derivative Works; or, 115 | within a display generated by the Derivative Works, if and 116 | wherever such third-party notices normally appear. The contents 117 | of the NOTICE file are for informational purposes only and 118 | do not modify the License. You may add Your own attribution 119 | notices within Derivative Works that You distribute, alongside 120 | or as an addendum to the NOTICE text from the Work, provided 121 | that such additional attribution notices cannot be construed 122 | as modifying the License. 
123 | 124 | You may add Your own copyright statement to Your modifications and 125 | may provide additional or different license terms and conditions 126 | for use, reproduction, or distribution of Your modifications, or 127 | for any such Derivative Works as a whole, provided Your use, 128 | reproduction, and distribution of the Work otherwise complies with 129 | the conditions stated in this License. 130 | 131 | 5. Submission of Contributions. Unless You explicitly state otherwise, 132 | any Contribution intentionally submitted for inclusion in the Work 133 | by You to the Licensor shall be under the terms and conditions of 134 | this License, without any additional terms or conditions. 135 | Notwithstanding the above, nothing herein shall supersede or modify 136 | the terms of any separate license agreement you may have executed 137 | with Licensor regarding such Contributions. 138 | 139 | 6. Trademarks. This License does not grant permission to use the trade 140 | names, trademarks, service marks, or product names of the Licensor, 141 | except as required for reasonable and customary use in describing the 142 | origin of the Work and reproducing the content of the NOTICE file. 143 | 144 | 7. Disclaimer of Warranty. Unless required by applicable law or 145 | agreed to in writing, Licensor provides the Work (and each 146 | Contributor provides its Contributions) on an "AS IS" BASIS, 147 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 148 | implied, including, without limitation, any warranties or conditions 149 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 150 | PARTICULAR PURPOSE. You are solely responsible for determining the 151 | appropriateness of using or redistributing the Work and assume any 152 | risks associated with Your exercise of permissions under this License. 153 | 154 | 8. Limitation of Liability. 
In no event and under no legal theory, 155 | whether in tort (including negligence), contract, or otherwise, 156 | unless required by applicable law (such as deliberate and grossly 157 | negligent acts) or agreed to in writing, shall any Contributor be 158 | liable to You for damages, including any direct, indirect, special, 159 | incidental, or consequential damages of any character arising as a 160 | result of this License or out of the use or inability to use the 161 | Work (including but not limited to damages for loss of goodwill, 162 | work stoppage, computer failure or malfunction, or any and all 163 | other commercial damages or losses), even if such Contributor 164 | has been advised of the possibility of such damages. 165 | 166 | 9. Accepting Warranty or Additional Liability. While redistributing 167 | the Work or Derivative Works thereof, You may choose to offer, 168 | and charge a fee for, acceptance of support, warranty, indemnity, 169 | or other liability obligations and/or rights consistent with this 170 | License. However, in accepting such obligations, You may act only 171 | on Your own behalf and on Your sole responsibility, not on behalf 172 | of any other Contributor, and only if You agree to indemnify, 173 | defend, and hold each Contributor harmless for any liability 174 | incurred by, or claims asserted against, such Contributor by reason 175 | of your accepting any such warranty or additional liability. 176 | 177 | END OF TERMS AND CONDITIONS 178 | 179 | APPENDIX: How to apply the Apache License to your work. 180 | 181 | To apply the Apache License to your work, attach the following 182 | boilerplate notice, with the fields enclosed by brackets "[]" 183 | replaced with your own identifying information. (Don't include 184 | the brackets!) The text should be enclosed in the appropriate 185 | comment syntax for the file format. 
We also recommend that a 186 | file or class name and description of purpose be included on the 187 | same "printed page" as the copyright notice for easier 188 | identification within third-party archives. 189 | 190 | Copyright [yyyy] [name of copyright owner] 191 | 192 | Licensed under the Apache License, Version 2.0 (the "License"); 193 | you may not use this file except in compliance with the License. 194 | You may obtain a copy of the License at 195 | 196 | http://www.apache.org/licenses/LICENSE-2.0 197 | 198 | Unless required by applicable law or agreed to in writing, software 199 | distributed under the License is distributed on an "AS IS" BASIS, 200 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 201 | See the License for the specific language governing permissions and 202 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ParSekt 🍷 [![Build Status](https://travis-ci.org/absurdhero/parsekt.svg?branch=master)](https://travis-ci.org/absurdhero/parsekt) [![codecov.io](https://codecov.io/github/absurdhero/parsekt/coverage.svg?branch=master)](https://codecov.io/github/absurdhero/parsekt?branch=master) 2 | 3 | Parser Combinator library for [Kotlin](http://kotlinlang.org) based on 4 | [Luke Hoban's blog post](http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx) 5 | and the [Parsec Paper](http://research.microsoft.com/apps/pubs/default.aspx?id=65201). 6 | 7 | Included are two sample parsers. The [prefix calculator](src/main/kotlin/net/raboof/parsekt/samples/PrefixCalc.kt) is a good place to start. 8 | The guts of the parser live in the Parser, Parsers, and CharParsers classes. 9 | 10 | This library is primarily built to demonstrate 11 | Kotlin's type system and features with a minimum of code. 
12 | Its error reporting is incomplete and it is missing several convenience features. 13 | For a more mature Kotlin/Java alternative for production use, look at 14 | [JParsec](https://github.com/jparsec/jparsec) or contribute to this project. 15 | I translated the JParsec Tutorial calculator into Kotlin 16 | on [my blog](http://tumblr.raboof.net/post/135542198863/jparsec-tutorial-in-kotlin) 17 | to get you started with JParsec. 18 | 19 | ## Concepts 20 | 21 | For those who are not knee-deep in functional programming, the terminology 22 | alone can be confusing. However, the concepts are simple and the library is not 23 | terribly difficult to use or understand. Here is a small overview. 24 | Consult the many links above for even more information. 25 | 26 | A parser combinator works by providing the programmer with a set of 27 | functions that take an input and return a result. 28 | For example, a parser could read characters and output tokens, 29 | or read tokens and output an AST, or read characters and output 30 | just a single number. Any combination of input or output is possible. 31 | Most of the library code does not care which input and output types you choose. 32 | As a consequence, you can write both one- and two-pass parsers 33 | for a compiler. DSLs are also a good fit for this type of parser. 34 | 35 | The magical thing about a parser *combinator* library is that a small 36 | list of primitive parser functions can be composed together by other functions 37 | that describe how parsers should be combined. These functions are 38 | called parser combinators. 39 | 40 | Some of the most basic combinators are `repeat()` and `repeat1()` 41 | which are equivalent to `*` and `+` in regex and BNF. The library also provides 42 | other useful combinators like `or()` which will return the result of whichever 43 | parser matched and more specialized ones like `concat()` which takes the 44 | characters matched from two parsers and combines them into a single character 45 | string. 
46 | 47 | ## To Do 48 | 49 | These are some improvements that I think would turn this into a generally 50 | useful parser library. 51 | 52 | - track character numbers in error reporting 53 | - for character inputs, make it possible to track line numbers 54 | - infix calculator example with generic infix parsers 55 | - find a more readable way to express monadic operations (Parser::mapJoin could be nicer) 56 | - optimize "or" combinator 57 | -------------------------------------------------------------------------------- /pom.xml: -------------------------------------------------------------------------------- 1 | 4 | 5 | 4.0.0 6 | 7 | net.raboof 8 | parsekt 9 | 1.0-SNAPSHOT 10 | jar 11 | 12 | Parsekt - Parser Combinator 13 | http://github.com/absurdhero/parsekt 14 | Parser combinator library for Kotlin which makes writing parsers easier 15 | 16 | 17 | 18 | The Apache License, Version 2.0 19 | http://www.apache.org/licenses/LICENSE-2.0.txt 20 | 21 | 22 | 23 | 24 | 25 | absurdhero 26 | None 27 | https://github.com/absurdhero 28 | 29 | 30 | 31 | 32 | scm:git:git@github.com:absurdhero/parsekt.git 33 | scm:git:git@github.com:absurdhero/parsekt.git 34 | git:git@github.com:absurdhero/parsekt.git 35 | 36 | 37 | 38 | UTF-8 39 | 1.2.61 40 | 4.12 41 | true 42 | 43 | 44 | 45 | 46 | org.jetbrains.kotlin 47 | kotlin-stdlib 48 | ${kotlin.version} 49 | 50 | 51 | 52 | org.jetbrains.kotlin 53 | kotlin-test 54 | ${kotlin.version} 55 | test 56 | 57 | 58 | 59 | junit 60 | junit 61 | ${junit.version} 62 | test 63 | 64 | 65 | 66 | 67 | src/main/kotlin 68 | src/test/kotlin 69 | 70 | 71 | org.jetbrains.kotlin 72 | kotlin-maven-plugin 73 | ${kotlin.version} 74 | 75 | 76 | compile 77 | compile 78 | 79 | compile 80 | 81 | 82 | 83 | test-compile 84 | test-compile 85 | 86 | test-compile 87 | 88 | 89 | 90 | 91 | 92 | 93 | org.jacoco 94 | jacoco-maven-plugin 95 | 0.8.2 96 | 97 | 98 | 99 | prepare-agent 100 | 101 | 102 | 103 | report 104 | test 105 | 106 | report 107 | 108 | 109 | 110 | 111 | 112 
| 113 | 114 | 115 | 116 | release 117 | 118 | 119 | 120 | 121 | org.apache.maven.plugins 122 | maven-release-plugin 123 | 2.5.3 124 | 125 | -Dgpg.passphrase=${gpg.passphrase} 126 | 127 | 128 | 129 | 130 | 131 | 132 | 133 | release-sign-artifacts 134 | 135 | 136 | performRelease 137 | true 138 | 139 | 140 | 141 | 142 | 143 | org.apache.maven.plugins 144 | maven-gpg-plugin 145 | 1.6 146 | 147 | ${gpg.passphrase} 148 | 149 | 150 | 151 | sign-artifacts 152 | verify 153 | 154 | sign 155 | 156 | 157 | 158 | 159 | 160 | 161 | 162 | 163 | 164 | 165 | 166 | ossrh 167 | https://oss.sonatype.org/content/repositories/snapshots 168 | 169 | 170 | 171 | 172 | -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/CharParsers.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | import kotlin.collections.arrayListOf 4 | import kotlin.collections.joinToString 5 | import kotlin.collections.plus 6 | import kotlin.text.Regex 7 | 8 | /** Extends the basic combinators with many character-specific parsers */ 9 | abstract class CharParsers : Parsers() { 10 | // implement anyChar to read a character from a sequence 11 | abstract val anyChar: Parser 12 | 13 | fun char(ch: Char): Parser { 14 | return anyChar.filter { c -> c == ch }.withErrorLabel("char($ch)") 15 | } 16 | 17 | fun char(predicate: (Char) -> Boolean): Parser { 18 | return anyChar.filter(predicate).withErrorLabel("char(predicate)") 19 | } 20 | 21 | fun char(regex: Regex): Parser { 22 | return anyChar.filter { ch: Char -> regex.matches(ch.toString()) }.withErrorLabel("char(/$regex/)") 23 | } 24 | 25 | //public val whitespace: Parser> = repeat(char(' ') or char('\t') or char('\n') or char('\r')); 26 | val whitespace = repeat(char(Regex("""\s"""))).withErrorLabel("whitespace") 27 | val wordChar = char(Regex("""\w""")) 28 | fun wsChar(ch: Char) = whitespace and char(ch) 29 | val token = 
repeat1(wordChar).between(whitespace) 30 | 31 | fun concat(p1: Parser, p2: Parser>): Parser> { 32 | return p1.project({v: Char, l: List -> arrayListOf(v) + l })({p2}) 33 | } 34 | 35 | fun concat(vararg charParsers: Parser): Parser> { 36 | var parser : Parser> = succeed(emptyList()) 37 | 38 | for (p in charParsers) { 39 | parser = parser.project({l: List, v: Char -> l + v })({p}) 40 | } 41 | 42 | return parser 43 | } 44 | fun charPrefix(prefix: Char, parser: Parser>): Parser> { 45 | return concat(char(prefix), parser) or parser 46 | } 47 | 48 | /** greedy regex matcher */ 49 | fun substring(regex: Regex): Parser> { 50 | return Parser { input -> 51 | var result = anyChar(input) 52 | when (result) { 53 | is Result.ParseError -> Result.ParseError(result) 54 | is Result.Value -> { 55 | val temp = StringBuilder() 56 | var lastRest: TInput = result.rest 57 | var everMatched = false 58 | 59 | while (result !is Result.ParseError) { 60 | result as Result.Value 61 | 62 | temp.append(result.value) 63 | if (regex.matches(temp)) { 64 | everMatched = true 65 | } else if (everMatched) { 66 | temp.deleteCharAt(temp.length-1) 67 | break 68 | } 69 | 70 | lastRest = result.rest 71 | result = anyChar(result.rest) 72 | } 73 | 74 | if (everMatched) { 75 | Result.Value(temp.toList(), lastRest) 76 | } else { 77 | Result.ParseError("/$regex/", lastRest) 78 | } 79 | } 80 | } 81 | } 82 | } 83 | 84 | } 85 | 86 | fun Parser>.string(): Parser { 87 | return this.mapResult { Result.Value(it.value.joinToString(""), it.rest) } 88 | } -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/Parser.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | // based on http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx 4 | 5 | /** A Parser is both a function and an object with methods that return derivative parsers */ 6 | open class 
Parser(val f: (TInput) -> Result) { 7 | 8 | /** A parser can be invoked as a function of an input that returns a result */ 9 | operator fun invoke(input: TInput): Result = f(input) 10 | 11 | /* the following filter and map functions are the building blocks used to derive new parsers */ 12 | 13 | fun filter(pred: (TValue) -> Boolean): Parser { 14 | return Parser { input -> 15 | val result = this(input) 16 | when (result) { 17 | is Result.Value -> if (pred(result.value)) { 18 | result 19 | } else { 20 | Result.ParseError("filter", null, result.rest) 21 | } 22 | is Result.ParseError -> result 23 | } 24 | } 25 | } 26 | 27 | fun mapResult(selector: (Result.Value) -> Result): Parser { 28 | return Parser { input -> 29 | val result = this(input) 30 | when (result) { 31 | is Result.Value -> selector(result) 32 | is Result.ParseError -> Result.ParseError(result) 33 | } 34 | } 35 | } 36 | 37 | fun map(selector: (TValue) -> TValue2): Parser 38 | = mapResult { result -> Result.Value(selector(result.value), result.rest) } 39 | 40 | /** This function is a convenient way to build parsers that act on more than one input parser. 41 | * 42 | * It invokes "this" followed by the parser returned from the selector function. 43 | * It then passes the two resulting values to the projector which returns one result. 44 | * 45 | * The selector "maps" the value from "this" to an intermediate parser. 46 | * Then the projector "joins" the original value and the mapped value into a new value. 47 | * 48 | * See usages of this function in this library for examples of how to make use of it. 
49 | */ 50 | fun mapJoin( 51 | selector: (TValue) -> Parser, 52 | projector: (TValue, TIntermediate) -> TValue2 53 | ): Parser { 54 | return Parser { input -> 55 | val res = this(input) 56 | when (res) { 57 | is Result.ParseError -> Result.ParseError(res) 58 | is Result.Value -> { 59 | val v = res.value 60 | val res2 = selector(v)(res.rest) 61 | when (res2) { 62 | is Result.ParseError -> Result.ParseError(res2) 63 | is Result.Value -> Result.Value(projector(v, res2.value), res2.rest) 64 | } 65 | } 66 | } 67 | } 68 | } 69 | 70 | /* These are some essential combinators which are 71 | functions that take parsers as arguments and return a new parser 72 | */ 73 | 74 | infix fun or(other: Parser): Parser { 75 | return Parser { input -> 76 | val result = this(input) 77 | when (result) { 78 | is Result.Value -> result 79 | is Result.ParseError -> other(input) 80 | } 81 | } 82 | } 83 | 84 | infix fun and(other: Parser): Parser = 85 | this.mapJoin({ other }, { _, i -> i }) 86 | 87 | // like "and" but returns the value of the first parser 88 | infix fun before(other: Parser): Parser = 89 | this.mapJoin({ other }, { v, _ -> v }) 90 | 91 | 92 | /* error tracking */ 93 | 94 | /** Allows a reported error from a parser to be modified. 95 | * 96 | * This is useful when the combinator knows more about why an error happened. 
97 | */ 98 | fun mapError(errorFunc: (Result.ParseError) -> Result.ParseError): Parser { 99 | return Parser { input -> 100 | val result = this(input) 101 | when (result) { 102 | is Result.Value -> result 103 | is Result.ParseError -> errorFunc(result) 104 | } 105 | } 106 | } 107 | 108 | fun withErrorLabel(label: String) : Parser { 109 | return mapError { Result.ParseError(label, it.child, it.rest) } 110 | } 111 | 112 | fun wrapError(label: String) : Parser { 113 | return mapError { Result.ParseError(label, it) } 114 | } 115 | 116 | /* Generally useful functions */ 117 | 118 | /** curry the projector function in mapJoin 119 | * 120 | * @see mapJoin 121 | */ 122 | fun project(projector: (TValue, TIntermediate) -> TValue2) 123 | : ((TValue) -> Parser) -> Parser { 124 | return { selector: (TValue) -> Parser -> 125 | mapJoin(selector, projector) 126 | } 127 | } 128 | 129 | // extract the result of this parser from the input between two other parsers 130 | fun between(start: Parser, end: Parser = start): Parser { 131 | return (start and this before end).wrapError("between") 132 | } 133 | 134 | fun asList(): Parser> { 135 | return mapResult { Result.Value(listOf(it.value), it.rest) } 136 | } 137 | 138 | // sometimes useful for working around covariance problems (or from T to T?) 
139 | fun cast() : Parser { 140 | @Suppress("UNCHECKED_CAST") 141 | return this as Parser 142 | } 143 | } -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/Parsers.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | import kotlin.collections.arrayListOf 4 | import kotlin.collections.emptyList 5 | import kotlin.collections.plus 6 | 7 | /** Base parser combinator class which contains the core combinators */ 8 | abstract class Parsers { 9 | fun succeed(value: TValue): Parser { 10 | return Parser { input -> Result.Value(value, input) } 11 | } 12 | 13 | fun repeat(parser: Parser): Parser> { 14 | return repeat1(parser) or succeed(emptyList()) 15 | } 16 | 17 | fun repeat1(parser: Parser): Parser> { 18 | return parser.mapJoin({ repeat(parser) }, { v: TValue, l: List -> arrayListOf(v) + l }) 19 | .wrapError("repeat1") 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/Reference.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | /** useful when declaring mutually recursive parsers */ 4 | class Reference { 5 | private var parser: Parser = Parser {throw NullPointerException("parser reference not set") } 6 | 7 | fun set(to : Parser) { 8 | parser = to 9 | } 10 | 11 | fun get() : Parser { 12 | return Parser { input -> parser.invoke(input)} 13 | } 14 | } -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/Result.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | /** A parser can return one of two Results. Success with a value or error information. 
*/ 4 | sealed class Result { 5 | 6 | class Value(val value: TValue, val rest: TInput) : Result() { 7 | override fun toString(): String { 8 | return "Value{value=$value, rest=$rest}" 9 | } 10 | 11 | override fun equals(other: Any?): Boolean{ 12 | if (this === other) return true 13 | if (other?.javaClass != javaClass) return false 14 | 15 | other as Value<*, *> 16 | 17 | if (value != other.value) return false 18 | if (rest != other.rest) return false 19 | 20 | return true 21 | } 22 | 23 | override fun hashCode(): Int{ 24 | var result = value?.hashCode() ?: 0 25 | result += 31 * result + (rest?.hashCode() ?: 0) 26 | return result 27 | } 28 | } 29 | 30 | class ParseError(val productionLabel: String, 31 | val child: ParseError?, 32 | val rest: TInput) : Result() { 33 | 34 | /** make a parent of another error */ 35 | constructor(production: String, error: ParseError) : this(production, error, error.rest) 36 | 37 | /** no child */ 38 | constructor(production: String, rest: TInput) : this(production, null, rest) 39 | 40 | /** copy constructor */ 41 | constructor(error: ParseError) : this(error.productionLabel, error.child, error.rest) 42 | 43 | override fun toString(): String { 44 | return "Error{production=$productionLabel, child=${child?.innerToString()}, rest=$rest}" 45 | } 46 | 47 | fun innerToString(): String { 48 | return "Error{production=$productionLabel, child=${child?.innerToString()}}" 49 | } 50 | } 51 | 52 | /** convenience method for tests and other error tolerant usage */ 53 | fun valueOrFail() : TValue { 54 | return when (this) { 55 | is ParseError -> throw RuntimeException("parse error: $this") 56 | is Value -> this.value 57 | } 58 | } 59 | } 60 | 61 | 62 | -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/StringParser.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | import kotlin.text.substring 4 | 5 | /** The class 
of parsers that takes a String as input */ 6 | open class StringParser : CharParsers() { 7 | override val anyChar: Parser 8 | get() = Parser { input: String -> 9 | when (input.length) { 10 | 0 -> Result.ParseError("EOF", null, "") 11 | 1 -> Result.Value(input[0], "") 12 | else -> Result.Value(input[0], input.substring(1)) 13 | } 14 | } 15 | } 16 | 17 | -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/samples/MiniML.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt.samples 2 | 3 | import net.raboof.parsekt.CharParsers 4 | import net.raboof.parsekt.Parser 5 | import net.raboof.parsekt.Reference 6 | import net.raboof.parsekt.string 7 | import kotlin.collections.emptyList 8 | import kotlin.text.isLetter 9 | import kotlin.text.isLetterOrDigit 10 | 11 | /* Translated from MiniML on Luke Hoban's Blog 12 | http://blogs.msdn.com/b/lukeh/archive/2007/08/19/monadic-parser-combinators-using-c-3-0.aspx?PageIndex=2#comments 13 | */ 14 | 15 | // AST for the MiniML language 16 | interface Terminal 17 | data class LambdaTerm(val ident: String, val term: Terminal) : Terminal 18 | data class LetTerm(val ident: String, val rhs: Terminal, val body: Terminal) : Terminal 19 | data class AppTerm(val func: Terminal, val args: List = emptyList()) : Terminal 20 | data class VarTerm(val ident: String) : Terminal 21 | 22 | abstract class MiniML : CharParsers() { 23 | 24 | val Id = whitespace and concat(char(Char::isLetter), repeat(char(Char::isLetterOrDigit))).string() 25 | val Ident = Id.filter { it != "let" && it != "in" } 26 | val LetId = Id.filter { it == "let" } 27 | val InId = Id.filter { it == "in" } 28 | 29 | val Lambda: Parser = Ident.between(wsChar('\\'), wsChar('.')) 30 | .mapJoin({ Term }, { x, t -> LambdaTerm(x,t)}) 31 | 32 | val Let : Parser = Ident.between(LetId, wsChar('=')) 33 | .mapJoin( 34 | {(Term before InId).mapJoin({Term}, {v, s -> 
Pair(v,s)})}, 35 | { v, s -> LetTerm(v, s.first, s.second)}) 36 | 37 | private val Term1Ref : Reference = Reference() 38 | val Term1 = Term1Ref.get() 39 | 40 | val App : Parser = Term1.mapJoin({repeat(Term1)}, {t, ts -> AppTerm(t, ts)}) 41 | 42 | val Term = Lambda or Let or App 43 | 44 | init { Term1Ref.set(Ident.map { VarTerm(it) as Terminal } or Term.between(char('('), char(')'))) } 45 | 46 | val All : Parser = Term before wsChar(';') 47 | } 48 | 49 | -------------------------------------------------------------------------------- /src/main/kotlin/net/raboof/parsekt/samples/PrefixCalc.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt.samples 2 | 3 | import net.raboof.parsekt.* 4 | import kotlin.collections.map 5 | import kotlin.collections.reduce 6 | import kotlin.text.isDigit 7 | import kotlin.text.toLong 8 | 9 | 10 | /** A 4-function calculator for integers that uses prefix notation. */ 11 | abstract class PrefixCalc : CharParsers() { 12 | /** evaluate the input, or throw a RuntimeException if it cannot be parsed */ 13 | fun evaluate(input: TInput) : Long { 14 | return operation(input).valueOrFail().evaluate() 15 | } 16 | 17 | interface Expr { 18 | fun evaluate() : Long 19 | } 20 | 21 | data class Operation(val operator: Char, val exprs: List) : Expr { 22 | override fun evaluate() : Long { 23 | val terms = exprs.map { it.evaluate() } 24 | return when (operator) { 25 | '+' -> terms.reduce {acc, next -> acc + next} 26 | '-' -> terms.reduce {acc, next -> acc - next} 27 | '*' -> terms.reduce {acc, next -> acc * next} 28 | '/' -> terms.reduce {acc, next -> acc / next} 29 | else -> throw IllegalArgumentException() 30 | } 31 | } 32 | } 33 | 34 | data class Number(val value: String) : Expr { 35 | override fun evaluate() : Long { 36 | return value.toLong() 37 | } 38 | } 39 | 40 | val exprRef: Reference = Reference() 41 | val expr = exprRef.get() 42 | 43 | val number: Parser = whitespace and charPrefix('-', 
repeat1(char(Char::isDigit))).string().map { Number(it) as Expr } 44 | val plusOp = prefixOp('+') 45 | val minusOp = prefixOp('-') 46 | val multiplyOp = prefixOp('*') 47 | val divideOp = prefixOp('/') 48 | 49 | val operation = plusOp or minusOp or multiplyOp or divideOp 50 | 51 | init { exprRef.set(number or operation.between(wsChar('('), wsChar(')'))) } 52 | 53 | private fun prefixOp(opChar: Char): Parser = wsChar(opChar) and repeat1(expr).map { Operation(opChar, it) as Expr } 54 | } -------------------------------------------------------------------------------- /src/test/kotlin/net/raboof/parsekt/CharParsersTest.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | import org.junit.Test 4 | import kotlin.collections.* 5 | import kotlin.test.assertEquals 6 | import kotlin.test.assertTrue 7 | import kotlin.test.fail 8 | 9 | class CharParsersTest { 10 | 11 | private val parser = StringParser() 12 | 13 | @Test 14 | fun firstChar() { 15 | assertEquals(Result.Value('t', "est"), parser.anyChar("test")) 16 | } 17 | 18 | @Test 19 | fun whitespace() { 20 | assertEquals(Result.Value(emptyList(), "test"), parser.whitespace("test")) 21 | assertEquals(Result.Value(listOf(' ', ' '), "test"), parser.whitespace(" test")) 22 | } 23 | 24 | @Test 25 | fun chars() { 26 | assertTrue(parser.char('(')("x") is Result.ParseError) 27 | assertEquals(Result.Value('(', "test)"), parser.char('(')("(test)")) 28 | assertEquals(Result.Value(listOf('('), "test)"), parser.char('(').asList()("(test)")) 29 | } 30 | 31 | @Test 32 | fun tokens() { 33 | assertEquals(Result.Value(listOf('a', 'b', 'c'), ""), parser.token("abc")) 34 | 35 | // consumes whitespace both before and after 36 | assertEquals(Result.Value(listOf('a', 'b', 'c'), ""), parser.token(" abc ")) 37 | assertEquals(Result.Value("test", ""), parser.token.string()(" test ")) 38 | 39 | // does not match plain whitespace 40 | assertTrue(parser.token(" ") is 
Result.ParseError) 41 | } 42 | 43 | @Test 44 | fun parenWrappedToken() { 45 | val parenWrappedToken = parser.token.between( 46 | parser.char('(') and parser.whitespace, 47 | parser.whitespace and parser.char(')')) 48 | 49 | assertEquals(Result.Value(listOf('x'), ""), parenWrappedToken("(x)")) 50 | assertEquals(Result.Value("test", ""), parenWrappedToken.string()("(test)")) 51 | assertEquals(Result.Value("test", " "), parenWrappedToken.string()("( test ) ")) 52 | } 53 | 54 | @Test 55 | fun substring() { 56 | assertTrue(parser.substring(Regex("a"))("x") is Result.ParseError) 57 | assertEquals(Result.Value("(", "test)"), parser.substring(Regex("\\(")).string()("(test)")) 58 | assertEquals(Result.Value("(test", ")"), parser.substring(Regex("\\([^)]*")).string()("(test)")) 59 | assertEquals(Result.Value("(test)", ""), parser.substring(Regex("\\([^)]+\\)")).string()("(test)")) 60 | assertEquals(Result.Value("(test)", ""), parser.substring(Regex(".*")).string()("(test)")) 61 | 62 | assertEquals(Result.Value("\"\\\"foo\"", " abc"), parser.substring(Regex(""""(\\.|[^\\"])*"""")).string()("\"\\\"foo\" abc")) 63 | } 64 | 65 | 66 | @Test 67 | fun errorInformation() { 68 | val result = parser.concat(parser.char('b'), parser.char('o'), parser.char('p')).string()("bolt") 69 | when (result) { 70 | is Result.ParseError -> { 71 | assertEquals("char(p)", result.productionLabel) 72 | assertEquals("t", result.rest) 73 | assertEquals("Error{production=char(p), child=null, rest=t}", result.toString()) 74 | assertEquals("Error{production=char(p), child=null}", result.innerToString()) 75 | } 76 | else -> fail() 77 | } 78 | } 79 | 80 | } -------------------------------------------------------------------------------- /src/test/kotlin/net/raboof/parsekt/ReferenceTest.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt 2 | 3 | import org.junit.Test 4 | 5 | import kotlin.test.assertEquals 6 | import kotlin.test.fail 7 | 8 | class 
ReferenceTest { 9 | @Test 10 | fun setAndGet() { 11 | val exprRef: Reference = Reference() 12 | val parser = StringParser().char('x') 13 | exprRef.set(parser) 14 | 15 | // use the error production label to figure out that the wrapped 16 | // parser is still calling the original char parser underneath 17 | 18 | val result = exprRef.get()("y") 19 | when (result) { 20 | is Result.ParseError -> assertEquals("char(x)", result.productionLabel) 21 | else -> fail() 22 | } 23 | } 24 | 25 | @Test 26 | fun throwWhenInvokedIfNotSet() { 27 | val exprRef: Reference = Reference() 28 | 29 | try { 30 | exprRef.get()("y") 31 | } catch(ignored: NullPointerException) { 32 | return 33 | } 34 | 35 | fail() 36 | } 37 | 38 | } -------------------------------------------------------------------------------- /src/test/kotlin/net/raboof/parsekt/samples/MiniMLTest.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt.samples 2 | 3 | import net.raboof.parsekt.Parser 4 | import net.raboof.parsekt.Result 5 | import org.junit.Test 6 | import kotlin.test.assertEquals 7 | import kotlin.test.assertNotNull 8 | import kotlin.test.assertTrue 9 | import kotlin.text.substring 10 | 11 | 12 | class MiniMLTest { 13 | class MiniMLStringParser : MiniML() { 14 | override val anyChar: Parser 15 | get() = Parser { input: String -> 16 | when (input.length) { 17 | 0 -> Result.ParseError("EOF", null, "") 18 | 1 -> Result.Value(input[0], "") 19 | else -> Result.Value(input[0], input.substring(1)) 20 | } 21 | } 22 | } 23 | 24 | val parser = MiniMLStringParser() 25 | 26 | @Test fun ident() { 27 | assertEquals("A123", parser.Ident("""A123""").valueOrFail()) 28 | } 29 | 30 | @Test fun lambda() { 31 | assertEquals( 32 | LambdaTerm("x", LambdaTerm("y", AppTerm(VarTerm("z")))), 33 | parser.Lambda("""\x.\y.z""").valueOrFail()) 34 | } 35 | 36 | @Test fun term1() { 37 | assertEquals(VarTerm("A123"), parser.Term1("""A123""").valueOrFail()) 38 | 
assertEquals(AppTerm(VarTerm("x")), parser.Term1("""(x)""").valueOrFail()) 39 | } 40 | 41 | @Test fun term() { 42 | // lambda 43 | assertEquals( 44 | LambdaTerm("x", LambdaTerm("y", AppTerm(VarTerm("z")))), 45 | parser.Term("""\x.\y.z""").valueOrFail()) 46 | // app 47 | assertEquals((AppTerm(VarTerm("A123"))), parser.Term("""A123""").valueOrFail()) 48 | } 49 | 50 | @Test fun let() { 51 | assertEquals( 52 | LetTerm("x", AppTerm(VarTerm("y")), AppTerm(VarTerm("z"))) as Terminal, 53 | parser.Let("""let x = y in z""").valueOrFail()) 54 | } 55 | 56 | @Test fun program() { 57 | assertNotNull(parser.All("\\x.y;")) 58 | assertTrue(parser.All("\\x.y") is Result.ParseError, "do not match if semicolon missing") 59 | 60 | assertNotNull(parser.All(""" 61 | let true = \x.\y.x in 62 | let false = \x.\y.y in 63 | let if = \b.\l.\r.(b l) r in 64 | if true then false else true;""")) 65 | } 66 | } -------------------------------------------------------------------------------- /src/test/kotlin/net/raboof/parsekt/samples/PrefixCalcTest.kt: -------------------------------------------------------------------------------- 1 | package net.raboof.parsekt.samples 2 | 3 | import net.raboof.parsekt.Parser 4 | import net.raboof.parsekt.Result 5 | import org.junit.Test 6 | import kotlin.collections.listOf 7 | import kotlin.test.* 8 | import kotlin.text.substring 9 | 10 | class PrefixCalcTest { 11 | 12 | class PrefixCalcStringParser : PrefixCalc() { 13 | override val anyChar: Parser 14 | get() = Parser { input: String -> 15 | when (input.length) { 16 | 0 -> Result.ParseError("EOF", null, "") 17 | 1 -> Result.Value(input[0], "") 18 | else -> Result.Value(input[0], input.substring(1)) 19 | } 20 | } 21 | } 22 | 23 | val parser = PrefixCalcStringParser() 24 | 25 | // These tests show the calculator in action 26 | 27 | @Test fun evaluate() { 28 | check("+ 5 6", 11) 29 | check("- 20 8", 12) 30 | check("* 4 6", 24) 31 | check("/ 6 2", 3) 32 | check("- (* 10 10) (+ 1 1 1)", 97) 33 | } 34 | 35 | @Test 
fun extraSpaces() { 36 | check(" - ( * 10 10 ) ( + 1 1 1 ) ", 97) 37 | } 38 | 39 | private fun check(input: String, value: Long) { 40 | assertEquals(value, parser.evaluate(input)) 41 | } 42 | 43 | // These tests show how text is parsed into a tree structure 44 | 45 | @Test fun number() { 46 | assertEquals(PrefixCalc.Number("123"), (parser.number("123").valueOrFail())) 47 | } 48 | 49 | @Test fun plusNegatives() { 50 | assertEquals(PrefixCalc.Operation('+', listOf(PrefixCalc.Number("-1"), PrefixCalc.Number("-123"))), parser.operation("+ -1 -123").valueOrFail()) 51 | } 52 | 53 | @Test fun ops() { 54 | for(op in listOf('+', '*', '/', '-')) { 55 | assertEquals(PrefixCalc.Operation(op, listOf(PrefixCalc.Number("1"), PrefixCalc.Number("-123"))), parser.operation("$op 1 -123").valueOrFail()) 56 | } 57 | } 58 | 59 | @Test fun minusNegatives() { 60 | assertEquals(PrefixCalc.Operation('-', listOf(PrefixCalc.Number("-1"), PrefixCalc.Number("-123"))), parser.operation("- -1 -123").valueOrFail()) 61 | } 62 | 63 | @Test fun nestedExpression() { 64 | assertEquals(PrefixCalc.Operation( 65 | '+', 66 | listOf(PrefixCalc.Number("-1"), 67 | PrefixCalc.Operation('+', listOf(PrefixCalc.Number("2"), PrefixCalc.Number("123"))))), 68 | parser.operation("+ -1 (+ 2 123)").valueOrFail()) 69 | } 70 | } --------------------------------------------------------------------------------