├── .gitignore ├── CHANGELOG.md ├── LICENSE ├── README.md ├── doc ├── intro.md └── viz.jpeg ├── notes ├── project.clj ├── src ├── ambiparse.clj └── ambiparse │ ├── gll.clj │ ├── util.clj │ └── viz.clj └── test ├── ambiparse ├── adaptive_test.clj ├── benchmark.clj ├── calc_test.clj ├── edn_test.clj └── manual_test.clj ├── ambiparse_test.clj └── resources └── stuff.edn /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | pom.xml 5 | pom.xml.asc 6 | *.jar 7 | *.class 8 | /.lein-* 9 | /.nrepl-port 10 | .hgignore 11 | .hg/ 12 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Change Log 2 | All notable changes to this project will be documented in this file. This change log follows the conventions of [keepachangelog.com](http://keepachangelog.com/). 3 | 4 | ## [Unreleased] 5 | ### Changed 6 | - Add a new arity to `make-widget-async` to provide a different widget shape. 7 | 8 | ## [0.1.1] - 2016-11-15 9 | ### Changed 10 | - Documentation on how to make the widgets. 11 | 12 | ### Removed 13 | - `make-widget-sync` - we're all async, all the time. 14 | 15 | ### Fixed 16 | - Fixed widget maker to keep working when daylight savings switches over. 17 | 18 | ## 0.1.0 - 2016-11-15 19 | ### Added 20 | - Files from the new template. 21 | - Widget maker public API - `make-widget-sync`. 22 | 23 | [Unreleased]: https://github.com/your-name/gll/compare/0.1.1...HEAD 24 | [0.1.1]: https://github.com/your-name/gll/compare/0.1.0...0.1.1 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE PUBLIC 2 | LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION OF THE PROGRAM 3 | CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 4 | 5 | 1. DEFINITIONS 6 | 7 | "Contribution" means: 8 | 9 | a) in the case of the initial Contributor, the initial code and 10 | documentation distributed under this Agreement, and 11 | 12 | b) in the case of each subsequent Contributor: 13 | 14 | i) changes to the Program, and 15 | 16 | ii) additions to the Program; 17 | 18 | where such changes and/or additions to the Program originate from and are 19 | distributed by that particular Contributor. A Contribution 'originates' from 20 | a Contributor if it was added to the Program by such Contributor itself or 21 | anyone acting on such Contributor's behalf. Contributions do not include 22 | additions to the Program which: (i) are separate modules of software 23 | distributed in conjunction with the Program under their own license 24 | agreement, and (ii) are not derivative works of the Program. 25 | 26 | "Contributor" means any person or entity that distributes the Program. 27 | 28 | "Licensed Patents" mean patent claims licensable by a Contributor which are 29 | necessarily infringed by the use or sale of its Contribution alone or when 30 | combined with the Program. 31 | 32 | "Program" means the Contributions distributed in accordance with this 33 | Agreement. 34 | 35 | "Recipient" means anyone who receives the Program under this Agreement, 36 | including all Contributors. 37 | 38 | 2. GRANT OF RIGHTS 39 | 40 | a) Subject to the terms of this Agreement, each Contributor hereby grants 41 | Recipient a non-exclusive, worldwide, royalty-free copyright license to 42 | reproduce, prepare derivative works of, publicly display, publicly perform, 43 | distribute and sublicense the Contribution of such Contributor, if any, and 44 | such derivative works, in source code and object code form. 45 | 46 | b) Subject to the terms of this Agreement, each Contributor hereby grants 47 | Recipient a non-exclusive, worldwide, royalty-free patent license under 48 | Licensed Patents to make, use, sell, offer to sell, import and otherwise 49 | transfer the Contribution of such Contributor, if any, in source code and 50 | object code form. This patent license shall apply to the combination of the 51 | Contribution and the Program if, at the time the Contribution is added by the 52 | Contributor, such addition of the Contribution causes such combination to be 53 | covered by the Licensed Patents. The patent license shall not apply to any 54 | other combinations which include the Contribution. No hardware per se is 55 | licensed hereunder. 56 | 57 | c) Recipient understands that although each Contributor grants the licenses 58 | to its Contributions set forth herein, no assurances are provided by any 59 | Contributor that the Program does not infringe the patent or other 60 | intellectual property rights of any other entity. Each Contributor disclaims 61 | any liability to Recipient for claims brought by any other entity based on 62 | infringement of intellectual property rights or otherwise. As a condition to 63 | exercising the rights and licenses granted hereunder, each Recipient hereby 64 | assumes sole responsibility to secure any other intellectual property rights 65 | needed, if any. For example, if a third party patent license is required to 66 | allow Recipient to distribute the Program, it is Recipient's responsibility 67 | to acquire that license before distributing the Program. 68 | 69 | d) Each Contributor represents that to its knowledge it has sufficient 70 | copyright rights in its Contribution, if any, to grant the copyright license 71 | set forth in this Agreement. 72 | 73 | 3. REQUIREMENTS 74 | 75 | A Contributor may choose to distribute the Program in object code form under 76 | its own license agreement, provided that: 77 | 78 | a) it complies with the terms and conditions of this Agreement; and 79 | 80 | b) its license agreement: 81 | 82 | i) effectively disclaims on behalf of all Contributors all warranties and 83 | conditions, express and implied, including warranties or conditions of title 84 | and non-infringement, and implied warranties or conditions of merchantability 85 | and fitness for a particular purpose; 86 | 87 | ii) effectively excludes on behalf of all Contributors all liability for 88 | damages, including direct, indirect, special, incidental and consequential 89 | damages, such as lost profits; 90 | 91 | iii) states that any provisions which differ from this Agreement are offered 92 | by that Contributor alone and not by any other party; and 93 | 94 | iv) states that source code for the Program is available from such 95 | Contributor, and informs licensees how to obtain it in a reasonable manner on 96 | or through a medium customarily used for software exchange. 97 | 98 | When the Program is made available in source code form: 99 | 100 | a) it must be made available under this Agreement; and 101 | 102 | b) a copy of this Agreement must be included with each copy of the Program. 103 | 104 | Contributors may not remove or alter any copyright notices contained within 105 | the Program. 106 | 107 | Each Contributor must identify itself as the originator of its Contribution, 108 | if any, in a manner that reasonably allows subsequent Recipients to identify 109 | the originator of the Contribution. 110 | 111 | 4. COMMERCIAL DISTRIBUTION 112 | 113 | Commercial distributors of software may accept certain responsibilities with 114 | respect to end users, business partners and the like. While this license is 115 | intended to facilitate the commercial use of the Program, the Contributor who 116 | includes the Program in a commercial product offering should do so in a 117 | manner which does not create potential liability for other Contributors. 118 | Therefore, if a Contributor includes the Program in a commercial product 119 | offering, such Contributor ("Commercial Contributor") hereby agrees to defend 120 | and indemnify every other Contributor ("Indemnified Contributor") against any 121 | losses, damages and costs (collectively "Losses") arising from claims, 122 | lawsuits and other legal actions brought by a third party against the 123 | Indemnified Contributor to the extent caused by the acts or omissions of such 124 | Commercial Contributor in connection with its distribution of the Program in 125 | a commercial product offering. The obligations in this section do not apply 126 | to any claims or Losses relating to any actual or alleged intellectual 127 | property infringement. In order to qualify, an Indemnified Contributor must: 128 | a) promptly notify the Commercial Contributor in writing of such claim, and 129 | b) allow the Commercial Contributor to control, and cooperate with the 130 | Commercial Contributor in, the defense and any related settlement 131 | negotiations. The Indemnified Contributor may participate in any such claim 132 | at its own expense. 133 | 134 | For example, a Contributor might include the Program in a commercial product 135 | offering, Product X. That Contributor is then a Commercial Contributor. If 136 | that Commercial Contributor then makes performance claims, or offers 137 | warranties related to Product X, those performance claims and warranties are 138 | such Commercial Contributor's responsibility alone. Under this section, the 139 | Commercial Contributor would have to defend claims against the other 140 | Contributors related to those performance claims and warranties, and if a 141 | court requires any other Contributor to pay any damages as a result, the 142 | Commercial Contributor must pay those damages. 143 | 144 | 5. NO WARRANTY 145 | 146 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, THE PROGRAM IS PROVIDED ON 147 | AN "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER 148 | EXPRESS OR IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR 149 | CONDITIONS OF TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A 150 | PARTICULAR PURPOSE. Each Recipient is solely responsible for determining the 151 | appropriateness of using and distributing the Program and assumes all risks 152 | associated with its exercise of rights under this Agreement , including but 153 | not limited to the risks and costs of program errors, compliance with 154 | applicable laws, damage to or loss of data, programs or equipment, and 155 | unavailability or interruption of operations. 156 | 157 | 6. DISCLAIMER OF LIABILITY 158 | 159 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, NEITHER RECIPIENT NOR ANY 160 | CONTRIBUTORS SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, 161 | SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION 162 | LOST PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 163 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 164 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 165 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE POSSIBILITY 166 | OF SUCH DAMAGES. 167 | 168 | 7. GENERAL 169 | 170 | If any provision of this Agreement is invalid or unenforceable under 171 | applicable law, it shall not affect the validity or enforceability of the 172 | remainder of the terms of this Agreement, and without further action by the 173 | parties hereto, such provision shall be reformed to the minimum extent 174 | necessary to make such provision valid and enforceable. 175 | 176 | If Recipient institutes patent litigation against any entity (including a 177 | cross-claim or counterclaim in a lawsuit) alleging that the Program itself 178 | (excluding combinations of the Program with other software or hardware) 179 | infringes such Recipient's patent(s), then such Recipient's rights granted 180 | under Section 2(b) shall terminate as of the date such litigation is filed. 181 | 182 | All Recipient's rights under this Agreement shall terminate if it fails to 183 | comply with any of the material terms or conditions of this Agreement and 184 | does not cure such failure in a reasonable period of time after becoming 185 | aware of such noncompliance. If all Recipient's rights under this Agreement 186 | terminate, Recipient agrees to cease use and distribution of the Program as 187 | soon as reasonably practicable. However, Recipient's obligations under this 188 | Agreement and any licenses granted by Recipient relating to the Program shall 189 | continue and survive. 190 | 191 | Everyone is permitted to copy and distribute copies of this Agreement, but in 192 | order to avoid inconsistency the Agreement is copyrighted and may only be 193 | modified in the following manner. The Agreement Steward reserves the right to 194 | publish new versions (including revisions) of this Agreement from time to 195 | time. No one other than the Agreement Steward has the right to modify this 196 | Agreement. The Eclipse Foundation is the initial Agreement Steward. The 197 | Eclipse Foundation may assign the responsibility to serve as the Agreement 198 | Steward to a suitable separate entity. Each new version of the Agreement will 199 | be given a distinguishing version number. The Program (including 200 | Contributions) may always be distributed subject to the version of the 201 | Agreement under which it was received. In addition, after a new version of 202 | the Agreement is published, Contributor may elect to distribute the Program 203 | (including its Contributions) under the new version. Except as expressly 204 | stated in Sections 2(a) and 2(b) above, Recipient receives no rights or 205 | licenses to the intellectual property of any Contributor under this 206 | Agreement, whether expressly, by implication, estoppel or otherwise. All 207 | rights in the Program not expressly granted under this Agreement are 208 | reserved. 209 | 210 | This Agreement is governed by the laws of the State of New York and the 211 | intellectual property laws of the United States of America. No party to this 212 | Agreement will bring a legal action under this Agreement more than one year 213 | after the cause of action arose. Each party waives its rights to a jury trial 214 | in any resulting litigation. 215 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Ambiparse 2 | 3 | Ambiparse is an embedded Clojure DSL that provides parsing for very rich 4 | grammars including ambiguity and adaptability. 5 | 6 | ## Overview 7 | 8 | Ambiparse is capable of parsing any context free language, including 9 | both left-and-right recursive grammars, as well as ambiguous grammars. The 10 | parser is capable of returning a parse tree for ambiguous parses. Furthermore, 11 | grammar productions can be modified during parsing, allowing langauges to 12 | express arbitrary syntax extensions. This means that the parser is actually 13 | capable of parsing many context-sensitive grammars as well. 14 | 15 | Ambiguity can be reduced or eliminated from grammars with "disambiguation 16 | filters", which allow for declarative operator associativity and precedence, as 17 | well as filtering by arbitrary predicates. 18 | 19 | Being an embedded DSL enables inline semantic actions. Semantic actions can 20 | rewrite _concrete_ syntax trees at parse time in to arbitrary _abstract_ 21 | syntax trees, or even perform simple interpretation. 22 | 23 | To the best of my knowledge, this confluence of features is novel. 24 | 25 | ## Usage 26 | 27 | Ambiparse is still very early days, so no release has been published, and a 28 | metric ton of documentation is yet to be written. This project was built to 29 | serve a higher-priority project, so it may be a while, if ever, that this 30 | changes. 31 | 32 | See the [calculator][1] and [edn parser][2] test code for examples. 33 | 34 | The public API is provided by [ambiparse.clj][8] exclusively. 35 | 36 | Note that performance is expectedly awful. This too may never change. 37 | 38 | ## References 39 | 40 | - [GLL Parsing][5] by Scott and Johnstone. 41 | - [Disambiguation Filters for Scannerless Generalized LR Parsers][6] 42 | by M.G.J. van den Brand et al. 43 | - [Faster, Practical GLL Parsing][7] by Afroozeh and Izmaylova. 44 | - [Recursive Adaptable Grammars][9] by John Shutt. 45 | 46 | ## Acknowledgements 47 | 48 | Thanks to Mark Engelberg for [Instaparse][3], an excellent library that you 49 | probably should use over Ambiparse, [his talk][4] which is an excellent 50 | explanation of the GLL algorithm, as well as several excellent discussions 51 | on the finer points of the algorithm's implementation. 52 | 53 | ## License 54 | 55 | Copyright © 2016 Brandon Bloom 56 | 57 | Distributed under the Eclipse Public License version 1.0. 58 | 59 | [1]: ./test/ambiparse/calc_test.clj 60 | [2]: ./test/ambiparse/edn_test.clj 61 | [3]: https://github.com/Engelberg/instaparse 62 | [4]: https://www.youtube.com/watch?v=b2AUW6psVcE 63 | [5]: http://dotat.at/tmp/gll.pdf 64 | [6]: http://www.st.ewi.tudelft.nl/~eelco/papers/BSVV02.pdf 65 | [7]: http://oai.cwi.nl/oai/asset/24026/24026B.pdf 66 | [8]: ./src/ambiparse.clj 67 | [9]: http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.45.2424&rep=rep1&type=pdf 68 | -------------------------------------------------------------------------------- /doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to gll 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /doc/viz.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/brandonbloom/ambiparse/eeb047878e4990a877810ac4805a45d8cfe9acfb/doc/viz.jpeg -------------------------------------------------------------------------------- /notes: -------------------------------------------------------------------------------- 1 | TODO 2 | - improved error reporting and recovery 3 | - form to filter internal ambiguity 4 | - more disambiguation filters: 5 | - preceeds/follows restruction 6 | - Consider cut operator. See: 7 | Packrat Parsers Can Handle Practical Grammars in Mostly Constant Space 8 | "Tell" version of errors - can "commit" or "abort" current node. 9 | 10 | Known Issues 11 | - epsilon infinite loops 12 | - eof infinite loops 13 | - bad error messages from nested alts or multiple-rightmost partial parses 14 | - * and + do not set ::a/structure 15 | - caught exceptions are frequently swallowed by unprincipaled recovery 16 | 17 | Perf Ideas 18 | - avoid nodes for constant terminals 19 | - records/protocols for pattern objects 20 | - record for concrete syntax trees 21 | - can sub graphs be garbage collected after buffered nodes are flushed? 22 | - use pre-allocated exception for fail! 23 | - dispatch node 24 | - based on what's parsed, gen rule. 25 | - like alt with the opposite of a head-fail 26 | - filter earlier (consider flat/left/right don't even try to match nested) 27 | 28 | 29 | Concern: Dispatch dynamically creates patterns. 30 | What if a pattern created has a closure? 31 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject ambiparse "0.1.0-SNAPSHOT" 2 | :description "FIXME: write description" 3 | :url "http://example.com/FIXME" 4 | :license {:name "Eclipse Public License" 5 | :url "http://www.eclipse.org/legal/epl-v10.html"} 6 | :dependencies [[org.clojure/clojure "1.9.0-alpha14"]] 7 | :profiles {:dev {:dependencies [[dorothy-bbloom "0.0.9-SNAPSHOT"] 8 | [fipp "0.6.7"]] 9 | :resource-paths ["test/resources"]}} 10 | :global-vars {*warn-on-reflection* true 11 | *print-length* 30 12 | *print-level* 10 13 | ;*unchecked-math* :warn-on-boxed 14 | }) 15 | -------------------------------------------------------------------------------- /src/ambiparse.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse 2 | (:refer-clojure :exclude [cat * + filter remove cons resolve]) 3 | (:require [ambiparse.gll :as gll] 4 | [ambiparse.util :refer :all])) 5 | 6 | ;;XXX Avoid single-segment namespace due to JVM "unnamed package" restrictions. 7 | 8 | (alias 'a 'ambiparse) 9 | 10 | ;; Reserve the ambiparse namespace for non-label metadata. 11 | (create-ns 'ambiparse.core) 12 | (alias 'c 'ambiparse.core) 13 | 14 | ;;; Primitives. 15 | 16 | (def eof `eof) 17 | 18 | (defn lit [x] 19 | (with-meta (list `lit x) 20 | {::a/head-fail #(not= % x)})) 21 | 22 | (defn -pred [expr f] 23 | (with-meta (list `-pred expr f) 24 | {::a/head-fail (comp not f)})) 25 | 26 | (defmacro pred [f] 27 | `(-pred '~f ~f)) 28 | 29 | (defn cat [& pats] 30 | (with-meta (list* `cat pats) 31 | {::a/head-fail (-> pats first meta ::a/head-fail)})) 32 | 33 | (defn alt [& pats] 34 | (let [pat (list* `alt pats)] 35 | (if-let [head-fail (and (seq pats) 36 | (every? (comp ::a/head-fail meta) pats) 37 | (apply every-pred (comp ::a/head-fail meta) pats))] 38 | (with-meta pat {::a/head-fail head-fail}) 39 | pat))) 40 | 41 | (defn * [pat] 42 | (list `* pat)) 43 | 44 | (defn + [pat] 45 | (with-meta (list `+ pat) 46 | {::a/head-fail (-> pat meta ::a/head-fail)})) 47 | 48 | (defn ? [pat] 49 | (list `? pat)) 50 | 51 | (defn -rule [pat body f] 52 | (with-meta (list `-rule pat body f) 53 | {::a/head-fail (-> pat meta ::a/head-fail)})) 54 | 55 | (defmacro rule [pat & body] 56 | `(-rule ~pat '~body 57 | (fn [~'%] 58 | (assoc ~'% ::value (do ~@body))))) 59 | 60 | (defn label [name pat] 61 | (with-meta (list `label name pat) 62 | {::a/head-fail (-> pat meta ::a/head-fail)})) 63 | 64 | (defn -prefer [expr pat f] 65 | (with-meta (list `-prefer expr pat f) 66 | {::a/head-fail (-> pat meta ::a/head-fail)})) 67 | 68 | (defmacro prefer [f pat & pats] 69 | `(-prefer '~f 70 | ~(if (seq pats) 71 | `(alt ~pat ~@pats) 72 | pat) 73 | ~f)) 74 | 75 | (defn -filter [expr pat f] 76 | (with-meta (list `-filter expr pat f) 77 | {::a/head-fail (-> pat meta ::a/head-fail)})) 78 | 79 | (defmacro filter [f pat] 80 | `(-filter '~f ~pat ~f)) 81 | 82 | (defn unambiguous 83 | ([pat] 84 | (with-meta (list `unambiguous pat) 85 | {::a/head-fail (-> pat meta ::a/head-fail)})) 86 | ([pat & pats] 87 | (unambiguous (apply alt pat pats)))) 88 | 89 | (defn scope 90 | ([pat] 91 | (with-meta (list `scope pat) 92 | {::a/head-fail (-> pat meta ::a/head-fail)})) 93 | ([pat & pats] 94 | (scope (apply cat pat pats)))) 95 | 96 | (defn -dispatch [pat body f] 97 | (with-meta (list `-dispatch pat body f) 98 | {::a/head-fail (-> pat meta ::a/head-fail)})) 99 | 100 | (defmacro dispatch [pat & body] 101 | `(-dispatch ~pat '~body (fn [~'%] ~@body))) 102 | 103 | (defn resolve 104 | "Retreives the bound value of a qualified keyword in the current parsing 105 | environment, or nil." 106 | [id] 107 | (assert (qualified-keyword? id)) 108 | (some-> (gll/env id) first val)) 109 | 110 | (defmacro bind! 111 | "Changes the parsing environment to bind the qualified keyword id to resolve 112 | to val. Any prior binding for id is removed. If val has unstable value 113 | identity (such as a freshly allocated lexical closure), a stable key can 114 | be supplied to prevent the parser from going in to an infinite loop. 115 | If the current binding has the same key, body is not evaluated and the 116 | binding environment remains unchanged." 117 | ([id val] 118 | `(let [id# ~id 119 | val# ~val] 120 | (bind! id# val# val#))) 121 | ([id key & body] 122 | `(let [id# ~id 123 | key# ~key 124 | binding# (first (gll/env id#))] 125 | (assert (qualified-keyword? ~id)) 126 | (when (or (nil? binding#) (not= (key binding#) key#)) 127 | (change! gll/env assoc id# {key# (do ~@body)})) 128 | nil))) 129 | 130 | (defmacro add! 131 | "Extends the parsing environment with an additional alt pattern for var. 132 | Replaces existing extensions with the same var and key. See bind! for a 133 | warning about environment binding keys." 134 | [var key & body] 135 | `(let [v# ~var] 136 | (assert (var? v#)) 137 | (change! gll/env update-in [v# ~key] #(doto (or % (do ~@body)) assert)) 138 | nil)) 139 | 140 | (defn del! [var key] 141 | (assert (var? var)) 142 | (change! gll/env update var dissoc key) 143 | nil) 144 | 145 | (defmacro extend-env 146 | "Executes body in the given parsing environment, returning the changed env." 147 | [env & body] 148 | `(binding [gll/env ~env] 149 | ~@body 150 | gll/env)) 151 | 152 | (defmacro build-env 153 | "Like extend-env with an empty environment." 154 | [& body] 155 | `(extend-env {} ~@body)) 156 | 157 | (defn fail! 158 | ([msg] (fail! msg {})) 159 | ([msg data] 160 | (throw (ex-info msg (assoc data ::a/failure true))))) 161 | 162 | 163 | ;;; Execution. 164 | 165 | (defn parses 166 | "Returns a lazy-seq of successful parses or a failure. 167 | 168 | Options: 169 | :unique Produces an ambiguity error if there are multiple successful parses. 170 | :fuel Limits the number of steps to perform before giving up. 171 | :viz Set to true to pop open a debug visualization of the parser network. 172 | :env Initial environment." 173 | ([pat s] (parses pat s {})) 174 | ([pat s opts] 175 | (gll/run pat s opts))) 176 | 177 | (defn parse 178 | "Returns a pair of an unambiguous parse and a failure. 179 | Only one will be non-nil. See parses." 180 | ([pat s] (parse pat s {})) 181 | ([pat s opts] 182 | (let [ps (parses pat s (assoc opts :unique true))] 183 | (if (seq? ps) 184 | [(first ps) nil] 185 | [nil ps])))) 186 | 187 | (defn parse! 188 | "Returns the success result of parse or throws the failure." 189 | ([pat s] (parse! pat s {})) 190 | ([pat s opts] 191 | (let [[p err] (parse pat s opts)] 192 | (if err 193 | (throw (ex-info "Parse failed" err)) 194 | p)))) 195 | 196 | 197 | ;;; Library. 198 | 199 | (def digit 200 | (pred #(and (char? %) (Character/isDigit ^Character %)))) 201 | 202 | (def alpha 203 | (pred #(and (char? %) (Character/isLetter ^Character %)))) 204 | 205 | (defn length [t] 206 | (- (-> t ::end :index) (-> t ::begin :index))) 207 | 208 | (defn longest [pat] 209 | (prefer (comparator-key length) pat)) 210 | 211 | (defn greedy [pat] 212 | (prefer (comparator-key count) pat)) 213 | 214 | (defmacro remove [f pat] 215 | `(-filter '~(list 'comp 'not f) ~pat (comp not ~f))) 216 | 217 | (defn nested-at? [f t] 218 | (= (-> t ::elements f ::structure) (::structure t))) 219 | 220 | (defn nested? [t] 221 | (let [s (::structure t)] 222 | (some #(= (::structure %) s) (::elements t)))) 223 | 224 | (defn nested-left? [t] 225 | (nested-at? first t)) 226 | 227 | (defn nested-right? [t] 228 | (nested-at? peek t)) 229 | 230 | (defn left [pat] 231 | (remove nested-right? pat)) 232 | 233 | (defn right [pat] 234 | (remove nested-left? pat)) 235 | 236 | ;;TODO: Test this. 237 | (defn flat [pat] 238 | (remove nested? pat)) 239 | 240 | (defn interpose* [sep elem] 241 | (rule (? (cat (label ::c/first elem) (label ::c/rest (* (cat sep elem))))) 242 | (if (-> % ::value seq) 243 | (into [(::c/first %)] (->> % ::c/rest (map second))) 244 | []))) 245 | 246 | (defn cons [x seq] 247 | (rule (cat (label ::c/first x) (label ::c/rest seq)) 248 | (list* (::c/first %) (::c/rest %)))) 249 | -------------------------------------------------------------------------------- /src/ambiparse/gll.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.gll 2 | (:refer-clojure :exclude [send]) 3 | (:require [clojure.spec :as s] 4 | [clojure.set :as set] 5 | [ambiparse.util :refer :all] 6 | [ambiparse.viz :as viz])) 7 | 8 | (create-ns 'ambiparse) 9 | (alias 'a 'ambiparse) 10 | 11 | ;;; Glossary: 12 | ;;; i = index in to source string 13 | ;;; b = begin index of span 14 | ;;; e = end index of span 15 | ;;; pat = pattern 16 | ;;; k = key of node (pair of i and pat) 17 | ;;; n = node 18 | ;;; t = tree 19 | ;;; c = char (or other atomic terminal) 20 | ;;; s = string (ie. sequence of terminals) 21 | ;;; src = source key of edge 22 | ;;; dst = destination key of edge 23 | ;;; d = decorator attached to edges 24 | 25 | ;;; Essential state. 26 | (def ^:dynamic input) 27 | (def ^:dynamic graph) 28 | (def ^:dynamic queue) 29 | (def ^:dynamic buffered) 30 | (def ^:dynamic root) 31 | (def ^{:dynamic true 32 | :doc "Vector mapping line minus one to index of previous newline."} 33 | breaks) 34 | (def ^{:dynamic true 35 | :doc "Furthest index into the input examined."} 36 | traveled) 37 | 38 | ;;; Debug state. 39 | (def trace false) 40 | (def ^{:dynamic true 41 | :doc "Steps to perform before giving up. Set to 0 to disable."} 42 | fuel) 43 | 44 | (defmacro log [& xs] 45 | (require 'fipp.edn) 46 | (when trace 47 | `(fipp.edn/pprint (list ~@xs) {:width 200}))) 48 | 49 | (defn state [] 50 | {:input input 51 | :graph graph 52 | :root root 53 | :queue queue 54 | :fuel fuel}) 55 | 56 | (defrecord Context [^long i, env]) 57 | 58 | (defn context? [x] 59 | (instance? Context x)) 60 | 61 | (defrecord Key [pat, ^Context ctx]) 62 | 63 | (defn key? [x] 64 | (instance? Key x)) 65 | 66 | (s/def ::token any?) 67 | (s/def ::pos (s/keys :req-un [::index] :opt-un [::line ::column ::token])) 68 | (s/def ::line nat-int?) 69 | (s/def ::column nat-int?) 70 | (s/def ::index nat-int?) 71 | 72 | (s/def ::pattern some?) 73 | 74 | (s/def ::var (s/or :var var? :kw qualified-keyword?)) 75 | (s/def ::env (s/every-kv ::var (s/every-kv any? ::pattern, :kind map?))) 76 | 77 | (s/def ::span (s/cat :begin ::index :end ::index)) 78 | 79 | (s/def ::a/begin ::pos) 80 | (s/def ::a/end ::pos) 81 | (s/def ::a/children (s/every ::tree :kind vector?)) 82 | (s/def ::a/pattern ::pattern) 83 | (s/def ::a/matched (s/every-kv ::pattern (s/every ::span :kind set?))) 84 | (s/def ::a/structure ::pattern) 85 | (s/def ::a/elements (s/every ::tree :kind vector?)) 86 | (s/def ::a/env ::env) 87 | (s/def ::a/continue (s/every ::pattern)) 88 | 89 | (s/def ::passed 90 | (s/keys :req [::a/begin ::a/end ::a/value ::a/env] 91 | :opt [::a/children ::a/structure])) 92 | 93 | (s/def ::tree 94 | (s/merge ::passed 95 | (s/keys :req [::a/pattern] 96 | :opt [::a/elements ::a/continue ::a/matched]))) 97 | 98 | (defn scan-breaks [^long i] 99 | (let [lt (long traveled)] 100 | (when (< lt i) 101 | (doseq [^long n (range lt (min (inc i) (dec (count input))))] 102 | (when (and (= (nth input i) \newline) 103 | breaks 104 | (< (peek breaks) n)) 105 | (change! breaks conj n))) 106 | (set! traveled i)))) 107 | 108 | (defn input-at [^long i] 109 | (if (< i (count input)) 110 | (do (scan-breaks i) 111 | (nth input i)) 112 | ::a/eof)) 113 | 114 | (defn pos-at [^long i] 115 | (scan-breaks i) 116 | (if breaks 117 | ;;TODO: Binary search? 118 | (reduce-kv (fn [res, ^long n, ^long b] 119 | (if (<= i b) 120 | (reduced res) 121 | {:index i :line (inc n) :column (- i b -1)})) 122 | {:index i :line 1 :column (inc i)} 123 | breaks) 124 | {:index i :token (input-at i)})) 125 | 126 | (defn node-path [^Key k] 127 | (let [^Context ctx (.ctx k) 128 | i (.i ctx)] 129 | [i k])) 130 | 131 | (defn get-node [^Key k] 132 | (get-in graph (node-path k))) 133 | 134 | (defn rightmost [kw xs] 135 | ;XXX return _all_ rightmost, otherwise nested alts mask other alts. 136 | (when (seq xs) 137 | (apply max-key #(-> % kw :index) xs))) 138 | 139 | (defn rightmost-received [k] 140 | (rightmost ::a/end (-> k get-node :received))) 141 | 142 | (defn classify [pat] 143 | (cond 144 | (sequential? pat) (first pat) 145 | (= pat 'ambiparse/eof) 'ambiparse/eof 146 | :else (class pat))) 147 | 148 | ;; Fully re-create multimethods for dev sanity. 149 | (doseq [sym '[init passed -failure]] 150 | (ns-unmap *ns* sym)) 151 | 152 | ;; For each of these, (= (Key. pat ctx) k) 153 | ;; They are separated out for ease of type hinting and field access. 154 | 155 | (defmulti init 156 | "Called when a parse node for a given key is first created." 157 | (fn [pat ctx k] 158 | (classify pat))) 159 | 160 | (defmulti passed 161 | "Tells actor at (Key. pat ctx) about a successful sub-parse." 162 | (fn [pat ctx k t] 163 | (classify pat))) 164 | 165 | (defmulti -failure 166 | "Asks actor at (Key. pat ctx) for a failure." 167 | (fn [pat ctx k] 168 | (classify pat))) 169 | 170 | (defn errors-at [i & errs] 171 | {:pos (pos-at i) 172 | :errors (->> errs (remove nil?) set)}) 173 | 174 | (s/def ::expected any?) 175 | (s/def ::predicate ifn?) 176 | (s/def ::message string?) 177 | (s/def ::exception #(instance? Exception %)) 178 | (s/def ::error 179 | (s/keys :opt-un [::expected ::message ::predicate 180 | ::expression ::exception ::data])) 181 | (s/def ::errors (s/every ::error :kind set?)) 182 | (s/def ::failure (s/keys :req-un [::pos ::errors])) 183 | 184 | (def ^:dynamic inside) 185 | 186 | ;;TODO: Extend failure objects to include both begin and end instead of just 187 | ;; pos. Begin is where to report error, end how far parser got for sake of 188 | ;; finding the "best" error. 189 | (s/fdef failure 190 | :args (s/alt :root (s/cat) :specific (s/cat :k key?)) 191 | :ret (s/nilable ::failure)) 192 | 193 | (defn failure 194 | ([] 195 | (binding [inside #{}] 196 | (failure root))) 197 | ([^Key k] 198 | (let [^Context ctx (.ctx k) 199 | n (get-node k)] 200 | (when-not (inside k) 201 | (binding [inside (conj inside k)] 202 | ;;XXX Let individual -failure methods handle :exception so 203 | ;; that they can include :expr or :body in the error. 204 | (if-let [^Exception ex (:exception n)] 205 | (errors-at (or (-> (rightmost-received k) ::a/end :index) 206 | (.i ctx)) 207 | (if (-> ex ex-data ::a/failure) 208 | {:message (.getMessage ex) 209 | :data (-> ex ex-data (dissoc ::a/failure))} 210 | {:exception ex})) 211 | (-failure (.pat k) ctx k))))))) 212 | 213 | (defn send [msg] 214 | ;(log 'send msg) 215 | (change! queue conj msg) 216 | nil) 217 | 218 | (defn head-fail [i pat] 219 | (cond 220 | (char? pat) (not= (input-at i) pat) 221 | (= pat 'ambiparse/eof) (not= i (count input)) 222 | :else (when-let [f (-> pat meta ::a/head-fail)] 223 | (f (input-at i))))) 224 | 225 | (s/fdef add-node 226 | :args (s/cat :pat ::pattern, :ctx context?)) 227 | 228 | (defn add-node [pat, ^Context ctx] 229 | (when-not (head-fail (.i ctx) pat) 230 | (let [k (Key. pat ctx)] 231 | (when-not (get-node k) 232 | (change! graph assoc-in (node-path k) {}) 233 | (send [:init k])) 234 | k))) 235 | 236 | (s/def ::prefix ::passed) 237 | (s/def ::continue (s/nilable (s/or :cat (s/every ::pattern :kind seq?) 238 | :dispatch #{:dispatch}))) 239 | 240 | (s/def ::decorator 241 | (s/keys :req-un [::prefix] 242 | :opt-un [::continue ::dispatch])) 243 | 244 | (s/fdef decorate 245 | :args (s/cat :t ::tree, ::d (s/nilable ::decorator)) 246 | :ret ::tree) 247 | 248 | (defn decorate 249 | "Applies a transformation to trees flowing along an edge." 250 | [t {:as d :keys [prefix continue dispatch]}] 251 | {:post [(s/assert ::tree %)]} 252 | (if d 253 | (merge prefix t 254 | {::a/begin (::a/begin prefix) 255 | ::a/end (::a/end t) 256 | ::a/children (conj (::a/children prefix) t) 257 | ::a/value (conj (::a/value prefix) (::a/value t)) 258 | ::a/elements (conj (::a/elements prefix) t)} 259 | (when continue 260 | {::a/continue continue})) 261 | t)) 262 | 263 | (s/fdef add-edge 264 | :args (s/cat :pat some? 265 | :ctx context? 266 | :dst key? 267 | :d (s/nilable ::decorator))) 268 | 269 | (defn add-edge [pat ctx dst d] 270 | (when-let [k (add-node pat ctx)] 271 | (let [n (get-node k)] 272 | (when-not (get-in n [:edges dst d]) 273 | (change! graph update-in (conj (node-path k) :edges dst) conjs d) 274 | ;; Replay previously generated parses. 275 | (doseq [t (:generated n)] 276 | (send [:pass dst (decorate t d)])))))) 277 | 278 | (s/fdef pass 279 | :args (s/cat :k key?, :t ::passed)) 280 | 281 | (defn pass [{:keys [pat ctx] :as k} t] 282 | (let [v (::a/value t) 283 | t (assoc t ::a/pattern pat) 284 | n (get-node k)] 285 | (when-not (get-in n [:generated t]) 286 | (change! graph update-in (conj (node-path k) :generated) conjs t) 287 | (doseq [[dst ds] (:edges n) 288 | d ds] 289 | (send [:pass dst (decorate t d)]))))) 290 | 291 | (defn pass-child [k t] 292 | (pass k (assoc t ::a/children [t]))) 293 | 294 | (defn empty-in [^Context ctx] 295 | (let [p (pos-at (.i ctx))] 296 | {::a/begin p 297 | ::a/end p 298 | ::a/children [] 299 | ::a/elements [] 300 | ::a/value [] 301 | ::a/env (.env ctx)})) 302 | 303 | (defn report-ex [k ex] 304 | (when-not (-> ex ex-data ::a/failure) 305 | (binding [*out* *err*] 306 | #_(prn 'catch-at k ex))) ;XXX Remove me after implementing error recovery. 307 | (change! graph update-in (conj (node-path k) :exception) #(or % ex)) 308 | nil) 309 | 310 | (def ^:dynamic env) 311 | 312 | (defmacro try-at [k & body] 313 | `(let [k# ~k] 314 | (try 315 | ~@body 316 | (catch ~'Exception ex# 317 | (report-ex k# ex#))))) 318 | 319 | ;;; Terminals. 320 | 321 | (defn lit-init [^long i, c, {:keys [ctx] :as k}] 322 | (let [x (input-at i)] 323 | (when (= x c) 324 | (pass k {::a/begin (pos-at i) 325 | ::a/end (pos-at (inc i)) 326 | ::a/value x 327 | ::a/env (.env ^Context ctx)})))) 328 | 329 | (defn lit-failure [^long i, c] 330 | (when (not= (input-at i) c) 331 | (errors-at i {:expected c}))) 332 | 333 | (defmethod init 'ambiparse/lit [[_ c], ^Context ctx, k] 334 | (lit-init (.i ctx) c k)) 335 | 336 | (defmethod -failure 'ambiparse/lit [[_ c], ^Context ctx, k] 337 | (lit-failure (.i ctx) c)) 338 | 339 | (defmethod init java.lang.Character [c, ^Context ctx, k] 340 | (lit-init (.i ctx) c k)) 341 | 342 | (defmethod -failure java.lang.Character [c, ^Context ctx, k] 343 | (lit-failure (.i ctx) c)) 344 | 345 | (defmethod init java.lang.String [s, ^Context ctx, k] 346 | (let [i (.i ctx)] 347 | (loop [n 0] 348 | (if (< n (count s)) 349 | (when (= (input-at (+ i n)) (nth s n)) 350 | (recur (inc n))) 351 | (pass k {::a/begin (pos-at i) 352 | ::a/end (pos-at (+ i n)) 353 | ::a/value s 354 | ::a/env (.env ctx)}))))) 355 | 356 | (defmethod -failure java.lang.String [s, ^Context ctx, k] 357 | (let [i (.i ctx)] 358 | (loop [n 0] 359 | (cond 360 | (= n (count s)) (errors-at (+ i n) {:expected ::a/eof}) 361 | (= (input-at (+ i n)) (nth s n)) (recur (inc n)) 362 | :else (let [actual (subs input i (min (count input) (+ i (count s))))] 363 | (errors-at (+ i n) {:expected s :actual actual})))))) 364 | 365 | (defmethod init 'ambiparse/-pred [[_ _ f], ^Context ctx, k] 366 | (let [i (.i ctx) 367 | x (input-at i)] 368 | (when (try-at k (binding [env (.env ctx)] (f x))) 369 | (pass k {::a/begin (pos-at i) 370 | ::a/end (pos-at (inc i)) 371 | ::a/value x 372 | ::a/env (.env ctx)})))) 373 | 374 | (defmethod -failure 'ambiparse/-pred [[_ expr f], ^Context ctx, k] 375 | (let [i (.i ctx) 376 | x (input-at i)] 377 | (when-not (f x) 378 | (errors-at i {:message "Predicate failed" 379 | :predicate f 380 | :expression expr})))) 381 | 382 | (defmethod init 'ambiparse/eof [_, ^Context ctx, k] 383 | (let [i (.i ctx)] 384 | (when (= (count input) i) 385 | (let [pos (pos-at i)] 386 | (pass k {::a/begin pos 387 | ::a/end pos 388 | ::a/value ::a/eof 389 | ::a/env (.env ctx)}))))) 390 | 391 | (defmethod -failure 'ambiparse/eof [_, ^Context ctx, k] 392 | (let [i (.i ctx)] 393 | (when (not= i (count input)) 394 | (errors-at i {:expected ::a/eof})))) 395 | 396 | 397 | ;;; Concatenation. 398 | 399 | (defn do-cat [t, ^Context ctx, ^Key k, pats] 400 | (if-let [[p & ps] (seq pats)] 401 | (let [i (-> t ::a/end :index) 402 | env (::a/env t) 403 | d {:prefix t :continue ps}] 404 | (add-edge p (Context. i env) k d)) 405 | (pass k (assoc t ::a/structure (.pat k))))) 406 | 407 | (defmethod init 'ambiparse/cat [[_ & pats], ^Context ctx, k] 408 | (do-cat (empty-in ctx) ctx k pats)) 409 | 410 | (defmethod passed 'ambiparse/cat [_ ctx k t] 411 | (do-cat (dissoc t ::a/continue) ctx k (::a/continue t))) 412 | 413 | (defmethod -failure 'ambiparse/cat 414 | [[_ & pats], ^Context ctx, k] 415 | (let [[pat ctx] (if-let [t (rightmost-received k)] 416 | (let [pats (-> t ::a/continue seq) 417 | i (-> t ::a/end :index) 418 | env (::a/env t)] 419 | [(first pats) (Context. i env)]) 420 | [(first pats) ctx])] 421 | (or (and pat (failure (Key. pat ctx))) 422 | (errors-at (:i ctx) {:expected ::a/eof})))) 423 | 424 | 425 | ;;; Dispatch. 426 | 427 | (defmethod init 'ambiparse/-dispatch 428 | [[_ pat _ _] ctx k] 429 | (add-edge pat ctx k {:prefix (empty-in ctx) 430 | :continue :dispatch})) 431 | 432 | (defmethod passed 'ambiparse/-dispatch 433 | [[_ pat _ f], ^Context ctx, ^Key k, t] 434 | (if (::a/continue t) 435 | (let [t (dissoc t ::a/continue) 436 | i (-> t ::a/end :index) 437 | env (::a/env t)] 438 | (when-let [pat (try-at k 439 | (binding [env (.env ctx)] 440 | (-> t ::a/elements first f)))] 441 | (add-edge pat (Context. i env) k {:prefix t}))) 442 | (let [t (-> t 443 | (update ::a/value last) 444 | (assoc ::a/structure (.pat k)))] 445 | (pass k t)))) 446 | 447 | (defmethod -failure 'ambiparse/-dispatch 448 | [[_ pat _ f], ^Context ctx, k] 449 | (or (failure (Key. pat ctx)) 450 | (if-let [t (rightmost-received k)] 451 | (let [i (-> t ::a/end :index) 452 | x (try 453 | (binding [env (.env ctx)] 454 | (-> t ::a/elements first f)) 455 | (catch Exception ex 456 | ex))] 457 | (if (instance? Exception x) 458 | (errors-at i {:exception x}) 459 | (failure (Key. x (Context. i (.env ctx))))))))) 460 | 461 | 462 | ;;; Alternation. 463 | 464 | (defn init-alt [^Key k, pats] 465 | (doseq [pat pats] 466 | (add-edge pat (.ctx k) k nil))) 467 | 468 | (defmethod init 'ambiparse/alt 469 | [[_ & pats] _ k] 470 | (init-alt k pats)) 471 | 472 | (defmethod passed 'ambiparse/alt [_ _ k t] 473 | (pass-child k t)) 474 | 475 | (defn alt-failure 476 | [^Context ctx, pats] 477 | (let [failures (->> pats (map #(failure (Key. % ctx))) (remove nil?)) 478 | pos (->> failures (rightmost :pos) :pos) 479 | failures (filter #(= (:pos %) pos) failures)] 480 | (cond 481 | (next failures) (apply errors-at (.i ctx) 482 | (apply set/union (map :errors failures))) 483 | (seq failures) (first failures)))) 484 | 485 | (defmethod -failure 'ambiparse/alt [[_ & pats] ctx k] 486 | (alt-failure ctx pats)) 487 | 488 | 489 | ;;; Repetition. 490 | 491 | (defn non-rec [pat t] 492 | (let [b (-> t ::a/begin :index) 493 | e (-> t ::a/end :index) 494 | span [b e]] 495 | (when-not (get-in t [::a/matched pat span]) 496 | (update-in t [::a/matched pat] conjs span)))) 497 | 498 | (defn do-rep [[_ pat :as p] ctx k t] 499 | (pass k t) 500 | (when-let [t (non-rec pat t)] 501 | (let [b (-> t ::a/begin :index) 502 | e (-> t ::a/end :index)] 503 | (when (< b e) 504 | (add-edge pat (Context. e (::a/env t)) k {:prefix t}))))) 505 | 506 | (defmethod init 'ambiparse/* [[_ pat] ctx k] 507 | (let [t (empty-in ctx)] 508 | (pass k t) 509 | (add-edge pat ctx k {:prefix t}))) 510 | 511 | (defmethod init 'ambiparse/+ [[_ pat] ctx k] 512 | (add-edge pat ctx k {:prefix (empty-in ctx)})) 513 | 514 | (defmethod passed 'ambiparse/* [pat ctx k t] 515 | (do-rep pat ctx k t)) 516 | 517 | (defmethod passed 'ambiparse/+ [pat ctx k t] 518 | (do-rep pat ctx k t)) 519 | 520 | (defn rep-failure [pat, ^Context ctx, k] 521 | (let [[e env] (if-let [t (rightmost-received k)] 522 | [(-> t ::a/end :index) (::a/env t)] 523 | [(.i ctx) (.env ctx)])] 524 | (failure (Key. pat (Context. e env))))) 525 | 526 | (defmethod -failure 'ambiparse/* [[_ pat], ^Context ctx, k] 527 | (rep-failure pat ctx k)) 528 | 529 | (defmethod -failure 'ambiparse/+ [[_ pat] ctx k] 530 | (or (failure (Key. pat ctx)) 531 | (rep-failure pat ctx k))) 532 | 533 | 534 | ;;; Optional. 535 | 536 | (defmethod init 'ambiparse/? [[_ pat] ctx k] 537 | (let [t (assoc (empty-in ctx) ::a/value nil)] 538 | (pass k t) 539 | (add-edge pat ctx k nil))) 540 | 541 | (defmethod passed 'ambiparse/? [pat ctx k t] 542 | (pass k t)) 543 | 544 | (defmethod -failure 'ambiparse/? [[_ pat], ^Context ctx, k] 545 | (failure (Key. pat ctx))) 546 | 547 | 548 | ;;; Transformation. 549 | 550 | (defmethod init 'ambiparse/-rule 551 | [[_ pat _ f] ctx k] 552 | (add-edge pat ctx k nil)) 553 | 554 | (defmethod passed 'ambiparse/-rule 555 | [[_ pat _ f], ^Context ctx, k, t] 556 | (binding [env (::a/env t)] 557 | (when-let [t* (try-at k (f t))] 558 | (let [t* (assoc t* ::a/env env)] 559 | (pass-child k t*))))) 560 | 561 | (defmethod -failure 'ambiparse/-rule 562 | [[_ pat body _] ctx k] 563 | (failure (Key. pat ctx))) 564 | 565 | 566 | ;;; Labeling. 567 | 568 | (defmethod init 'ambiparse/label 569 | [[_ _ pat] ctx k] 570 | (add-edge pat ctx k nil)) 571 | 572 | (defn strip-labels [t] 573 | (->> t 574 | (filter (fn [[k v]] 575 | (and (keyword? k) 576 | (= (namespace k) "ambiparse")))) 577 | (into {}))) 578 | 579 | (defmethod passed 'ambiparse/label 580 | [[_ name pat] ctx k t] 581 | (pass-child k (assoc (strip-labels t) name (::a/value t)))) 582 | 583 | (defmethod -failure 'ambiparse/label 584 | [[_ _ pat] ctx k] 585 | (failure (Key. pat ctx))) 586 | 587 | 588 | ;;; Indirection. 589 | 590 | (defn var-alts [var, ^Context ctx] 591 | (-> (get (.env ctx) var) 592 | vals 593 | (conj @var))) 594 | 595 | (defmethod init clojure.lang.Var 596 | [pat ctx k] 597 | (init-alt k (var-alts pat ctx))) 598 | 599 | (defmethod passed clojure.lang.Var 600 | [pat ctx k t] 601 | (when-let [t (non-rec pat t)] 602 | (pass-child k t))) 603 | 604 | (defmethod -failure clojure.lang.Var 605 | [pat ctx k] 606 | ;;TODO: Include var in failure somehow. 607 | (alt-failure ctx (var-alts pat ctx))) 608 | 609 | 610 | ;;; Precedence. 611 | 612 | (defmethod init 'ambiparse/-prefer 613 | [[_ _ pat cmp] ctx k] 614 | (add-edge pat ctx k nil)) 615 | 616 | (defmethod passed 'ambiparse/-prefer 617 | [[_ _ pat cmp], ^Context ctx, k, t] 618 | (let [{:keys [buffer]} (get-node k) 619 | buffer* (try-at k 620 | (binding [env (.env ctx)] 621 | (cond 622 | ;; First parse. 623 | (empty? buffer) #{t} 624 | ;; Strictly preferrable. 625 | (every? #(neg? (cmp % t)) buffer) #{t} 626 | ;; Not strictly less preferrable. 627 | ;;TODO: Compare in one pass over buffer. 628 | (some #(zero? (cmp % t)) buffer) (conjs buffer t))))] 629 | (when buffer* 630 | (change! graph assoc-in (conj (node-path k) :buffer) buffer*) 631 | (change! buffered conj k)))) 632 | 633 | (defmethod -failure 'ambiparse/-prefer 634 | [[_ _ pat] ctx k] 635 | (failure (Key. pat ctx))) 636 | 637 | 638 | ;;; Filtering. 639 | ;;TODO: remove seems more common - better primative? 640 | 641 | (defmethod init 'ambiparse/-filter 642 | [[_ _ pat _] ctx k] 643 | (add-edge pat ctx k nil)) 644 | 645 | (defmethod passed 'ambiparse/-filter 646 | [[_ _ _ f], ^Context ctx, k, t] 647 | (when (try-at k (binding [env (.env ctx)] (f t))) 648 | (pass-child k t))) 649 | 650 | (defmethod -failure 'ambiparse/-filter 651 | [[_ expr pat f], ^Context ctx, k] 652 | (if-let [rs (-> k get-node :received)] 653 | (errors-at (.i ctx) {:message "Filter predicate failed" 654 | :predicate f 655 | :expression expr 656 | :candidates rs}) 657 | (failure (Key. pat ctx)))) 658 | 659 | 660 | ;;; Adaptation. 661 | 662 | (defmethod init 'ambiparse/scope 663 | [[_ pat] ctx k] 664 | (add-edge pat ctx k nil)) 665 | 666 | (defmethod passed 'ambiparse/scope 667 | [[_ pat], ^Context ctx, k, t] 668 | (pass-child k (assoc t ::a/env (.env ctx)))) 669 | 670 | (defmethod -failure 'ambiparse/scope 671 | [[_ pat] ctx k] 672 | (failure (Key. pat ctx))) 673 | 674 | 675 | ;;; Ambiguity Control. 676 | 677 | (defmethod init 'ambiparse/unambiguous 678 | [[_ pat] ctx k] 679 | (add-edge pat ctx k nil)) 680 | 681 | (defmethod passed 'ambiparse/unambiguous 682 | [[_ pat], ^Context ctx, k, t] 683 | (let [{:keys [received]} (get-node k) 684 | buffer (if (pos? (count received)) #{} #{t})] 685 | (change! graph assoc-in (conj (node-path k) :buffer) buffer) 686 | (change! buffered conj k))) 687 | 688 | (defmethod -failure 'ambiparse/unambiguous 689 | [[_ pat], ^Context ctx, k] 690 | (let [rs (-> k get-node :received)] 691 | (case (count rs) 692 | 0 (failure (Key. pat ctx)) 693 | 1 nil 694 | (errors-at (.i ctx) {:message "Ambiguous" :candidates rs})))) 695 | 696 | 697 | ;;; Execution. 698 | 699 | (defn exec [[op ^Key k & args :as msg]] 700 | (log 'exec msg) 701 | (case op 702 | :init (init (.pat k) (.ctx k) k) 703 | :pass (let [[{v ::a/value, :as t}] args 704 | n (get-node k)] 705 | (when-not (get-in n [:received t]) 706 | (passed (.pat k) (.ctx k) k t) 707 | (change! graph update-in (conj (node-path k) :received) 708 | conjs t))))) 709 | 710 | (defn pump [] 711 | (log 'pump) 712 | ;; Execute queued messages. 713 | (let [q queue] 714 | (set! queue []) 715 | (doseq [msg q] 716 | (when (zero? (change! fuel dec)) 717 | (throw (Exception. "out of fuel!"))) 718 | (exec msg))) 719 | ;; When subgraphs quiesce, flush buffers. 720 | (when (empty? queue) 721 | (log 'quiescence) 722 | (when-let [q (seq buffered)] 723 | (set! buffered #{}) 724 | (doseq [k q 725 | :let [n (get-node k)] 726 | :when (not (:exception n)) 727 | t (:buffer n)] 728 | (pass-child k t) 729 | ;;XXX clear the buffer! 730 | )))) 731 | 732 | (defn run [pat s opts] 733 | ;;TODO: Generate parses lazily. 734 | (binding [input s 735 | root (Key. pat (Context. 0 (:env opts {}))) 736 | graph [] 737 | queue [] 738 | buffered #{} 739 | breaks (when (string? s) [0]) 740 | traveled 0 741 | fuel (opts :fuel 0)] 742 | (try 743 | (let [{:keys [pat ctx]} root] 744 | (add-node pat ctx)) 745 | (while (seq queue) 746 | (pump)) 747 | (finally 748 | (log 'final-state= (state)) 749 | (when (:viz opts) 750 | (viz/show! (state))))) 751 | (let [ps (->> root get-node :generated 752 | (remove #(< (-> % ::a/end :index) (count s))) 753 | (map ::a/value) 754 | distinct)] 755 | (cond 756 | (next ps) (if (:unique opts) 757 | (errors-at 0 {:message "Ambiguous" :parses (take 2 ps)}) 758 | ps) 759 | (seq ps) ps 760 | :else (or (failure) (throw (Exception. "Unknown parse failure"))))))) 761 | -------------------------------------------------------------------------------- /src/ambiparse/util.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.util) 2 | 3 | (defmacro change! [v f & args] 4 | `(set! ~v (~f ~v ~@args))) 5 | 6 | (defn compare-key [f x y] 7 | (compare (f x) (f y))) 8 | 9 | (defn comparator-key [f] 10 | (comparator #(compare-key f %1 %2))) 11 | 12 | (def conjs (fnil conj #{})) 13 | -------------------------------------------------------------------------------- /src/ambiparse/viz.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.viz 2 | (:require [dorothy.core :as d] 3 | [fipp.edn :refer [pprint]])) 4 | 5 | (create-ns 'ambiparse) 6 | (alias 'a 'ambiparse) 7 | 8 | (defn pps [x] 9 | (with-out-str (pprint x {:width 30}))) 10 | 11 | ;;TODO: Cleanup and expose this? 12 | (defn unform [pat] 13 | (if (sequential? pat) 14 | (case (first pat) 15 | ambiparse/lit (list 'a/lit (second pat)) 16 | ambiparse/-pred (list 'a/pred (second pat)) 17 | ambiparse/cat (list* 'a/cat (map unform (next pat))) 18 | ambiparse/alt (list* 'a/alt (map unform (next pat))) 19 | ambiparse/label (list 'a/label (second pat) (-> pat (nth 2) unform)) 20 | ambiparse/-rule (list 'a/rule (-> pat second unform) (nth pat 2)) 21 | ambiparse/+ (list 'a/+ (-> pat second unform)) 22 | ambiparse/* (list 'a/* (-> pat second unform)) 23 | ambiparse/? (list 'a/? (-> pat second unform)) 24 | ambiparse/-filter (list 'a/filter (second pat) (-> pat (nth 2) unform)) 25 | ambiparse/-prefer (list 'a/prefer (second pat) (-> pat (nth 2) unform)) 26 | ambiparse/scope (list 'a/scope (second pat)) 27 | ambiparse/-dispatch (list* 'a/dispatch (-> pat second unform) 28 | (nth pat 2)) 29 | ambiparse/unambiguous (list 'a/unambiguous (-> pat second unform))) 30 | pat)) 31 | 32 | (defn edge-label [x] 33 | (if x 34 | (str (-> x :prefix ::a/begin :index) " - " (-> x :prefix ::a/end :index) "\n" 35 | "pre: " (-> x :prefix ::a/value pps) 36 | (when (-> x :prefix ::a/env seq) 37 | (str "env: " (-> x :prefix ::a/env pps))) 38 | (when-let [cont (:continue x)] 39 | (str "cont: " (->> cont (map unform) pps)))) 40 | "")) 41 | 42 | (defn identify [ids k] 43 | (or (@ids k) 44 | (let [id (inc (count @ids))] 45 | (swap! ids assoc k id) 46 | id))) 47 | 48 | (defn node-label [{:keys [pat ctx]}] 49 | (let [{:keys [env]} ctx] 50 | (binding [*print-level* 3] 51 | (str (-> pat unform pr-str) \newline 52 | (when (seq env) 53 | (str "env: " (pps env))))))) 54 | 55 | (defn pos-node [input] 56 | (let [label (->> input count range 57 | (map #(str " " % " " (nth input %))) 58 | (interpose " | ") 59 | (apply str))] 60 | ["pos" {:shape "record" :label label}])) 61 | 62 | (defn to-dorothy [{:keys [graph input]}] 63 | (let [ids (atom {})] 64 | [(pos-node input) 65 | (for [[i ks] (map vector (range) graph) 66 | [k {:keys [edges]}] ks 67 | :let [src-id (identify ids k)]] 68 | ;; Nodes. 69 | (list [src-id {:label (node-label k)}] 70 | ;; Position edge. 71 | [(str "pos:i" i) src-id {:headlabel (str i) 72 | :style "dotted" 73 | :arrowhead "none"}] 74 | ;; Edges. 75 | (for [[dst decorators] edges 76 | :let [dst-id (identify ids dst)] 77 | decorator decorators] 78 | [src-id dst-id {:label (edge-label decorator)}])))])) 79 | 80 | (defn show! [state] 81 | (-> state to-dorothy d/digraph d/dot 82 | (d/show! {:frame :ambiparse 83 | :frame-width 1280 84 | :frame-height 800}))) 85 | -------------------------------------------------------------------------------- /test/ambiparse/adaptive_test.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.adaptive-test 2 | (:require [clojure.test :refer :all] 3 | [ambiparse :as a])) 4 | 5 | (def Atom \a) 6 | 7 | (def Add (a/rule (a/cat \+ (a/label :x (a/pred any?))) 8 | (a/add! #'Atom (:x %) (:x %)) 9 | (str "+" (:x %)))) 10 | 11 | (def Del (a/rule (a/cat \- (a/label :x (a/pred any?))) 12 | (a/del! #'Atom (:x %)) 13 | (str "-" (:x %)))) 14 | 15 | (declare Commands) 16 | 17 | (def Block (a/rule (a/scope (a/cat \[ (a/label :xs #'Commands) \])) 18 | (:xs %))) 19 | 20 | (def Command (a/alt #'Atom Add Del Block)) 21 | 22 | (def Commands (a/interpose* \space Command)) 23 | 24 | (deftest add-del-test 25 | (are [s t] (= (a/parse! Commands s) t) 26 | "" [] 27 | "a" [\a] 28 | "a a" [\a \a] 29 | "+b b" ["+b" \b] 30 | "a +b b" [\a "+b" \b] 31 | "[] a" [[] \a] 32 | "[+b b]" [["+b" \b]] 33 | "[+b b] a" [["+b" \b] \a] 34 | "+b +c c" ["+b" "+c" \c] 35 | "+b +c b c" ["+b" "+c" \b \c] 36 | "+b +c b c -b c" ["+b" "+c" \b \c "-b" \c] 37 | ) 38 | ;;TODO: Check specific failures. 39 | (are [s] (not (seq? (a/parses Commands s))) 40 | "b" 41 | "a b" 42 | "+b c" 43 | "[+b b] b" 44 | "+b +c b c -b b" 45 | )) 46 | 47 | (deftest bind-test 48 | (let [x (a/rule \x 49 | (a/bind! ::count (inc (or (a/resolve ::count) 0))) 50 | nil) 51 | xs (a/rule (a/* x) 52 | {:env (::a/env %) 53 | :count (a/resolve ::count)}) 54 | {:keys [env count]} (a/parse! xs "xxxx") 55 | _ (is (= count 4)) 56 | {:keys [count]} (a/parse! xs "xx" {:env env}) 57 | _ (is (= count 6))] 58 | )) 59 | 60 | (comment 61 | 62 | (defn party [s] 63 | (fipp.edn/pprint (a/parse! Commands s {:viz true}))) 64 | 65 | (party "a +b b") 66 | 67 | ) 68 | -------------------------------------------------------------------------------- /test/ambiparse/benchmark.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.benchmark 2 | (:require [clojure.java.io :as io] 3 | [ambiparse :as a] 4 | [ambiparse.edn-test :refer [Forms]]) 5 | (:import (java.io StringReader PushbackReader))) 6 | 7 | (def edn (slurp (io/resource "stuff.edn"))) 8 | 9 | (defn read-all [s] 10 | (let [r (-> s StringReader. PushbackReader.)] 11 | (->> (repeatedly #(read {:eof ::eof} r)) 12 | (take-while #(not= % ::eof)) 13 | vec))) 14 | 15 | (comment 16 | 17 | (prn 'reader) 18 | (time 19 | (dotimes [_ 100] 20 | (read-all edn))) 21 | 22 | (prn 'ambiparse) 23 | (time 24 | (dotimes [_ 10] 25 | (a/parse! Forms edn))) 26 | 27 | (subs edn 50) 28 | 29 | ) 30 | -------------------------------------------------------------------------------- /test/ambiparse/calc_test.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.calc-test 2 | (:require [clojure.test :refer :all] 3 | [ambiparse :as a] 4 | [ambiparse.util :refer :all])) 5 | 6 | (def Digit 7 | (a/rule a/digit 8 | (- (-> % ::a/value int) (int \0)))) 9 | 10 | (def Num 11 | (a/rule (a/+ Digit) 12 | (reduce (fn [n d] 13 | (+ (* n 10) d)) 14 | 0 (::a/value %)))) 15 | 16 | (def Space 17 | (a/+ \space)) 18 | 19 | (declare Expr) 20 | 21 | (defn flat-op [op f] 22 | (a/flat (a/rule (a/cat (a/label :first #'Expr) 23 | (a/label :rest (a/* (a/cat Space #'Expr)))) 24 | (apply f (:first %) (->> % :rest (map second)))))) 25 | 26 | (defn binop [op f] 27 | (a/rule (a/cat (a/label :lhs #'Expr) Space 28 | (a/label :op op) Space 29 | (a/label :rhs #'Expr)) 30 | (f (:lhs %) (:rhs %)))) 31 | 32 | ;;XXX Use flat-op for these after fixing infinite-loop / stack-overflow. 33 | (def Plus (binop \+ +)) 34 | (def Times (binop \* *)) 35 | 36 | (def Minus (a/left (binop \- -))) 37 | (def Divide (a/left (binop \/ /))) 38 | (def Pow (a/right (binop \^ #(Math/pow %1 %2)))) 39 | 40 | (def priority 41 | {\^ 1 42 | \* 2 43 | \/ 2 44 | \+ 3 45 | \- 3}) 46 | 47 | (def Binops 48 | (a/prefer (fn [x y] (compare-key (comp priority :op) x y)) 49 | Plus Minus Times Divide Pow)) 50 | 51 | (def Expr 52 | (a/alt Num Binops)) 53 | 54 | (deftest calc-test 55 | (are [s n] (== (a/parse! Expr s) n) 56 | "5" 5 57 | "15" 15 58 | "2 + 3" 5 59 | "2 + 3 + 1" 6 60 | "2 ^ 3" 8 61 | "2 ^ 3 ^ 2" 512 62 | "2 * 4 + 3" 11 63 | "1 / 2 / 2" 0.25 64 | )) 65 | 66 | (comment 67 | (a/parse! Expr "2 + 4 + 3") 68 | ) 69 | -------------------------------------------------------------------------------- /test/ambiparse/edn_test.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.edn-test 2 | (:require [clojure.test :refer :all] 3 | [ambiparse :as a])) 4 | 5 | ;; EDN is LL(1), so this is a poor test of capability, but a decent 6 | ;; performance baseline. Note: This is for test purposes only and will never 7 | ;; be remotely feature complete. 8 | 9 | (declare Form) 10 | 11 | (def LineComment 12 | (a/cat \; (a/* (a/pred #(not= \newline %))) \newline)) 13 | 14 | (def Space 15 | (a/+ (a/alt \space \tab \newline \, LineComment))) 16 | 17 | (def Forms 18 | (a/cat (a/? Space) 19 | (a/label :forms (a/interpose* Space #'Form)) 20 | (a/? Space))) 21 | 22 | (def Digit 23 | (a/rule a/digit 24 | (- (-> % ::a/value int) (int \0)))) 25 | 26 | (def Int 27 | (a/rule (a/+ Digit) 28 | (reduce (fn [n d] 29 | (+ (* n 10) d)) 30 | 0 (::a/value %)))) 31 | 32 | (def Num 33 | Int) 34 | 35 | (def SymbolChars 36 | (a/pred #((set ".*+!-_?$%&=<>") %))) 37 | 38 | (def BaseName 39 | (a/rule (a/cons (a/alt a/alpha SymbolChars) 40 | (a/* (a/alt a/alpha Digit SymbolChars))) 41 | (->> % ::a/value (apply str)))) 42 | 43 | (def Namespace 44 | BaseName) 45 | 46 | (def Name 47 | (a/alt BaseName "/")) 48 | 49 | (def Symbol 50 | (a/alt (a/rule (a/cat (a/label :ns Namespace) \/ (a/label :name Name)) 51 | (symbol (:ns %) (:name %))) 52 | (a/rule Name (-> % ::a/value symbol)))) 53 | 54 | (def Keyword 55 | (a/rule (a/cat \: (a/label :symbol Symbol)) 56 | (-> % :symbol keyword))) 57 | 58 | (def Str 59 | \x) ;XXX 60 | 61 | (def NamedChar 62 | (a/alt (a/rule "newline" \newline) 63 | (a/rule "return" \return) 64 | (a/rule "space" \space) 65 | (a/rule "tab" \tab))) 66 | 67 | (def Char 68 | (a/rule (a/cat \\ (a/label :char (a/alt NamedChar 69 | ;;TODO Use a negative match. 70 | (a/pred #(not= \space %))))) 71 | (:char %))) 72 | 73 | (def List 74 | (a/rule (a/cat \( Forms \)) 75 | (-> % :forms list* (or ())))) 76 | 77 | (def Map 78 | (a/rule (a/cat \{ Forms \}) 79 | (->> % :forms (partition 2) (map vec) (into {})))) 80 | 81 | (def Vector 82 | (a/rule (a/cat \[ Forms \]) 83 | (-> % :forms vec))) 84 | 85 | (def Form 86 | (a/alt Num Symbol Keyword Str Char List Map Vector)) 87 | 88 | (deftest edn-test 89 | (are [s] (= (a/parse! Form s) (read-string s)) 90 | "15" 91 | "xyz" 92 | "a/b" 93 | "/" 94 | ":abc" 95 | ;XXX "\\x" 96 | "\\newline" 97 | "[]" 98 | "[1]" 99 | "[1 2 3]" 100 | "[ 1 2 3 ]" 101 | "{}" 102 | "{:x 1}" 103 | "{:x 1, :y 2}" 104 | "()" 105 | "(a b c)" 106 | )) 107 | 108 | (comment 109 | (a/parse! Form "{:x 1 :y 2}") 110 | ) 111 | -------------------------------------------------------------------------------- /test/ambiparse/manual_test.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse.manual-test 2 | (:require [fipp.edn :refer [pprint]] 3 | [ambiparse :as a] 4 | [ambiparse.gll :as gll] 5 | [ambiparse-test :refer :all])) 6 | 7 | (defn party [pat s] 8 | (pprint (a/parses pat s {:fuel 100 :viz true}) 9 | {:width 160})) 10 | 11 | (comment 12 | 13 | (party \x "x") 14 | (party \y "x") 15 | (party "xy" "xy") 16 | (party "xy" "xz") 17 | (party (a/lit :x) [:x]) 18 | (party (a/lit :x) [:y]) 19 | (party (a/pred pos?) [5]) 20 | (party (a/pred pos?) [-2]) 21 | (party (a/cat) "") 22 | (party (a/cat) "x") 23 | (party (a/cat \x) "x") 24 | (party (a/cat \x \y) "x") 25 | (party (a/cat \x \y) "xy") 26 | (party (a/cat \x \y) "xz") 27 | (party (a/cat \x) "xy") 28 | (party (a/cat \x \y \z) "xy") 29 | (party (a/cat \x \y \z) "xyz") 30 | (party (a/alt) "") 31 | (party (a/alt \x) "x") 32 | (party (a/alt \x \y) "x") 33 | (party (a/alt \x \y) "y") 34 | (party (a/alt \x \y) "z") 35 | (party (a/alt (a/cat \x \y) \z) "xo") 36 | (party (a/cat (a/alt \a (a/cat (a/alt \b \x)))) "x") 37 | (party (a/cat \a (a/alt \x \y) \b \b) "axbb") 38 | (party (a/cat \a (a/? (a/cat \b \c)) \d) "abd") 39 | (party (a/* \x) "") 40 | (party (a/* \x) "x") 41 | (party (a/* \x) "xx") 42 | (party (a/* \x) "xxy") 43 | (party (a/cat (a/* \x) \y) "xxy") 44 | (party (a/+ \x) "") 45 | (party (a/+ \x) "x") 46 | (party (a/+ \x) "xx") 47 | (party (a/+ \x) "xxy") 48 | (party (a/cat (a/+ \x) \y) "xxy") 49 | (party (a/? \x) "") 50 | (party (a/? \x) "x") 51 | (party (a/? \x) "y") 52 | (party (a/cat (a/? \x) \z) "xz") 53 | (party (a/cat (a/? \x) \z) "yz") 54 | (party (a/cat (a/? \x) \y) "yz") 55 | (party (a/rule \x 1) "x") 56 | (party (a/rule \x [%]) "x") 57 | (party (a/rule \x (/ 1 0)) "x") 58 | (party (a/label :lbl \x) "x") 59 | (party (a/label :a (a/label :b \x)) "x") 60 | (party (a/prefer (constantly 0) \x) "x") 61 | (party (a/cat (a/* \x) (a/? \x)) "xx") 62 | (party (a/cat (a/greedy (a/* \x)) (a/? \x)) "xxxxx") 63 | 64 | (party a/eof "") 65 | (party (a/cat \x a/eof) "x") 66 | 67 | (party XS "xxxx") 68 | 69 | (party \x "y") 70 | (party (a/cat \x \y) "zy") 71 | (party (a/cat \x \y) "xz") 72 | (party (a/* \x) "xxxxx") 73 | (party (a/prefer (fn [t u] (throw (Exception. "whoops"))) 74 | (a/cat (a/cat \x \x) \x) 75 | (a/cat \x \x (a/cat \x))) 76 | "xxx") 77 | 78 | (party (a/filter (constantly true) \x) "x") 79 | (party (a/filter (constantly false) \x) "x") 80 | (party (a/remove (constantly true) \x) "x") 81 | (party (a/remove (constantly false) \x) "x") 82 | 83 | (party A "a") 84 | 85 | (party B "bbb") 86 | (party (a/right (a/cat B B)) "bbb") 87 | (party (a/left (a/cat B B)) "bbb") 88 | 89 | (party #'C "") 90 | 91 | (party #'D "dd") 92 | 93 | (party #'E "ee") 94 | 95 | (party L "xxx") 96 | (party R "xxx") 97 | 98 | (party T "") 99 | (party U "") 100 | (party V "") 101 | (party W "") 102 | 103 | (party (a/rule \x (a/fail! "reject")) "x") 104 | (party (a/rule \x (a/fail! "reject")) "y") 105 | 106 | (party (a/dispatch (a/alt \a \b) 107 | (case (::a/value %) 108 | \a \x 109 | \b \y)) 110 | "ax") 111 | 112 | (party (a/cat \x (a/unambiguous (a/pred pos?) (a/pred even?)) \y) 113 | [\x 5 \y]) 114 | (party (a/cat \x (a/unambiguous (a/pred pos?) (a/pred even?)) \y) 115 | [\x -1 \y]) 116 | (party (a/cat \x (a/unambiguous (a/pred pos?) (a/pred even?)) \y) 117 | [\x 4 \y]) 118 | 119 | (binding [gll/breaks [0 3 7]] 120 | (doseq [i (range 9)] 121 | (prn i (gll/pos-at i)))) 122 | 123 | ) 124 | -------------------------------------------------------------------------------- /test/ambiparse_test.clj: -------------------------------------------------------------------------------- 1 | (ns ambiparse-test 2 | (:require [clojure.test :refer :all] 3 | [ambiparse :as a] 4 | [ambiparse.util :refer :all])) 5 | 6 | ;; Non-recursive var. 7 | (def XS (a/+ \x)) 8 | 9 | ;; Simple left and right recursion. 10 | (def L (a/cat (a/? #'L) \x)) 11 | (def R (a/cat \x (a/? #'R))) 12 | 13 | ;; Various other infinitely recursive grammars. 14 | (def A (a/alt \a #'A)) 15 | (def B (a/alt \b (a/cat #'B #'B))) 16 | (def C (a/cat #'C)) 17 | (def D (a/alt (a/cat #'D #'D) \d)) 18 | 19 | ;; Infinite zero-width matches. 20 | (def T (a/* (a/cat))) 21 | (def U (a/+ (a/cat))) 22 | (def V (a/cat (a/? #'V) a/eof)) 23 | (def W (a/cat (a/? #'W) (a/cat))) 24 | 25 | (defn parse-set [pat s] 26 | (let [ps (a/parses pat s {:fuel 500})] 27 | (if (seq? ps) 28 | (set ps) 29 | #{}))) 30 | 31 | (deftest parses-test 32 | (are [pat s ts] (= (parse-set pat s) ts) 33 | 34 | \x "x" #{\x} 35 | \y "x" #{} 36 | 37 | "" "" #{""} 38 | "" "x" #{} 39 | "xy" "xy" #{"xy"} 40 | "xy" "xz" #{} 41 | 42 | (a/lit :x) [:x] #{:x} 43 | 44 | (a/pred even?) [1] #{} 45 | (a/pred even?) [2] #{2} 46 | 47 | (a/cat) "" #{[]} 48 | (a/cat) "x" #{} 49 | (a/cat \x) "x" #{[\x]} 50 | (a/cat \x) "xy" #{} 51 | (a/cat \x \y) "x" #{} 52 | (a/cat \x \y) "xy" #{[\x \y]} 53 | (a/cat \x \y \z) "xyz" #{[\x \y \z]} 54 | 55 | a/eof "" #{::a/eof} 56 | (a/cat \x a/eof) "x" #{[\x ::a/eof]} 57 | 58 | (a/alt \x \y) "x" #{\x} 59 | (a/alt \x \y) "y" #{\y} 60 | (a/alt \x \y) "z" #{} 61 | 62 | (a/* \x) "" #{[]} 63 | (a/* \x) "x" #{[\x]} 64 | (a/* \x) "xx" #{[\x \x]} 65 | 66 | (a/+ \x) "" #{} 67 | (a/+ \x) "x" #{[\x]} 68 | (a/+ \x) "xx" #{[\x \x]} 69 | 70 | (a/? \x) "" #{nil} 71 | (a/? \x) "x" #{\x} 72 | 73 | (a/cat (a/* \x) (a/? \x)) "xxx" #{[[\x \x \x] nil] [[\x \x] \x]} 74 | 75 | #'XS "xx" #{[\x \x]} 76 | 77 | L "xxx" #{[[[nil \x] \x] \x]} 78 | R "xxx" #{[\x [\x [\x nil]]]} 79 | 80 | #'A "a" #{\a} 81 | #'B "bb" #{[\b \b]} 82 | #'C "c" #{} 83 | #'D "dd" #{[\d \d]} 84 | 85 | T "" #{[] [[]]} 86 | U "" #{[[]]} 87 | V "" #{[nil ::a/eof] [[nil ::a/eof] ::a/eof]} 88 | W "" #{[nil []] [[nil []] []]} 89 | 90 | (a/prefer (constantly 0) \x) "x" #{\x} 91 | (a/cat (a/greedy (a/* \x)) (a/? \x)) "xxx" #{[[\x \x \x] nil]} 92 | ;;XXX prefer test cycle 93 | 94 | (a/cat (a/filter #(>= (a/length %) 2) 95 | (a/* \x)) 96 | (a/? \x)) 97 | "xxx" 98 | #{[[\x \x] \x] [[\x \x \x] nil]} 99 | 100 | (a/cat (a/remove #(< (a/length %) 2) 101 | (a/* \x)) 102 | (a/? \x)) 103 | "xxx" 104 | #{[[\x \x] \x] [[\x \x \x] nil]} 105 | 106 | (a/filter #(= (::a/value %) \x) \x) "x" #{\x} 107 | 108 | (a/dispatch (a/alt \a \b) 109 | (case (::a/value %) 110 | \a \x 111 | \b \y)) 112 | "ax" 113 | #{\x} 114 | 115 | (a/cat \x (a/unambiguous (a/pred pos?) (a/pred even?)) \y) 116 | [\x 5 \y] 117 | #{[\x 5 \y]} 118 | 119 | (a/cat \x (a/unambiguous (a/pred pos?) (a/pred even?)) \y) 120 | [\x -1 \y] 121 | #{} 122 | 123 | )) 124 | 125 | (defn clean-error 126 | "Simplifies errors to remove object identity and minimize test fragility." 127 | [err] 128 | (cond-> err 129 | (contains? err :exception) (update :exception #(.getMessage ^Exception %)) 130 | (contains? err :predicate) (assoc :predicate '...) 131 | (contains? err :candidates) (update :candidates 132 | #(->> % (map ::a/value) set)))) 133 | 134 | (defn clean-failure [[t fail]] 135 | [t (when fail 136 | (update fail :errors #(->> % (map clean-error) set)))]) 137 | 138 | (deftest errors-test 139 | (are [pat s i errs] 140 | (let [ret (a/parse pat s) 141 | [t {:keys [pos errors]}] (clean-failure ret)] 142 | ;(prn pos errors) 143 | (= [t (:index pos) errors] [nil i errs])) 144 | 145 | ;; Unexpected terminal. 146 | \x "y" 147 | 0 #{{:expected \x}} 148 | 149 | ;; Unexpected end of input. 150 | \x "" 151 | 0 #{{:expected \x}} 152 | 153 | ;; Unexpected string. 154 | "abcd" "abxd" 155 | 2 #{{:expected "abcd" :actual "abxd"}} 156 | 157 | ;; Short string. 158 | "xy" "x" 159 | 1 #{{:expected "xy" :actual "x"}} 160 | 161 | ;; Lit failure in non-string input. 162 | (a/lit :x) [:y] 163 | 0 #{{:expected :x}} 164 | 165 | ;; Predicate failed. 166 | (a/pred even?) [1] 167 | 0 #{{:message "Predicate failed" :predicate '... :expression 'even?}} 168 | 169 | ;; Failure in element of concatenation. 170 | (a/cat \x \y) "zy" 171 | 0 #{{:expected \x}} 172 | (a/cat \x \y) "xz" 173 | 1 #{{:expected \y}} 174 | 175 | ;; Failure in optional element of concatenation. 176 | (a/cat \a (a/? (a/cat \b \c \d)) \e) "abce" 177 | 3 #{{:expected \d}} 178 | 179 | ;; XXX 180 | ;(a/cat \a (a/? (a/cat \b \c)) \d) "abd" 181 | ;2 #{{:expected \c}} 182 | 183 | ;; Empty cat before end of input. 184 | (a/cat) "x" 185 | 0 #{{:expected ::a/eof}} 186 | 187 | ;; Cat shorter than input. 188 | (a/cat "x") "xy" 189 | 1 #{{:expected ::a/eof}} 190 | 191 | ;; Unexpected eof in cat. 192 | (a/cat \x \y) "x" 193 | 1 #{{:expected \y}} 194 | 195 | ;; Rightmost failure from alt. 196 | (a/alt (a/cat \x \y) \z) "xo" 197 | 1 #{{:expected \y}} 198 | 199 | ;; Multiple rightmost failures. 200 | (a/alt \x \y) "z" 201 | 0 #{{:expected \x} 202 | {:expected \y}} 203 | 204 | ;; Nested rightmost failures. 205 | (a/alt \x (a/alt \y \z)) "w" 206 | 0 #{{:expected \x} 207 | {:expected \y} 208 | {:expected \z}} 209 | 210 | ;; Tail zero-or-more failure. 211 | (a/* (a/cat \x \y)) "x" 212 | 1 #{{:expected \y}} 213 | 214 | ;; First one-or-more failure. 215 | (a/+ (a/cat \x \z)) "yz" 216 | 0 #{{:expected \x}} 217 | 218 | ;; Rule pattern failure. 219 | (a/rule \x \z) "y" 220 | 0 #{{:expected \x}} 221 | 222 | ;; Rule expression exception. 223 | (a/rule \x (/ 1 0)) "x" 224 | 1 #{{:exception "Divide by zero"}} 225 | 226 | ;; Explicit failure rule. 227 | (a/rule \x (a/fail! "oh noez!" {:x 123})) "x" 228 | 1 #{{:message "oh noez!" 229 | :data {:x 123}}} 230 | 231 | ;; Label pattern failure. 232 | (a/label :foo (a/+ \x)) "y" 233 | 0 #{{:expected \x}} 234 | 235 | ;; Var pattern failure. 236 | #'XS "y" 237 | 0 #{{:expected \x}} ;XXX include var. 238 | 239 | ;; Prefer compare failure. 240 | (a/prefer (fn [t u] (throw (Exception. "whoops"))) 241 | (a/cat (a/cat \x \x) \x) 242 | (a/cat \x \x (a/cat \x))) 243 | "xxx" 244 | 3 #{{:exception "whoops"}} 245 | 246 | ;; Prefer pattern failure. 247 | (a/greedy (a/+ \x)) "y" 248 | 0 #{{:expected \x}} 249 | 250 | ;XXX Check failure of cyclic prefer. 251 | 252 | ;; Filter pattern failed. 253 | (a/filter (constantly false) \x) "y" 254 | 0 #{{:expected \x}} 255 | 256 | ;; Filter predicate failed. 257 | (a/filter (constantly false) \x) "x" 258 | 0 #{{:message "Filter predicate failed" 259 | :predicate '... 260 | :expression '(constantly false) 261 | :candidates #{\x}}} 262 | 263 | ;; Remove predicate failed. 264 | (a/remove (constantly true) \x) "x" 265 | 0 #{{:message "Filter predicate failed" 266 | :predicate '... 267 | :expression '(comp not (constantly true)) 268 | :candidates #{\x}}} 269 | 270 | ;; Dispatch initial pattern failed. 271 | (a/dispatch \a \b) "xy" 272 | 0 #{{:expected \a}} 273 | 274 | ;; Dispatched pattern failed. 275 | (a/dispatch \a \b) "ay" 276 | 1 #{{:expected \b}} 277 | 278 | ;; Dispatched body throws. 279 | (a/dispatch \a (throw (Exception. "oh noez!"))) "a" 280 | 1 #{{:exception "oh noez!"}} 281 | 282 | ;; Dispatched body throws later... 283 | ;;TODO: Eliminate need to eval body again during gll/-failure. 284 | (let [a (atom 2)] 285 | (a/dispatch \a 286 | (when (zero? (swap! a dec)) 287 | (throw (Exception. "oh noez!"))))) "a" 288 | 1 #{{:exception "oh noez!"}} 289 | 290 | ;; Scoped Ambiguity. 291 | (a/cat \x 292 | (a/unambiguous (a/rule (a/pred pos?) :pos) 293 | (a/rule (a/pred even?) :even)) 294 | \y) 295 | [\x 4 \y] 296 | 1 #{{:message "Ambiguous" :candidates #{:pos :even}}} 297 | 298 | )) 299 | -------------------------------------------------------------------------------- /test/resources/stuff.edn: -------------------------------------------------------------------------------- 1 | ;; Just some random code taken from this project, then non-edn features 2 | ;; (or just featured I didn't feel like implemeting) stripped out. 3 | 4 | (defn do-cat [t [_ _ tail? :as k] pats] 5 | (if-let [[p & ps] pats] 6 | (let [i (-> t :a/end :index) 7 | d {:prefix t :continue ps} 8 | tl? (and (empty? ps) tail?)] 9 | (add-edge i p tl? k d)) 10 | (pass k (assoc t :a/structure (second k))))) 11 | --------------------------------------------------------------------------------