├── deps.edn ├── resources └── clj-kondo.exports │ └── com.github.strojure │ └── parsesso │ └── config.edn ├── .idea ├── codeStyles │ ├── codeStyleConfig.xml │ └── Project.xml ├── cursive-test-integration.xml └── ClojureProjectResolveSettings.xml ├── .gitignore ├── bb.edn ├── .clj-kondo └── config.edn ├── test ├── dev │ └── node_repl.cljc └── strojure │ └── parsesso │ ├── expr_test.cljc │ ├── char_test.cljc │ └── parser_test.cljc ├── .github └── workflows │ └── tests.yml ├── project.clj ├── UNLICENSE ├── CHANGELOG.md ├── src └── strojure │ └── parsesso │ ├── impl │ ├── parser.cljc │ ├── state.cljc │ ├── pos.cljc │ ├── char.cljc │ ├── reply.cljc │ └── error.cljc │ ├── unicode.clj │ ├── expr.cljc │ ├── char.cljc │ └── parser.cljc ├── doc ├── demo │ └── honeysql_select.clj └── benchmarks │ └── compare.clj └── README.md /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"]} 2 | -------------------------------------------------------------------------------- /resources/clj-kondo.exports/com.github.strojure/parsesso/config.edn: -------------------------------------------------------------------------------- 1 | {:lint-as {strojure.parsesso.parser/for clojure.core/let}} 2 | -------------------------------------------------------------------------------- /.idea/codeStyles/codeStyleConfig.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 5 | -------------------------------------------------------------------------------- /.idea/cursive-test-integration.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 6 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | pom.xml 2 | pom.xml.asc 3 | *.jar 4 | *.class 5 | *.iml 6 | /lib/ 7 | /classes/ 8 | /target/ 9 | /checkouts/ 10 | /test/user 11 | 
/cljs-test-runner-out 12 | .idea/ 13 | .lein-deps-sum 14 | .lein-repl-history 15 | .lein-plugins/ 16 | .lein-failures 17 | .nrepl-port 18 | .cache/ 19 | .calva/ 20 | .cljs_node_repl/ 21 | .cpcache/ -------------------------------------------------------------------------------- /bb.edn: -------------------------------------------------------------------------------- 1 | {:deps {com.github.strojure/parsesso {:local/root "."}} 2 | :tasks 3 | {test:bb {:extra-paths ["test"] 4 | :extra-deps {com.cognitect/test-runner {:git/url "https://github.com/cognitect-labs/test-runner" 5 | :sha "a522ab2851a2aa5bf9c22a942b45287a3a019310"}} 6 | :task cognitect.test-runner/-main}}} 7 | -------------------------------------------------------------------------------- /.idea/ClojureProjectResolveSettings.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | PROJECT 7 | 8 | -------------------------------------------------------------------------------- /.clj-kondo/config.edn: -------------------------------------------------------------------------------- 1 | {:linters {:missing-docstring {:level :warning} 2 | :redundant-fn-wrapper {:level :warning} 3 | :shadowed-var {:level :warning 4 | :exclude [] 5 | :suggest {}} 6 | :unknown-require-option {:level :off} 7 | :unsorted-required-namespaces {:level :warning}} 8 | :lint-as {cljs.core/defrecord clojure.core/defrecord} 9 | :config-paths ["../resources/clj-kondo.exports/com.github.strojure/parsesso"] 10 | :config-in-comment {:linters {:redundant-expression {:level :off} 11 | :unresolved-namespace {:level :off} 12 | :unresolved-symbol {:level :off} 13 | :duplicate-require {:level :off}}}} 14 | -------------------------------------------------------------------------------- /test/dev/node_repl.cljc: -------------------------------------------------------------------------------- 1 | (ns dev.node-repl 2 | "ClojureScript Node REPL." 
3 | (:require [cljs.repl :as repl] 4 | [cljs.repl.node :as node])) 5 | 6 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 7 | 8 | (defn- start 9 | [] 10 | (-> (node/repl-env) 11 | (repl/repl :quit-prompt (fn [] 12 | (repl/repl-title) 13 | (repl/repl-quit-prompt))))) 14 | 15 | (def -main 16 | "Main entry point." 17 | start) 18 | 19 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 20 | 21 | (comment 22 | (start) 23 | :cljs/quit 24 | ) 25 | 26 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 27 | -------------------------------------------------------------------------------- /.idea/codeStyles/Project.xml: -------------------------------------------------------------------------------- 1 | 2 | 3 | 23 | -------------------------------------------------------------------------------- /.github/workflows/tests.yml: -------------------------------------------------------------------------------- 1 | name: tests 2 | 3 | on: 4 | workflow_dispatch: { } 5 | push: 6 | branches: [ default ] 7 | paths: [ "src/**", "test/**", "project.clj", "*.edn" ] 8 | pull_request: 9 | paths: [ "src/**", "test/**", "project.clj", "*.edn" ] 10 | 11 | jobs: 12 | tests: 13 | runs-on: ubuntu-latest 14 | steps: 15 | - name: Checkout 16 | uses: actions/checkout@v3 17 | 18 | - name: Prepare java 19 | uses: actions/setup-java@v3 20 | with: 21 | distribution: 'zulu' 22 | java-version: '11' 23 | 24 | - name: Install clojure tools 25 | uses: DeLaGuardo/setup-clojure@10.1 26 | with: 27 | bb: latest 28 | lein: latest 29 | 30 | - name: Cache clojure dependencies 31 | uses: actions/cache@v3 32 | with: 33 | path: | 34 | ~/.m2/repository 35 | ~/.gitlibs 36 | ~/.deps.clj 37 | key: cljdeps-${{ hashFiles('project.clj', 'bb.edn') }} 38 | restore-keys: cljdeps- 39 | 40 | - run: lein deps 41 | 42 | - run: lein test 43 | 44 | - run: lein cljs-test 45 | 46 | - 
run: bb test:bb 47 | -------------------------------------------------------------------------------- /project.clj: -------------------------------------------------------------------------------- 1 | (defproject com.github.strojure/parsesso "1.2.3-SNAPSHOT" 2 | :description "Parser combinators library for Clojure(Script)." 3 | :url "https://github.com/strojure/parsesso" 4 | :license {:name "The Unlicense" :url "https://unlicense.org"} 5 | 6 | :dependencies [] 7 | 8 | :profiles {:provided {:dependencies [[org.clojure/clojure "1.11.1"] 9 | [org.clojure/clojurescript "1.11.60"]]} 10 | :dev,,,,, {:dependencies [;; clojurescript tests 11 | [com.google.guava/guava "31.1-jre"] 12 | [olical/cljs-test-runner "3.8.0"] 13 | ;; inspiration libs 14 | [org.blancas/kern "1.1.0"] 15 | [rm-hull/jasentaa "0.2.5"] 16 | [the/parsatron "0.0.8"]] 17 | :source-paths ["doc"]}} 18 | 19 | :aliases {"cljs-test" ["run" "-m" "cljs-test-runner.main"]} 20 | 21 | :clean-targets ["target" "cljs-test-runner-out"] 22 | 23 | :deploy-repositories [["clojars" {:url "https://clojars.org/repo" :sign-releases false}]]) 24 | -------------------------------------------------------------------------------- /UNLICENSE: -------------------------------------------------------------------------------- 1 | This is free and unencumbered software released into the public domain. 2 | 3 | Anyone is free to copy, modify, publish, use, compile, sell, or 4 | distribute this software, either in source code form or as a compiled 5 | binary, for any purpose, commercial or non-commercial, and by any 6 | means. 7 | 8 | In jurisdictions that recognize copyright laws, the author or authors 9 | of this software dedicate any and all copyright interest in the 10 | software to the public domain. We make this dedication for the benefit 11 | of the public at large and to the detriment of our heirs and 12 | successors. 
We intend this dedication to be an overt act of 13 | relinquishment in perpetuity of all present and future rights to this 14 | software under copyright law. 15 | 16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR 20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, 21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR 22 | OTHER DEALINGS IN THE SOFTWARE. 23 | 24 | For more information, please refer to <https://unlicense.org> 25 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | All notable changes to this project will be documented in this file. 4 | 5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/). 
6 | 7 | ## `1.2.3-SNAPSHOT` 8 | 9 | Release date `UNRELEASED` 10 | 11 | ## `1.2.2+295` 12 | 13 | Release date `2023-06-06` 14 | 15 | - (fix): allow destructuring in `p/for` [#8] 16 | 17 | [#8]: https://github.com/strojure/parsesso/issues/8 18 | 19 | ## `1.2.1+292` 20 | 21 | Release date `2023-05-28` 22 | 23 | - (docs): fix :arglists of `parser/parse` [#7] 24 | 25 | [#7]: https://github.com/strojure/parsesso/issues/7 26 | 27 | ## `1.2.0+287` 28 | 29 | Release date `2023-05-25` 30 | 31 | - (feat pos): allow to specify initial line/col for :text pos 32 | - (fix): cannot pass custom `InputPos` [#6] 33 | 34 | [#6]: https://github.com/strojure/parsesso/issues/6 35 | 36 | ## `1.1.2-283` 37 | 38 | Release date `2023-05-17` 39 | 40 | - (fix): `expecting` adds a message instead of replacing [#5] 41 | 42 | [#5]: https://github.com/strojure/parsesso/issues/5 43 | 44 | ## `1.1.1-274` 45 | 46 | Release date `2023-03-08` 47 | 48 | - (chore project) Implement `cljs-test` lein alias. 49 | - (fix cljs) `parser/update-state` for nil :input. 50 | - (chore) Change license to Unlicense. 51 | 52 | ## `1.1.0-258` 53 | 54 | Release date `2023-03-04` 55 | 56 | - feat: Make code compatible with `bb` and other platforms. 57 | - build: Add CI config to run lein test + bb test:bb. 58 | 59 | ## `1.0.253` 60 | 61 | Release date `2023-03-04` 62 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/parser.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.parser 2 | {:no-doc true} 3 | (:require [strojure.parsesso.impl.reply :as r])) 4 | 5 | #?(:clj (set! *warn-on-reflection* true) 6 | :cljs (set! *warn-on-infer* true)) 7 | 8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 9 | 10 | (deftype Continue [f]) 11 | 12 | (defn go 13 | "Returns continuation for the parser `p`." 14 | [p state context] 15 | (Continue. 
(fn [] (p state context)))) 16 | 17 | (defn run 18 | "Executes parser `p` in continuation loop." 19 | [p state] 20 | (loop [ret (go p state (r/new-context))] 21 | (if (instance? Continue ret) 22 | (recur ((.-f ^Continue ret))) 23 | ret))) 24 | 25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 26 | 27 | (defn e-ok-throw-empty-input 28 | "Throws exception in `many` combinator." 29 | [_ _] 30 | (throw (ex-info (str "Combinator is applied to a parser that accepts an empty input.") {}))) 31 | 32 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 33 | 34 | (def ^:private word-test-fn! 35 | (atom {})) 36 | 37 | (defn register-word-test-fn 38 | "Associates keyword `k` with test-fn of the [[word]] parser. Asserts that `k` is a keyword." 39 | [k, f] 40 | (assert (keyword? k) "Requires keyword as word test-fn ID") 41 | (swap! word-test-fn! assoc k f)) 42 | 43 | (defn word-test-fn 44 | "Returns registered test-fn for the keyword `k`." 45 | [k] 46 | (or (@word-test-fn! k) 47 | (throw (ex-info (str "The word test-fn is not registered:" k) {})))) 48 | 49 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 50 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/state.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.state 2 | {:no-doc true} 3 | (:require [strojure.parsesso.impl.pos :as pos]) 4 | #?(:clj (:import (clojure.lang ISeq)))) 5 | 6 | #?(:clj (set! *warn-on-reflection* true) 7 | :cljs (set! *warn-on-infer* true)) 8 | 9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 10 | 11 | (defrecord State [input pos user]) 12 | 13 | (defn conform-input 14 | "Return non-nil input." 
15 | [input] 16 | (or (seq input) ())) 17 | 18 | (defn init-state 19 | "Returns new instance of parser state." 20 | [input pos user] 21 | (State. (conform-input input) pos user)) 22 | 23 | (defn next-state 24 | "Returns next (incremented) instance of parser state for parsed token `tok`." 25 | ([^State state, tok] 26 | (State. (#?(:bb rest :clj .more :cljs -rest :default rest) ^ISeq (.-input state)) 27 | (pos/next-pos (.-pos state) tok) 28 | (.-user state))) 29 | ([^State state, tok, user-fn] 30 | (State. (#?(:bb rest :clj .more :cljs -rest :default rest) ^ISeq (.-input state)) 31 | (pos/next-pos (.-pos state) tok) 32 | (user-fn (.-user state))))) 33 | 34 | (defn set-input-pos 35 | "Returns instance of parser state with new values of input and pos." 36 | [^State state, input, pos] 37 | (State. (conform-input input) pos (.-user state))) 38 | 39 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 40 | 41 | (defn input 42 | "Returns parsing state input." 43 | [state] 44 | (.-input ^State state)) 45 | 46 | (defn pos 47 | "Returns parsing state position." 48 | [state] 49 | (.-pos ^State state)) 50 | 51 | (defn user 52 | "Returns user state." 53 | [state] 54 | (.-user ^State state)) 55 | 56 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 57 | -------------------------------------------------------------------------------- /src/strojure/parsesso/unicode.clj: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.unicode 2 | "Unicode char parsers using `java.lang.Character`. Clojure only." 3 | (:require [strojure.parsesso.parser :as p])) 4 | 5 | (set! *warn-on-reflection* true) 6 | 7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 8 | 9 | (def lower? 10 | "Parser and predicate for the lower-case letter character according to 11 | `Character/isLowerCase`." 
12 | (p/token #(Character/isLowerCase ^char %) 13 | "lower-case letter")) 14 | 15 | (def upper? 16 | "Parser and predicate for the upper-case letter character according to 17 | `Character/isUpperCase`." 18 | (p/token #(Character/isUpperCase ^char %) 19 | "upper-case letter")) 20 | 21 | (def title? 22 | "Parser and predicate for the title-case letter character according to 23 | `Character/isTitleCase`." 24 | (p/token #(Character/isTitleCase ^char %) 25 | "title-case letter")) 26 | 27 | (def digit? 28 | "Parser and predicate for the digit character according to 29 | `Character/isDigit`." 30 | (p/token #(Character/isDigit ^char %) 31 | "digit")) 32 | 33 | (def defined? 34 | "Parser and predicate for the character defined in Unicode, according to 35 | `Character/isDefined`." 36 | (p/token #(Character/isDefined ^char %) 37 | "unicode defined character")) 38 | 39 | (def letter? 40 | "Parser and predicate for the letter character according to 41 | `Character/isLetter`." 42 | (p/token #(Character/isLetter ^char %) 43 | "letter")) 44 | 45 | (def letter-or-digit? 46 | "Parser and predicate for the letter or digit character according to 47 | `Character/isLetterOrDigit`." 48 | (p/token #(Character/isLetterOrDigit ^char %) 49 | "letter or digit")) 50 | 51 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 52 | 53 | (def space? 54 | "Parser and predicate for the Unicode space character according to 55 | `Character/isSpaceChar`." 56 | (p/token #(Character/isSpaceChar ^char %) 57 | "space character")) 58 | 59 | (def white? 60 | "Parser and predicate for the white space character according to 61 | `Character/isWhitespace`." 
62 | (p/token #(Character/isWhitespace ^char %) 63 | "whitespace character")) 64 | 65 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 66 | -------------------------------------------------------------------------------- /src/strojure/parsesso/expr.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.expr 2 | "Parser combinators for expressions." 3 | (:require [strojure.parsesso.parser :as p])) 4 | 5 | #?(:clj (set! *warn-on-reflection* true) 6 | :cljs (set! *warn-on-infer* true)) 7 | 8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 9 | 10 | (defn +chain-left 11 | "Parses _one_ or more occurrences of `p`, separated by `op`. Returns a value 12 | obtained by a _left_ associative application of all functions returned by `op` 13 | to the values returned by `p`. This parser can for example be used to 14 | eliminate left recursion which typically occurs in expression grammars. 15 | 16 | (def mulop (p/alt (p/after (char/is \\*) (p/result *)) 17 | (p/after (char/is \\/) (p/result /)))) 18 | 19 | (def addop (p/alt (p/after (char/is \\+) (p/result +)) 20 | (p/after (char/is \\-) (p/result -)))) 21 | 22 | (def expr (+chain-left term addop)) 23 | (def term (+chain-left factor mulop)) 24 | (def factor (p/alt (parens expr) integer)) 25 | " 26 | [p op] 27 | (letfn [(more [x] 28 | (p/alt (p/for [f op, y p] 29 | (more (f x y))) 30 | (p/result x)))] 31 | (p/for [x p] 32 | (more x)))) 33 | 34 | (defn *chain-left 35 | "Parses _zero_ or more occurrences of `p`, separated by `op`. Returns a value 36 | obtained by a _left_ associative application of all functions returned by `op` 37 | to the values returned by `p`. If there are zero occurrences of `p`, the value 38 | `x` is returned." 
39 | [p op x] 40 | (p/option (+chain-left p op) x)) 41 | 42 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 43 | 44 | (defn +chain-right 45 | "Parses _one_ or more occurrences of `p`, separated by `op`. Returns a value 46 | obtained by a _right_ associative application of all functions returned by 47 | `op` to the values returned by `p`." 48 | [p op] 49 | (letfn [(scan [] 50 | (p/for [x p] 51 | (more x))) 52 | (more [x] 53 | (p/alt (p/for [f op, y (scan)] 54 | (p/result (f x y))) 55 | (p/result x)))] 56 | (scan))) 57 | 58 | (defn *chain-right 59 | "Parses _zero_ or more occurrences of `p`, separated by `op`. Returns a value 60 | obtained by a _right_ associative application of all functions returned by 61 | `op` to the values returned by `p`. If there are no occurrences of `p`, the 62 | value `x` is returned." 63 | [p op x] 64 | (p/option (+chain-right p op) x)) 65 | 66 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 67 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/pos.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.pos 2 | {:no-doc true}) 3 | 4 | #?(:clj (set! *warn-on-reflection* true) 5 | :cljs (set! *warn-on-infer* true)) 6 | 7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 8 | 9 | (defprotocol InputPos 10 | (next-pos [pos token] 11 | "Returns new source pos for the current token.") 12 | (compare-pos [pos1 pos2] 13 | "Comparator. Returns -1/0/1 like `compare`.")) 14 | 15 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 16 | 17 | (defmulti init-pos 18 | "Returns initial InputPos for given options and input." 
19 | (fn [opts _input] (:pos opts))) 20 | 21 | (defmethod init-pos :default 22 | [{:keys [pos]} _] 23 | (when (keyword? pos) 24 | (throw (ex-info (str "Cannot init input position for: " pos) {}))) 25 | pos) 26 | 27 | (defmethod init-pos nil 28 | [opts input] 29 | ;; Tries to detect text input and use text pos. 30 | (let [f (get-method init-pos (if (or (string? input) (char? (first input))) 31 | :text :sequence))] 32 | (f opts input))) 33 | 34 | (defmethod init-pos :disabled 35 | [_ _] 36 | nil) 37 | 38 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 39 | 40 | (extend-protocol InputPos 41 | nil 42 | (next-pos [_ _]) 43 | (compare-pos [_ _] 0) 44 | #?(:clj Number :cljs number) 45 | (next-pos [pos _] (inc pos)) 46 | (compare-pos [a b] (compare a b))) 47 | 48 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 49 | 50 | (defrecord IndexPos [^long i] 51 | InputPos 52 | (next-pos [_ _] (IndexPos. (unchecked-inc i))) 53 | (compare-pos [_ pos] (compare i (:i pos))) 54 | Object 55 | (toString [_] (str "index " i))) 56 | 57 | (defmethod init-pos :sequence 58 | [_ _] 59 | (IndexPos. 0)) 60 | 61 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 62 | 63 | (defn- compare* 64 | [x y] 65 | (let [c (compare x y)] 66 | (when-not (zero? c) 67 | c))) 68 | 69 | (defrecord TextPos [tab, ^long line, ^long col] 70 | InputPos 71 | (next-pos [pos c] 72 | (case c \tab 73 | (update pos :col #(-> % (+ tab) (- (mod (dec %) tab)))) 74 | \newline 75 | (TextPos. tab (unchecked-inc line) 1) 76 | ;; default 77 | (TextPos. tab line (unchecked-inc col)))) 78 | (compare-pos [_ pos] 79 | (or (compare* line (:line pos)) 80 | (compare* col (:col pos)) 81 | 0)) 82 | Object 83 | (toString [_] (str "line " line ", column " col))) 84 | 85 | (defmethod init-pos :text 86 | [opts _] 87 | (TextPos. 
(or (:tab opts) 8) (or (:line opts) 1) (or (:col opts) 1))) 88 | 89 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 90 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/char.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.char 2 | {:no-doc true} 3 | (:require [clojure.string :as string]) 4 | #?(:cljs (:import [goog.string StringBuffer]))) 5 | 6 | #?(:clj (set! *warn-on-reflection* true) 7 | :cljs (set! *warn-on-infer* true)) 8 | 9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 10 | 11 | (defn equals-ignorecase 12 | "True if chars are equal, case insensitive. " 13 | [c1 c2] 14 | (or (= c1 c2) 15 | #?(:bb 16 | (= (string/lower-case c1) 17 | (string/lower-case c2)) 18 | :clj 19 | (.equals ^Object (Character/toLowerCase ^char c1) 20 | (Character/toLowerCase ^char c2)) 21 | :default 22 | (= (string/lower-case c1) 23 | (string/lower-case c2))))) 24 | 25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 26 | 27 | (def ^:private string-pred-fn! 28 | (atom {})) 29 | 30 | (defn register-string-pred-fn 31 | "Associates keyword `k` with predicate function of the `is` and `is-not` 32 | parsers. Asserts that `k` is a keyword." 33 | [k, f] 34 | (assert (keyword? k) "Requires keyword as `is` test-fn ID") 35 | (swap! string-pred-fn! assoc k f)) 36 | 37 | (defn string-pred-fn 38 | "Returns predicate for the keyword `k` and string of characters `s`." 39 | [k s] 40 | (if-let [f (@string-pred-fn! k)] 41 | (f s) 42 | (throw (ex-info (str "The `is` predicate function is not registered:" k) {})))) 43 | 44 | (defn string-pred-default 45 | "Default predicate for `is` and `is-not` parsers." 46 | [s] 47 | #?(:bb 48 | (fn [c] (string/index-of s c)) 49 | :clj 50 | (if (char? 
s) 51 | (fn [c] (.equals ^Character s c)) 52 | (fn [c] (<= 0 (.indexOf ^String s ^int (.charValue ^Character c))))) 53 | :default 54 | (fn [c] (string/index-of s c)))) 55 | 56 | (defn string-pred-ignorecase 57 | "Case-insensitive predicate for `is` and `is-not` parsers. Accepts a string of characters or a single character `s`." 58 | [s] 59 | (let [s (string/lower-case (str s))] 60 | (fn [c] #?(:bb 61 | (string/index-of s (string/lower-case c)) 62 | :clj 63 | (<= 0 (.indexOf ^String s ^int (Character/toLowerCase ^char c))) 64 | :default 65 | (string/index-of s (string/lower-case c)))))) 66 | 67 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 68 | 69 | (defn str* 70 | "Builds string from (possibly nested) collections of parsed characters and 71 | strings." 72 | ([x] (-> #?(:clj (StringBuilder.) :cljs (StringBuffer.)) 73 | (str* x) 74 | (str))) 75 | ([sb x] 76 | (if (sequential? x) 77 | (reduce str* sb x) 78 | #?(:clj (.append ^StringBuilder sb (str x)) 79 | :cljs (.append ^StringBuffer sb (str x)))))) 80 | 81 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 82 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/reply.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.reply 2 | {:no-doc true}) 3 | 4 | #?(:clj (set! *warn-on-reflection* true) 5 | :cljs (set! *warn-on-infer* true)) 6 | 7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 8 | 9 | (deftype Context [cok, eok, cerr, eerr]) 10 | 11 | (defn c-ok 12 | "Replies with result value as consumed (consumed-ok)." 13 | [^Context context, state, x] 14 | ((.-cok context) state x)) 15 | 16 | (defn e-ok 17 | "Replies with result value as not consumed (empty-ok)." 18 | [^Context context, state, x] 19 | ((.-eok context) state x)) 20 | 21 | (defn c-err 22 | "Fails with parser error as consumed (consumed-error)." 
23 | [^Context context, error] 24 | ((.-cerr context) error)) 25 | 26 | (defn e-err 27 | "Fails with parser error as not consumed (empty-error)." 28 | [^Context context, error] 29 | ((.-eerr context) error)) 30 | 31 | (defn assign* 32 | "Returns new instance of context with replaced functions, nil arg keep 33 | functions untouched. To be used with macro." 34 | [^Context context, -c-ok, -e-ok, -c-err, -e-err] 35 | (Context. (or -c-ok (.-cok context)) 36 | (or -e-ok (.-eok context)) 37 | (or -c-err (.-cerr context)) 38 | (or -e-err (.-eerr context)))) 39 | 40 | (defmacro assign 41 | "Expands to code updating specified context functions at once." 42 | [context m] 43 | (assert (map? m)) 44 | (let [m (update-keys m (comp eval eval))] 45 | (assert (every? #{c-ok e-ok c-err e-err} (keys m))) 46 | `(assign* ~context ~(m c-ok) ~(m e-ok) ~(m c-err) ~(m e-err)))) 47 | 48 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 49 | 50 | (defrecord Result [value consumed state]) 51 | 52 | (defrecord Failure [consumed error]) 53 | 54 | (defn result? 55 | "True if `reply` is parsing result with value." 56 | {:inline (fn [reply] `(instance? Result ~reply))} 57 | [reply] 58 | (instance? Result reply)) 59 | 60 | (defn error? 61 | "True if `reply` is parser error." 62 | {:inline (fn [reply] `(instance? Failure ~reply))} 63 | [reply] 64 | (instance? Failure reply)) 65 | 66 | (defn value 67 | "Returns value for Result reply or throws exception otherwise." 68 | [reply] 69 | (cond 70 | (result? reply) (:value reply) 71 | (error? reply) (throw (ex-info (str (:error reply)) reply)) 72 | :else (throw (ex-info "Invalid parser reply" {::reply reply})))) 73 | 74 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 75 | 76 | (defn new-context 77 | "Returns new instance of context with initialized reply functions." 78 | [] 79 | (Context. (fn c-ok [s x] (Result. x true s)) 80 | (fn e-ok [s x] (Result. 
x false s)) 81 | (fn c-err [e] (Failure. true e)) 82 | (fn e-err [e] (Failure. false e)))) 83 | 84 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 85 | -------------------------------------------------------------------------------- /doc/demo/honeysql_select.clj: -------------------------------------------------------------------------------- 1 | (ns demo.honeysql-select 2 | "Demo: Parse SQL SELECT query to HoneySQL data structures." 3 | {:clj-kondo/config '{:linters {:missing-docstring {:level :off}}}} 4 | (:require [strojure.parsesso.char :as char] 5 | [strojure.parsesso.parser :as p])) 6 | 7 | (set! *warn-on-reflection* true) 8 | 9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 10 | 11 | (comment 12 | "SELECT u.username, s.name FROM user AS u, status AS s WHERE (u.statusid = s.id) AND (u.id = ?)" 13 | 14 | {:select [:u.username :s.name] 15 | :from [[:user :u] [:status :s]] 16 | :where [:and [:= :u.statusid :s.id] 17 | [:= :u.id 9]]} 18 | 19 | "SELECT username, name FROM user, status WHERE (user.statusid = status.id) AND (user.id = ?)" 20 | 21 | {:select [:username :name] 22 | :from [:user :status] 23 | :where [:and [:= :user.statusid :status.id] 24 | [:= :user.id 9]]}) 25 | 26 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 27 | 28 | (def *space (p/*skip char/white?)) 29 | 30 | (def +space (p/+skip char/white?)) 31 | 32 | (defn comma-sep 33 | "Parses `p` separated by commas." 34 | [p] 35 | (p/+sep-by p (p/maybe (-> (char/is \,) 36 | (p/between *space))))) 37 | 38 | (def table-name 39 | "Parses table name as `:table`." 40 | (-> (p/+many char/letter?) 41 | (p/value char/str* keyword))) 42 | 43 | (def column-name 44 | "Parses column as `:column` or `:table.column`." 45 | (-> (p/group (p/option (p/maybe (p/group (p/+many char/letter?) 
46 | (char/is \.)))) 47 | (p/+many char/letter?)) 48 | (p/value char/str* keyword))) 49 | 50 | (comment 51 | (p/parse column-name "username") #_=> :username 52 | (p/parse column-name "u.username") #_=> :u.username 53 | (p/parse column-name "u.u.username") #_=> :u.u 54 | ) 55 | 56 | (def as-expr 57 | "Parses alias keyword like `:alias` after AS." 58 | (p/after (p/maybe (-> (p/word "as" :ic) (p/between +space))) 59 | (-> (p/+many char/letter?) 60 | (p/value char/str* keyword)))) 61 | 62 | (comment 63 | (p/parse as-expr " AS name") #_=> :name 64 | ) 65 | 66 | (defn with-as 67 | "Parses `p` with optional alias like `:name` or `[:name :alias]`." 68 | [p] 69 | (-> (p/group p (p/option as-expr)) 70 | (p/value (fn [[x as]] (if as [x as] x))))) 71 | 72 | (comment 73 | (p/parse (with-as column-name) "u.username") #_=> :u.username 74 | (p/parse (with-as column-name) "u.username AS name") #_=> [:u.username :name] 75 | ) 76 | 77 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 78 | 79 | (def select-statement 80 | "Parses SQL SELECT statement to `{:select [...] :from [...] ...}`." 
81 | (p/for [_ (p/maybe (p/after (p/word "select" :ic) +space)) 82 | select (comma-sep (with-as column-name)) 83 | _ (-> (p/word "from" :ic) (p/between +space)) 84 | from (comma-sep (with-as table-name))] 85 | (p/result 86 | {:select (vec select) 87 | :from (vec from)}))) 88 | 89 | (comment 90 | (def -q "SELECT username, u.name AS x FROM user AS u, status") 91 | (p/parse select-statement -q) 92 | #_=> {:select [:username [:u.name :x]], 93 | :from [[:user :u] :status]} 94 | ) 95 | 96 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 97 | -------------------------------------------------------------------------------- /src/strojure/parsesso/char.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.char 2 | "Basic function for parsing sequences of characters." 3 | (:refer-clojure :exclude [newline number?]) 4 | (:require #?@(:bb [[clojure.string :as string]] :clj [] :default [[clojure.string :as string]]) 5 | [strojure.parsesso.impl.char :as impl] 6 | [strojure.parsesso.parser :as p])) 7 | 8 | #?(:clj (set! *warn-on-reflection* true) 9 | :cljs (set! *warn-on-infer* true)) 10 | 11 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 12 | 13 | (defn register-string-pred 14 | "Associates keyword `k` with predicate function of the [[is]] and 15 | [[is-not]] parsers." 16 | [k, f] 17 | (impl/register-string-pred-fn k f)) 18 | 19 | (register-string-pred :default impl/string-pred-default) 20 | (register-string-pred :ic impl/string-pred-ignorecase) 21 | 22 | (defn is 23 | "Returns parser and predicate for the character `c` which is in the supplied 24 | string of characters `s` (or a single character). The optional `pred-k` 25 | keyword refers to function `(fn [pred-k s] (fn [c] ...))` which returns custom 26 | predicate for chars against `s`. 
The new `pred-k` should be registered using 27 | [[register-string-pred]], predefined values are `:default` for default and 28 | `:ic` for case-insensitive matching. 29 | 30 | (def control-char (char/is \"EX\")) 31 | 32 | (def control-char-ignorecase (char/is \"ex\" :ic)) 33 | " 34 | ([s] 35 | (p/token (impl/string-pred-default s) 36 | (delay (str (p/render s) " character")))) 37 | ([s, pred-k] 38 | (p/token (impl/string-pred-fn pred-k s) 39 | (delay (str (p/render s) " character"))))) 40 | 41 | (defn is-not 42 | "Returns parser and predicate for the character `c` which is _not_ in the 43 | supplied string of characters `s` (or a single character). See also [[is]] 44 | about optional `pred-k` argument." 45 | ([s] 46 | (p/token (complement (is s)) 47 | (delay (str "not " (p/render s) " character")))) 48 | ([s, pred-k] 49 | (p/token (complement (is s pred-k)) 50 | (delay (str "not " (p/render s) " character"))))) 51 | 52 | (defn regex 53 | "Returns parser and predicate for the character `c` matching regex pattern 54 | `re`." 55 | [re] 56 | (p/token (fn [c] (re-find re (str c))) 57 | (delay (str "character matching regex " (p/render re))))) 58 | 59 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 60 | 61 | (def upper? 62 | "Parser and predicate for ASCII 7 bit upper-case letter character." 63 | (p/token (fn [c] #?(:bb 64 | (re-find #"[A-Z]" (str c)) 65 | :clj 66 | (let [c (unchecked-int (.charValue ^Character c))] 67 | (and (<= 65 c) (<= c 90))) 68 | :default 69 | (re-find #"[A-Z]" (str c)))) 70 | "upper-case ascii letter")) 71 | 72 | (def lower? 73 | "Parser and predicate for ASCII 7 bit lower-case letter character." 74 | (p/token (fn [c] #?(:bb 75 | (re-find #"[a-z]" (str c)) 76 | :clj 77 | (let [c (unchecked-int (.charValue ^Character c))] 78 | (and (<= 97 c) (<= c 122))) 79 | :default 80 | (re-find #"[a-z]" (str c)))) 81 | "lower-case ascii letter")) 82 | 83 | (def letter? 
84 | "Parser and predicate for ASCII 7 bit letter character." 85 | (p/token (fn [c] #?(:bb 86 | (re-find #"[a-zA-Z]" (str c)) 87 | :clj 88 | (or (upper? c) (lower? c)) 89 | :default 90 | (re-find #"[a-zA-Z]" (str c)))) 91 | "ascii letter")) 92 | 93 | (def number? 94 | "Parser and predicate for ASCII 7 bit number character." 95 | (p/token (fn [c] #?(:bb 96 | (re-find #"[0-9]" (str c)) 97 | :clj 98 | (let [c (unchecked-int (.charValue ^Character c))] 99 | (and (<= 48 c) (<= c 57))) 100 | :default 101 | (re-find #"[0-9]" (str c)))) 102 | "ascii number")) 103 | 104 | (def letter-or-number? 105 | "Parser and predicate for ASCII 7 bit letter or number character." 106 | (p/token (fn [c] #?(:bb 107 | (re-find #"[a-zA-Z0-9]" (str c)) 108 | :clj 109 | (or (letter? c) (number? c)) 110 | :default 111 | (re-find #"[a-zA-Z0-9]" (str c)))) 112 | "ascii letter or number")) 113 | 114 | (def white? 115 | "Parser and predicate for ASCII 7 bit whitespace character." 116 | (p/token (fn [c] #?(:bb 117 | (string/index-of " \n\r\t\f" c) 118 | :clj 119 | (Character/isSpace c) 120 | :default 121 | (string/index-of " \n\r\t\f" c))) 122 | "whitespace character")) 123 | 124 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 125 | 126 | (def newline 127 | "Parses a CRLF or LF end of line. Returns a `\\newline` character." 128 | (p/alt (is \newline) 129 | (p/after (is \return) (is \newline)))) 130 | 131 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 132 | 133 | (defn str* 134 | "Builds string from (possibly nested) collections of parsed characters and 135 | strings. To be used with [[strojure.parsesso.parser/value]]." 
136 | [x] 137 | (impl/str* x)) 138 | 139 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 140 | -------------------------------------------------------------------------------- /test/strojure/parsesso/expr_test.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.expr-test 2 | (:require [clojure.string :as string] 3 | [clojure.test :as test :refer [deftest]] 4 | [strojure.parsesso.expr :as expr] 5 | [strojure.parsesso.parser :as p])) 6 | 7 | #_(test/run-tests) 8 | 9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 10 | 11 | (defn- p 12 | "Parses test input using given parser. Returns custom map with test result." 13 | [parser input] 14 | (let [result (p/parse* parser input)] 15 | (if-let [error (:error result)] 16 | (-> (select-keys result [:consumed]) 17 | (assoc :error (-> (str error) (string/split-lines)))) 18 | (select-keys result [:consumed :value])))) 19 | 20 | (defn- tok 21 | [& cs] 22 | (p/token (set cs))) 23 | 24 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 25 | 26 | (deftest +chain-left-t 27 | (test/are [expr result] (= result expr) 28 | 29 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 30 | (tok + - * /)) 31 | [8 - 2 / 2]) 32 | {:consumed true, :value 3} 33 | 34 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 35 | (tok + - * /)) 36 | [8 - 2 2]) 37 | {:consumed true, :value 6} 38 | 39 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 40 | (tok + - * /)) 41 | [1]) 42 | {:consumed true, :value 1} 43 | 44 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 45 | (tok + - * /)) 46 | [+]) 47 | {:consumed false, :error ["error at index 0:" 48 | (str "unexpected " (p/render +))]} 49 | 50 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 51 | (tok + - * /)) 52 | [0]) 53 | {:consumed false, :error ["error at index 0:" 54 | "unexpected 0"]} 55 | 
56 | (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9) 57 | (tok + - * /)) 58 | []) 59 | {:consumed false, :error ["error at index 0:" 60 | "unexpected end of input"]} 61 | 62 | )) 63 | 64 | (deftest *chain-left-t 65 | (test/are [expr result] (= result expr) 66 | 67 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 68 | (tok + - * /) 69 | 0) 70 | [8 - 2 / 2]) 71 | {:consumed true, :value 3} 72 | 73 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 74 | (tok + - * /) 75 | 0) 76 | [8 - 2 2]) 77 | {:consumed true, :value 6} 78 | 79 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 80 | (tok + - * /) 81 | 0) 82 | [1]) 83 | {:consumed true, :value 1} 84 | 85 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 86 | (tok + - * /) 87 | 0) 88 | [+]) 89 | {:consumed false, :value 0} 90 | 91 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 92 | (tok + - * /) 93 | 0) 94 | [0]) 95 | {:consumed false, :value 0} 96 | 97 | (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9) 98 | (tok + - * /) 99 | 0) 100 | []) 101 | {:consumed false, :value 0} 102 | 103 | )) 104 | 105 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 106 | 107 | (deftest +chain-right-t 108 | (test/are [expr result] (= result expr) 109 | 110 | (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 111 | (tok + - * /)) 112 | [8 - 2 / 2]) 113 | {:consumed true, :value 7} 114 | 115 | (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 116 | (tok + - * /)) 117 | [8 - 2 2]) 118 | {:consumed true, :value 6} 119 | 120 | (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 121 | (tok + - * /)) 122 | [1]) 123 | {:consumed true, :value 1} 124 | 125 | (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 126 | (tok + - * /)) 127 | [+]) 128 | {:consumed false, :error ["error at index 0:" 129 | (str "unexpected " (p/render +))]} 130 | 131 | (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 132 | (tok + - * /)) 133 | [0]) 134 | {:consumed false, :error ["error at index 0:" 135 | "unexpected 0"]} 136 | 137 | (p 
(expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9) 138 | (tok + - * /)) 139 | []) 140 | {:consumed false, :error ["error at index 0:" 141 | "unexpected end of input"]} 142 | 143 | )) 144 | 145 | (deftest *chain-right-t 146 | (test/are [expr result] (= result expr) 147 | 148 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 149 | (tok + - * /) 150 | 0) 151 | [8 - 2 / 2]) 152 | {:consumed true, :value 7} 153 | 154 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 155 | (tok + - * /) 156 | 0) 157 | [8 - 2 2]) 158 | {:consumed true, :value 6} 159 | 160 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 161 | (tok + - * /) 162 | 0) 163 | [1]) 164 | {:consumed true, :value 1} 165 | 166 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 167 | (tok + - * /) 168 | 0) 169 | [+]) 170 | {:consumed false, :value 0} 171 | 172 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 173 | (tok + - * /) 174 | 0) 175 | [0]) 176 | {:consumed false, :value 0} 177 | 178 | (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9) 179 | (tok + - * /) 180 | 0) 181 | []) 182 | {:consumed false, :value 0} 183 | 184 | )) 185 | 186 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 187 | -------------------------------------------------------------------------------- /src/strojure/parsesso/impl/error.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.impl.error 2 | {:no-doc true} 3 | (:require [clojure.string :as string] 4 | [strojure.parsesso.impl.pos :as pos] 5 | [strojure.parsesso.impl.state :as state])) 6 | 7 | #?(:clj (set! *warn-on-reflection* true) 8 | :cljs (set! 
*warn-on-infer* true)) 9 | 10 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 11 | 12 | (defprotocol IRenderObject 13 | (render-object [obj] 14 | "Returns string representation of the `obj` in parser error messages.")) 15 | 16 | #?(:clj 17 | (extend-protocol IRenderObject 18 | nil,,,,,, (render-object [x] (pr-str x)) 19 | Object,,, (render-object [x] (pr-str x)) 20 | Character (render-object [c] (pr-str (str c)))) 21 | 22 | :cljs 23 | (extend-protocol IRenderObject 24 | nil,,,,, (render-object [x] (pr-str x)) 25 | object,, (render-object [x] (pr-str x)) 26 | string,, (render-object [x] (pr-str x)) 27 | function (render-object [x] (pr-str x)) 28 | number,, (render-object [x] (str x)))) 29 | 30 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 31 | 32 | (declare render-messages) 33 | 34 | (defrecord ParseError [pos messages] 35 | Object 36 | (toString [_] 37 | (str "error at " pos ":\n" (render-messages messages)))) 38 | 39 | (defn- new-error 40 | [state typ msg] 41 | (ParseError. (state/pos state) (cons [typ msg] nil))) 42 | 43 | (defn sys-unexpected 44 | "Returns “unexpected input” parser error with message `msg`." 45 | [state msg] 46 | (new-error state ::sys-unexpected msg)) 47 | 48 | (defn sys-unexpected-eof 49 | "Returns “unexpected input” parser error in case of end of input." 50 | [state] 51 | (new-error state ::sys-unexpected nil)) 52 | 53 | (defn unexpected 54 | "Returns “unexpected item” parser error with message `msg`." 55 | [state msg] 56 | (new-error state ::unexpected msg)) 57 | 58 | (defn expecting 59 | "Returns new instance of the parser error `err` with replaced “expected item” 60 | message. Ignores nil `msg` and returns just `err`." 61 | [^ParseError err, msg] 62 | (if msg 63 | (ParseError. 
(.-pos err) 64 | (cons [::expecting msg] (filter #(not= ::expecting (first %)) 65 | (.-messages err)))) 66 | err)) 67 | 68 | (defn message 69 | "Returns parser error with some general parser message `msg`, generated by the 70 | `fail` combinator." 71 | [state msg] 72 | (new-error state ::message msg)) 73 | 74 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 75 | 76 | (defn merge-errors 77 | "Returns parser error with messages from another two errors." 78 | [e1 e2] 79 | (let [m1 (:messages e1), m2 (:messages e2)] 80 | ;; prefer meaningful errors 81 | (cond (and m1 (nil? m2)) e1 82 | (and m2 (nil? m1)) e2 83 | :else (let [pos1 (:pos e1)] 84 | ;; select the longest match 85 | (case (int (pos/compare-pos pos1 (:pos e2))) 86 | 1 e1, -1 e2, (ParseError. pos1 (reduce conj m1 m2))))))) 87 | 88 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 89 | 90 | (defn- comma-sep 91 | [-or xs] 92 | (let [xs (->> xs (map str) (filter seq))] 93 | (case (count xs) 94 | 0 nil 95 | 1 (str (first xs)) 96 | (str (string/join ", " (butlast xs)) " " -or " " (last xs))))) 97 | 98 | (defn- render-many 99 | [xs -or prefix] 100 | (when xs 101 | (cond->> (->> xs (map (comp force second)) 102 | (comma-sep -or)) 103 | prefix (str prefix " ")))) 104 | 105 | (defn render-messages 106 | "The standard function for showing error messages. Formats a list of error 107 | messages in English. 
The resulting string will be formatted like: 108 | 109 | - unexpected _{The first UnExpect or a SysUnExpect message}_ 110 | - expecting _{comma separated list of Expect messages}_ 111 | - _{comma separated list of Message messages}_ 112 | 113 | Example: 114 | 115 | unexpected UnExpect1 or UnExpect2 116 | expecting Expect1, Expect2 or Expect3 117 | Message1 or Message2 118 | " 119 | {:arglists '([{:keys [unknown expecting unexpected end-of-input or] :as dict}, messages] 120 | [messages])} 121 | ([messages] (render-messages nil messages)) 122 | ([dict messages] 123 | (let [dict (->> dict (merge {:unknown "unknown parse error" 124 | :expecting "expecting" 125 | :unexpected "unexpected" 126 | :end-of-input "end of input" 127 | :or "or"}))] 128 | (if messages 129 | (let [xs (->> messages 130 | (map #(update % 1 force)) 131 | (distinct) 132 | (reverse) 133 | (group-by first))] 134 | (->> [(when-let [[[_ msg]] (and (not (xs ::unexpected)) 135 | (xs ::sys-unexpected))] 136 | (str (dict :unexpected) " " (or (not-empty msg) 137 | (dict :end-of-input)))) 138 | (render-many (xs ::unexpected) (dict :or) (dict :unexpected)) 139 | (render-many (xs ::expecting) (dict :or) (dict :expecting)) 140 | (render-many (xs ::message) (dict :or) nil)] 141 | (filter some?) 
142 | (string/join "\n"))) 143 | (dict :unknown))))) 144 | 145 | (comment 146 | (->> (list [::message "Message2"] 147 | [::message (delay "Message1")] 148 | [::message "Message1"] 149 | [::expecting "Expect3"] 150 | [::expecting ""] 151 | [::expecting "Expect2"] 152 | [::expecting (delay "Expect2")] 153 | [::expecting "Expect1"] 154 | [::unexpected (delay "UnExpect2")] 155 | [::unexpected "UnExpect1"] 156 | [::sys-unexpected "SysUnExpect"] 157 | [::sys-unexpected nil]) 158 | (render-messages) 159 | (println)) 160 | ) 161 | 162 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 163 | -------------------------------------------------------------------------------- /test/strojure/parsesso/char_test.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.char-test 2 | (:require [clojure.string :as string] 3 | [clojure.test :as test :refer [deftest testing]] 4 | [strojure.parsesso.char :as char] 5 | [strojure.parsesso.parser :as p])) 6 | 7 | #_(test/run-tests) 8 | 9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 10 | 11 | (defn- p 12 | "Parses test input using given parser. Returns custom map with test result." 13 | [parser input] 14 | (let [result (p/parse* parser input)] 15 | (if-let [error (:error result)] 16 | (-> (select-keys result [:consumed]) 17 | (assoc :error (-> (str error) (string/split-lines)))) 18 | (select-keys result [:consumed :value])))) 19 | 20 | (defn- c 21 | "Cross-platform char." 
22 | [s] 23 | #?(:cljs s, :default (first s))) 24 | 25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 26 | 27 | (deftest is-t 28 | (testing "default matching" 29 | (test/are [expr result] (= result expr) 30 | 31 | (p (char/is "abc") 32 | "a") 33 | {:consumed true, :value (c "a")} 34 | 35 | (p (char/is "abc") 36 | "b") 37 | {:consumed true, :value (c "b")} 38 | 39 | (p (char/is "abc") 40 | "c") 41 | {:consumed true, :value (c "c")} 42 | 43 | (p (char/is "abc") 44 | "d") 45 | {:consumed false, :error ["error at line 1, column 1:" 46 | "unexpected \"d\"" 47 | "expecting \"abc\" character"]} 48 | 49 | (p (char/is "abc") 50 | "") 51 | {:consumed false, :error ["error at line 1, column 1:" 52 | "unexpected end of input" 53 | "expecting \"abc\" character"]} 54 | 55 | (p (char/is "a") 56 | "d") 57 | {:consumed false, :error ["error at line 1, column 1:" 58 | "unexpected \"d\"" 59 | "expecting \"a\" character"]} 60 | 61 | (p (char/is "a") 62 | "") 63 | {:consumed false, :error ["error at line 1, column 1:" 64 | "unexpected end of input" 65 | "expecting \"a\" character"]} 66 | 67 | )) 68 | 69 | (testing "case-insensitive matching" 70 | (test/are [expr result] (= result expr) 71 | 72 | (p (char/is "abc" :ic) 73 | "a") 74 | {:consumed true, :value (c "a")} 75 | 76 | (p (char/is "abc" :ic) 77 | "A") 78 | {:consumed true, :value (c "A")} 79 | 80 | (p (char/is "ABC" :ic) 81 | "a") 82 | {:consumed true, :value (c "a")} 83 | 84 | (p (char/is "abc" :ic) 85 | "d") 86 | {:consumed false, :error ["error at line 1, column 1:" 87 | "unexpected \"d\"" 88 | "expecting \"abc\" character"]} 89 | 90 | ))) 91 | 92 | (deftest is-not-t 93 | (testing "default matching" 94 | (test/are [expr result] (= result expr) 95 | 96 | (p (char/is-not "abc") 97 | "x") 98 | {:consumed true, :value (c "x")} 99 | 100 | (p (char/is-not "abc") 101 | "a") 102 | {:consumed false, :error ["error at line 1, column 1:" 103 | "unexpected \"a\"" 104 | "expecting not 
\"abc\" character"]} 105 | 106 | (p (char/is-not "abc") 107 | "") 108 | {:consumed false, :error ["error at line 1, column 1:" 109 | "unexpected end of input" 110 | "expecting not \"abc\" character"]} 111 | 112 | (p (char/is-not "a") 113 | "a") 114 | {:consumed false, :error ["error at line 1, column 1:" 115 | "unexpected \"a\"" 116 | "expecting not \"a\" character"]} 117 | 118 | (p (char/is-not "a") 119 | "") 120 | {:consumed false, :error ["error at line 1, column 1:" 121 | "unexpected end of input" 122 | "expecting not \"a\" character"]} 123 | 124 | )) 125 | 126 | (testing "case insensitive matching" 127 | (test/are [expr result] (= result expr) 128 | 129 | (p (char/is-not "abc" :ic) 130 | "x") 131 | {:consumed true, :value (c "x")} 132 | 133 | (p (char/is-not "abc" :ic) 134 | "a") 135 | {:consumed false, :error ["error at line 1, column 1:" 136 | "unexpected \"a\"" 137 | "expecting not \"abc\" character"]} 138 | 139 | (p (char/is-not "abc" :ic) 140 | "A") 141 | {:consumed false, :error ["error at line 1, column 1:" 142 | "unexpected \"A\"" 143 | "expecting not \"abc\" character"]} 144 | 145 | (p (char/is-not "a" :ic) 146 | "a") 147 | {:consumed false, :error ["error at line 1, column 1:" 148 | "unexpected \"a\"" 149 | "expecting not \"a\" character"]} 150 | 151 | (p (char/is-not "a" :ic) 152 | "A") 153 | {:consumed false, :error ["error at line 1, column 1:" 154 | "unexpected \"A\"" 155 | "expecting not \"a\" character"]} 156 | 157 | ))) 158 | 159 | (deftest regex-t 160 | (test/are [expr result] (= result expr) 161 | 162 | (p (p/*many (char/regex #"[a-z]")) 163 | "abc") 164 | {:consumed true, :value (seq "abc")} 165 | 166 | (p (char/regex #"[a-z]") 167 | "A") 168 | {:consumed false, :error ["error at line 1, column 1:" 169 | "unexpected \"A\"" 170 | "expecting character matching regex #\"[a-z]\""]} 171 | 172 | (p (char/regex #"[a-z]") 173 | "") 174 | {:consumed false, :error ["error at line 1, column 1:" 175 | "unexpected end of input" 176 | "expecting 
character matching regex #\"[a-z]\""]} 177 | 178 | )) 179 | 180 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 181 | 182 | (deftest letter?-t 183 | (test/are [expr result] (= result expr) 184 | 185 | (p char/letter? 186 | "a") 187 | {:consumed true, :value (c "a")} 188 | 189 | (p char/letter? 190 | "1") 191 | {:consumed false, :error ["error at line 1, column 1:" 192 | "unexpected \"1\"" 193 | "expecting ascii letter"]} 194 | 195 | (p char/letter? 196 | "") 197 | {:consumed false, :error ["error at line 1, column 1:" 198 | "unexpected end of input" 199 | "expecting ascii letter"]} 200 | 201 | )) 202 | 203 | (deftest upper?-t 204 | (test/are [expr result] (= result expr) 205 | 206 | (p (p/*many char/upper?) 207 | "ABC") 208 | {:consumed true, :value (seq "ABC")} 209 | 210 | (p char/upper? 211 | "a") 212 | {:consumed false, :error ["error at line 1, column 1:" 213 | "unexpected \"a\"" 214 | "expecting upper-case ascii letter"]} 215 | 216 | (p char/upper? 217 | "") 218 | {:consumed false, :error ["error at line 1, column 1:" 219 | "unexpected end of input" 220 | "expecting upper-case ascii letter"]} 221 | 222 | )) 223 | 224 | (deftest lower?-t 225 | (test/are [expr result] (= result expr) 226 | 227 | (p (p/*many char/lower?) 228 | "abc") 229 | {:consumed true, :value (seq "abc")} 230 | 231 | (p char/lower? 232 | "A") 233 | {:consumed false, :error ["error at line 1, column 1:" 234 | "unexpected \"A\"" 235 | "expecting lower-case ascii letter"]} 236 | 237 | (p char/lower? 238 | "") 239 | {:consumed false, :error ["error at line 1, column 1:" 240 | "unexpected end of input" 241 | "expecting lower-case ascii letter"]} 242 | 243 | )) 244 | 245 | (deftest number?-t 246 | (test/are [expr result] (= result expr) 247 | 248 | (p (p/*many char/number?) 249 | "01234567890") 250 | {:consumed true, :value (seq "01234567890")} 251 | 252 | (p char/number? 
253 | "a") 254 | {:consumed false, :error ["error at line 1, column 1:" 255 | "unexpected \"a\"" 256 | "expecting ascii number"]} 257 | 258 | (p char/number? 259 | "") 260 | {:consumed false, :error ["error at line 1, column 1:" 261 | "unexpected end of input" 262 | "expecting ascii number"]} 263 | 264 | )) 265 | 266 | (deftest letter-or-number?-t 267 | (test/are [expr result] (= result expr) 268 | 269 | (p (p/*many char/letter-or-number?) 270 | "12345abcABC") 271 | {:consumed true, :value (seq "12345abcABC")} 272 | 273 | (p char/letter-or-number? 274 | "-") 275 | {:consumed false, :error ["error at line 1, column 1:" 276 | "unexpected \"-\"" 277 | "expecting ascii letter or number"]} 278 | 279 | (p char/letter-or-number? 280 | "") 281 | {:consumed false, :error ["error at line 1, column 1:" 282 | "unexpected end of input" 283 | "expecting ascii letter or number"]} 284 | 285 | )) 286 | 287 | (deftest white?-t 288 | (test/are [expr result] (= result expr) 289 | 290 | (p (p/*many char/white?) 291 | " \t\r\n") 292 | {:consumed true, :value (seq " \t\r\n")} 293 | 294 | (p char/white? 295 | "a") 296 | {:consumed false, :error ["error at line 1, column 1:" 297 | "unexpected \"a\"" 298 | "expecting whitespace character"]} 299 | 300 | (p char/white? 
301 | "") 302 | {:consumed false, :error ["error at line 1, column 1:" 303 | "unexpected end of input" 304 | "expecting whitespace character"]} 305 | 306 | )) 307 | 308 | (deftest newline-t 309 | (test/are [expr result] (= result expr) 310 | 311 | (p char/newline 312 | "\n") 313 | {:consumed true, :value (c "\n")} 314 | 315 | (p char/newline 316 | "\r\n") 317 | {:consumed true, :value (c "\n")} 318 | 319 | (p char/newline 320 | "\ra") 321 | {:consumed true, :error ["error at line 1, column 2:" 322 | "unexpected \"a\"" 323 | "expecting \"\\n\" character"]} 324 | 325 | (p char/newline 326 | "\r") 327 | {:consumed true, :error ["error at line 1, column 2:" 328 | "unexpected end of input" 329 | "expecting \"\\n\" character"]} 330 | 331 | (p char/newline 332 | "a") 333 | {:consumed false, :error ["error at line 1, column 1:" 334 | "unexpected \"a\"" 335 | "expecting \"\\n\" character or \"\\r\" character"]} 336 | 337 | (p char/newline 338 | "") 339 | {:consumed false, :error ["error at line 1, column 1:" 340 | "unexpected end of input" 341 | "expecting \"\\n\" character or \"\\r\" character"]} 342 | 343 | )) 344 | 345 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 346 | 347 | (deftest str*-t 348 | (test/are [expr result] (= result expr) 349 | 350 | (p (-> (char/is "abc") 351 | (p/value char/str*)) 352 | "abc") 353 | {:consumed true, :value "a"} 354 | 355 | (p (-> (p/group (p/+many (char/is "abc")) 356 | (p/+many (char/is "123"))) 357 | (p/value char/str*)) 358 | "abc123") 359 | {:consumed true, :value "abc123"} 360 | 361 | (p (-> (p/*many (char/is "abc")) 362 | (p/value char/str*)) 363 | "123") 364 | {:consumed false, :value ""} 365 | 366 | (p (-> (char/is "abc") 367 | (p/value char/str*)) 368 | "123") 369 | {:consumed false, :error ["error at line 1, column 1:" 370 | "unexpected \"1\"" 371 | "expecting \"abc\" character"]} 372 | 373 | )) 374 | 375 | 
;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 376 | -------------------------------------------------------------------------------- /doc/benchmarks/compare.clj: -------------------------------------------------------------------------------- 1 | (ns benchmarks.compare 2 | "Some benchmarks between parsesso, kern and parsatron. There is no jasentaa 3 | here because it is very slow." 4 | (:require [blancas.kern.core :as k] 5 | [strojure.parsesso.char :as char] 6 | [strojure.parsesso.parser :as p] 7 | [the.parsatron :as t])) 8 | 9 | (set! *warn-on-reflection* true) 10 | 11 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 12 | 13 | (defn- t-run 14 | [p input] 15 | (t/run-parser p (t/->InputState input (t/->SourcePos 1 1)))) 16 | 17 | (def ^:private -input-10000 (repeat 10000 :a)) 18 | 19 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 20 | 21 | ;; ## Return value without parsing ## 22 | 23 | (p/parse (p/result :x) []) 24 | ; Execution time mean : 175,877194 ns 25 | ; Execution time std-deviation : 34,075714 ns 26 | ; Execution time lower quantile : 153,729903 ns ( 2,5%) 27 | ; Execution time upper quantile : 217,875203 ns (97,5%) 28 | 29 | (k/parse (k/return :x) []) 30 | ; Execution time mean : 233,470315 ns 31 | ; Execution time std-deviation : 66,244027 ns 32 | ; Execution time lower quantile : 178,201399 ns ( 2,5%) 33 | ; Execution time upper quantile : 326,518209 ns (97,5%) 34 | 35 | (t/run (t/always :x) []) 36 | ; Execution time mean : 168,392753 ns 37 | ; Execution time std-deviation : 68,636364 ns 38 | ; Execution time lower quantile : 123,449569 ns ( 2,5%) 39 | ; Execution time upper quantile : 252,628602 ns (97,5%) 40 | 41 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 42 | 43 | ;; ## Fail immediately without parsing ## 44 | 45 | (p/parse* 
(p/fail :x) []) 46 | ; Execution time mean : 188,952263 ns 47 | ; Execution time std-deviation : 17,000877 ns 48 | ; Execution time lower quantile : 172,453755 ns ( 2,5%) 49 | ; Execution time upper quantile : 210,153699 ns (97,5%) 50 | 51 | (k/parse (k/fail :x) []) 52 | ; Execution time mean : 386,590746 ns 53 | ; Execution time std-deviation : 156,097460 ns 54 | ; Execution time lower quantile : 266,519628 ns ( 2,5%) 55 | ; Execution time upper quantile : 640,785168 ns (97,5%) 56 | 57 | (t-run (t/never) []) 58 | ; Execution time mean : 841,250545 ns 59 | ; Execution time std-deviation : 206,671857 ns 60 | ; Execution time lower quantile : 703,388694 ns ( 2,5%) 61 | ; Execution time upper quantile : 1,115857 µs (97,5%) 62 | 63 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 64 | 65 | ;; ## Parse token ## 66 | 67 | (p/parse (p/token #(= \a %)) "abc") 68 | ; Execution time mean : 280,963465 ns 69 | ; Execution time std-deviation : 16,328760 ns 70 | ; Execution time lower quantile : 268,625666 ns ( 2,5%) 71 | ; Execution time upper quantile : 307,169162 ns (97,5%) 72 | 73 | (k/parse (k/satisfy #(= \a %)) "abc") 74 | ; Execution time mean : 245,984170 ns 75 | ; Execution time std-deviation : 13,553994 ns 76 | ; Execution time lower quantile : 235,005603 ns ( 2,5%) 77 | ; Execution time upper quantile : 268,329750 ns (97,5%) 78 | 79 | (t/run (t/token #(= \a %)) "abc") 80 | ; Execution time mean : 557,024259 ns 81 | ; Execution time std-deviation : 14,359373 ns 82 | ; Execution time lower quantile : 541,631508 ns ( 2,5%) 83 | ; Execution time upper quantile : 578,875966 ns (97,5%) 84 | 85 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 86 | 87 | ;; ## Parse word ## 88 | 89 | (p/parse (p/word "abc") "abc") 90 | ; Execution time mean : 492,578323 ns 91 | ; Execution time std-deviation : 23,516467 ns 92 | ; Execution time lower quantile : 471,832427 ns ( 2,5%) 93 | 
; Execution time upper quantile : 531,601202 ns (97,5%) 94 | 95 | (k/parse (k/token* "abc") "abc") 96 | ; Execution time mean : 4,020720 µs 97 | ; Execution time std-deviation : 429,048420 ns 98 | ; Execution time lower quantile : 3,767589 µs ( 2,5%) 99 | ; Execution time upper quantile : 4,754242 µs (97,5%) 100 | 101 | (t/run (t/string "abc") "abc") 102 | ; Execution time mean : 2,212562 µs 103 | ; Execution time std-deviation : 91,094400 ns 104 | ; Execution time lower quantile : 2,126279 µs ( 2,5%) 105 | ; Execution time upper quantile : 2,342896 µs (97,5%) 106 | 107 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 108 | 109 | ;; ## Parse word, case-insensitive ## 110 | 111 | (p/parse (p/word "abc" :ic) "ABC") 112 | ; Execution time mean : 631,199580 ns 113 | ; Execution time std-deviation : 9,939793 ns 114 | ; Execution time lower quantile : 618,951019 ns ( 2,5%) 115 | ; Execution time upper quantile : 641,954344 ns (97,5%) 116 | 117 | (k/parse (k/token- "abc") "ABC") 118 | ; Execution time mean : 5,063223 µs 119 | ; Execution time std-deviation : 212,754488 ns 120 | ; Execution time lower quantile : 4,915983 µs ( 2,5%) 121 | ; Execution time upper quantile : 5,412170 µs (97,5%) 122 | 123 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 124 | 125 | ;; ## Parse long word ## 126 | 127 | (p/parse (p/word -input-10000) -input-10000) 128 | ; Execution time mean : 190,951777 µs 129 | ; Execution time std-deviation : 15,767078 µs 130 | ; Execution time lower quantile : 177,441117 µs ( 2,5%) 131 | ; Execution time upper quantile : 209,664373 µs (97,5%) 132 | 133 | (comment 134 | (k/parse (k/token* -input-10000) -input-10000)) 135 | ; Execution error (StackOverflowError) at blancas.kern.core/>>=$fn 136 | 137 | (t/run (t/string -input-10000) -input-10000) 138 | ; Execution time mean : 5,677465 ms 139 | ; Execution time std-deviation : 961,844848 µs 140 | ; 
Execution time lower quantile : 4,976587 ms ( 2,5%) 141 | ; Execution time upper quantile : 6,805795 ms (97,5%) 142 | 143 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 144 | 145 | ;; ## Parse letters ## 146 | 147 | (p/parse (p/*many char/letter?) "abc") 148 | ; Execution time mean : 975,326535 ns 149 | ; Execution time std-deviation : 65,828611 ns 150 | ; Execution time lower quantile : 915,594047 ns ( 2,5%) 151 | ; Execution time upper quantile : 1,059000 µs (97,5%) 152 | 153 | (k/parse (k/many k/letter) "abc") 154 | ; Execution time mean : 1,911586 µs 155 | ; Execution time std-deviation : 511,124107 ns 156 | ; Execution time lower quantile : 1,646502 µs ( 2,5%) 157 | ; Execution time upper quantile : 2,783604 µs (97,5%) 158 | 159 | (t/run (t/many (t/letter)) "abc") 160 | ; Execution time mean : 2,599675 µs 161 | ; Execution time std-deviation : 576,904794 ns 162 | ; Execution time lower quantile : 2,193151 µs ( 2,5%) 163 | ; Execution time upper quantile : 3,354449 µs (97,5%) 164 | 165 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 166 | 167 | ;; ## Parse letters as string ## 168 | 169 | (p/parse (-> (p/*many char/letter?) 
(p/value char/str*)) "abc") 170 | ; Execution time mean : 1,514160 µs 171 | ; Execution time std-deviation : 104,898493 ns 172 | ; Execution time lower quantile : 1,439323 µs ( 2,5%) 173 | ; Execution time upper quantile : 1,680704 µs (97,5%) 174 | 175 | (k/parse (k/<+> (k/many k/letter)) "abc") 176 | ; Execution time mean : 5,568215 µs 177 | ; Execution time std-deviation : 145,037838 ns 178 | ; Execution time lower quantile : 5,459951 µs ( 2,5%) 179 | ; Execution time upper quantile : 5,810555 µs (97,5%) 180 | 181 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 182 | 183 | ;; ## Parse `many` for long input ## 184 | 185 | (p/parse (p/*many (p/token #(= :a %))) -input-10000) 186 | ; Execution time mean : 1,311809 ms 187 | ; Execution time std-deviation : 96,377398 µs 188 | ; Execution time lower quantile : 1,223376 ms ( 2,5%) 189 | ; Execution time upper quantile : 1,426319 ms (97,5%) 190 | 191 | (k/parse (k/many (k/satisfy #(= :a %))) -input-10000) 192 | ; Execution time mean : 1,928105 ms 193 | ; Execution time std-deviation : 62,373984 µs 194 | ; Execution time lower quantile : 1,868339 ms ( 2,5%) 195 | ; Execution time upper quantile : 2,024112 ms (97,5%) 196 | 197 | (t/run (t/many (t/token #(= :a %))) -input-10000) 198 | ; Execution time mean : 1,066323 sec 199 | ; Execution time std-deviation : 159,363140 ms 200 | ; Execution time lower quantile : 984,876092 ms ( 2,5%) 201 | ; Execution time upper quantile : 1,341844 sec (97,5%) 202 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 203 | 204 | ;; ## Skip `many` for long input ## 205 | 206 | (p/parse (p/*skip (p/token #(= :a %))) -input-10000) 207 | ; Execution time mean : 1,043996 ms 208 | ; Execution time std-deviation : 252,158552 µs 209 | ; Execution time lower quantile : 893,890237 µs ( 2,5%) 210 | ; Execution time upper quantile : 1,465919 ms (97,5%) 211 | 212 | (k/parse (k/skip-many (k/satisfy #(= 
:a %))) -input-10000) 213 | ; Execution time mean : 1,416146 ms 214 | ; Execution time std-deviation : 35,717820 µs 215 | ; Execution time lower quantile : 1,379739 ms ( 2,5%) 216 | ; Execution time upper quantile : 1,451345 ms (97,5%) 217 | 218 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 219 | 220 | ;; ## The `alt` combinator ## 221 | 222 | (p/parse (p/alt (p/fail "a") 223 | (p/fail "b") 224 | (p/result :x)) []) 225 | ; Execution time mean : 573,340067 ns 226 | ; Execution time std-deviation : 46,346310 ns 227 | ; Execution time lower quantile : 511,933832 ns ( 2,5%) 228 | ; Execution time upper quantile : 624,550670 ns (97,5%) 229 | 230 | (k/parse (k/<|> (k/fail "a") 231 | (k/fail "b") 232 | (k/return :x)) []) 233 | ; Execution time mean : 1,754808 µs 234 | ; Execution time std-deviation : 148,221426 ns 235 | ; Execution time lower quantile : 1,618505 µs ( 2,5%) 236 | ; Execution time upper quantile : 1,924351 µs (97,5%) 237 | 238 | (t/run (t/choice (t/never) 239 | (t/never) 240 | (t/always :x)) []) 241 | ; Execution time mean : 697,151006 ns 242 | ; Execution time std-deviation : 165,879602 ns 243 | ; Execution time lower quantile : 570,024598 ns ( 2,5%) 244 | ; Execution time upper quantile : 961,147185 ns (97,5%) 245 | 246 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 247 | 248 | ;; ## Wrap with `expecting` ## 249 | 250 | (p/parse (-> (p/result :x) (p/expecting "x")) []) 251 | ; Execution time mean : 212,033445 ns 252 | ; Execution time std-deviation : 20,071125 ns 253 | ; Execution time lower quantile : 196,685023 ns ( 2,5%) 254 | ; Execution time upper quantile : 238,117212 ns (97,5%) 255 | 256 | (k/parse (k/<?> (k/return :x) "x") []) 257 | ; Execution time mean : 222,587325 ns 258 | ; Execution time std-deviation : 16,924812 ns 259 | ; Execution time lower quantile : 205,615791 ns ( 2,5%) 260 | ; Execution time upper quantile : 240,220579 ns 
(97,5%) 261 | 262 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 263 | 264 | ;; ## Test for the end of input ## 265 | 266 | (p/parse* p/eof " ") 267 | ; Execution time mean : 231,661354 ns 268 | ; Execution time std-deviation : 25,008376 ns 269 | ; Execution time lower quantile : 209,952763 ns ( 2,5%) 270 | ; Execution time upper quantile : 262,847436 ns (97,5%) 271 | 272 | (k/parse k/eof " ") 273 | ; Execution time mean : 1,428015 µs 274 | ; Execution time std-deviation : 81,057937 ns 275 | ; Execution time lower quantile : 1,352623 µs ( 2,5%) 276 | ; Execution time upper quantile : 1,560179 µs (97,5%) 277 | 278 | (t-run (t/eof) " ") 279 | ; Execution time mean : 882,705676 ns 280 | ; Execution time std-deviation : 46,738939 ns 281 | ; Execution time lower quantile : 837,580307 ns ( 2,5%) 282 | ; Execution time upper quantile : 948,317437 ns (97,5%) 283 | 284 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 285 | 286 | (p/parse (p/after (p/word "<!--") 287 | (p/*many-till p/any-token (p/maybe (p/word "-->")))) 288 | "<!-- comment -->") 289 | ; Execution time mean : 7,450434 µs 290 | ; Execution time std-deviation : 607,080144 ns 291 | ; Execution time lower quantile : 6,900613 µs ( 2,5%) 292 | ; Execution time upper quantile : 8,221736 µs (97,5%) 293 | 294 | (k/parse (k/>> (k/token* "<!--") 295 | (k/many-till k/any-char (k/<:> (k/token* "-->")))) 296 | "<!-- comment -->") 297 | ; Execution time mean : 84,653453 µs 298 | ; Execution time std-deviation : 2,870985 µs 299 | ; Execution time lower quantile : 81,222728 µs ( 2,5%) 300 | ; Execution time upper quantile : 87,938498 µs (97,5%) 301 | 302 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 303 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # parsesso 2 | 3 | [Parser combinators](https://en.wikipedia.org/wiki/Parser_combinator) for 4 | Clojure(Script). 
5 | 6 | [![Clojars Project](https://img.shields.io/clojars/v/com.github.strojure/parsesso.svg)](https://clojars.org/com.github.strojure/parsesso) 7 | ![ClojarsDownloads](https://img.shields.io/clojars/dt/com.github.strojure/parsesso) 8 | 9 | [![cljdoc badge](https://cljdoc.org/badge/com.github.strojure/parsesso)](https://cljdoc.org/d/com.github.strojure/parsesso) 10 | [![cljs compatible](https://img.shields.io/badge/cljs-compatible-green)](https://clojurescript.org/) 11 | [![bb compatible](https://raw.githubusercontent.com/babashka/babashka/master/logo/badge.svg)](https://book.babashka.org#badges) 12 | [![tests](https://github.com/strojure/parsesso/actions/workflows/tests.yml/badge.svg)](https://github.com/strojure/parsesso/actions/workflows/tests.yml) 13 | 14 | ## Motivation 15 | 16 | * Idiomatic and convenient API for parser combinators in Clojure and 17 | ClojureScript. 18 | 19 | ## Inspiration 20 | 21 | * [haskell/parsec](https://github.com/haskell/parsec) 22 | * [blancas/kern](https://github.com/blancas/kern) 23 | * [youngnh/parsatron](https://github.com/youngnh/parsatron) 24 | * [rm-hull/jasentaa](https://github.com/rm-hull/jasentaa) 25 | 26 | ## Documentation 27 | 28 | As there is no comprehensive documentation on how to use `parsesso` yet, there 29 | are other resources to get familiar with the idea of parser combinators in Clojure: 30 | 31 | - [Kern documentation wiki](https://github.com/blancas/kern/wiki). 
32 | 33 | ## Cheat sheet 34 | 35 | | Parsesso | Parsec[1],[2],[3] | Kern[4] | Parsatron[5] | 36 | |---------------------------------------|---------------------------------|-------------------------|-------------------------| 37 | | [p/do-parser] | | `fwd` | `defparser` | 38 | | [p/result] | `return` | `return` | `always` | 39 | | [p/fail] | `fail` | `fail` | `never` | 40 | | [p/fail-unexpected] | `unexpected` | `unexpected` | | 41 | | [p/expecting] | `<?>`, `label` | `<?>`, `expect` | | 42 | | [p/bind] | `>>=` | `>>=` | `bind` | 43 | | [p/for] | `do` | `bind` | `let->>` | 44 | | [p/after] | `>>` | `>>` | `>>`, `nxt` | 45 | | [p/value] | `fmap` | `<$>` | | 46 | | [p/maybe] | `try` | `<:>` | `attempt` | 47 | | [p/look-ahead] | `lookAhead` | `look-ahead` | `lookahead` | 48 | | [p/not-followed-by] | `notFollowedBy` | `not-followed-by` | | 49 | | [p/*many] | `many` | `many` | `many` | 50 | | [p/+many] | `many1` | `many1` | `many1` | 51 | | [p/*skip] | `skipMany` | `skip-many` | | 52 | | [p/+skip] | `skipMany1` | `skip-many1` | | 53 | | [p/token] | `token`, `satisfy` | `satisfy` | `token` | 54 | | [p/token-not] | | | | 55 | | [p/word] | `tokens`, `string` | `token*` | `string` | 56 | | [p/any-token] | `anyToken`, `anyChar` | `any-char` | `any-char` | 57 | | [p/eof] | `eof` | `eof` | `eof` | 58 | | [p/group] | `<*>` | `<*>` | | 59 | | [p/alt] | <code>&lt;&#124;&gt;</code>, `choice` | <code>&lt;&#124;&gt;</code> | `choice` | 60 | | [p/option] | `option`, `optional` | `option`, `optional` | | 61 | | [p/between] | `between` | `between` | `between` | 62 | | [p/times] | `count` | `times` | `times` | 63 | | [p/*many-till] | `manyTill` | `many-till` | | 64 | | [p/*sep-by] | `sepBy` | `sep-by` | | 65 | | [p/+sep-by] | `sepBy1` | `sep-by1` | | 66 | | [p/*sep-end-by] | `endBy` | `end-by` | | 67 | | [p/+sep-end-by] | `endBy1` | `end-by1` | | 68 | | [p/*sep-opt-by] | `sepEndBy` | `sep-end-by` | | 69 | | [p/+sep-opt-by] | `sepEndBy1` | `sep-end-by1` | | 70 | | [p/get-state] | `getParserState`... 
| input, pos, user state | | 71 | | [p/set-state] | `setParserState`... | input, pos, user state | | 72 | | [p/update-state] | `updateParserState`... | user state | | 73 | | [p/trace] | `parserTrace`, `parserTraced` | | | 74 | | [expr/*chain-left] | `chainl` | `chainl` | | 75 | | [expr/+chain-left] | `chainl1` | `chainl1` | | 76 | | [expr/*chain-right] | `chainr` | `chainr` | | 77 | | [expr/+chain-right] | `chainr1` | `chainr1` | | 78 | | [char/is] | `char`, `oneOf` | `sym*`, `one-of*` | `char` | 79 | | [char/is-not] | `noneOf` | `none-of*` | | 80 | | [char/regex] | | | | 81 | | [char/upper?] | `upper` | `upper` (unicode) | | 82 | | [char/lower?] | `lower` | `lower` (unicode) | | 83 | | [char/letter?] | `letter` | `letter` (unicode) | `letter` (unicode) | 84 | | [char/number?] | `digit` | `digit` (unicode) | `digit` (unicode) | 85 | | [char/letter-or-number?] | `alphaNum` | `alpha-num` (unicode) | | 86 | | [char/white?] | `space` | `white-space` (unicode) | | 87 | | [char/newline] | `endOfLine` | `new-line*` | | 88 | | [char/str*] | | `<+>` | | 89 | 90 | [1]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Prim.hs 91 | 92 | [2]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Combinator.hs 93 | 94 | [3]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Char.hs 95 | 96 | [4]: https://github.com/blancas/kern/blob/master/src/main/clojure/blancas/kern/core.clj 97 | 98 | [5]: https://github.com/youngnh/parsatron/blob/master/src/clj/the/parsatron.clj 99 | 100 | [p/do-parser]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#do-parser 101 | 102 | [p/result]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#result 103 | 104 | [p/fail]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#fail 105 | 106 | [p/fail-unexpected]: 
https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#fail-unexpected 107 | 108 | [p/expecting]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#expecting 109 | 110 | [p/bind]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#bind 111 | 112 | [p/for]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#for 113 | 114 | [p/after]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#after 115 | 116 | [p/value]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#value 117 | 118 | [p/maybe]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#maybe 119 | 120 | [p/look-ahead]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#look-ahead 121 | 122 | [p/not-followed-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#not-followed-by 123 | 124 | [p/*many]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*many 125 | 126 | [p/+many]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+many 127 | 128 | [p/*skip]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*skip 129 | 130 | [p/+skip]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+skip 131 | 132 | [p/token]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#token 133 | 134 | [p/token-not]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#token-not 135 | 136 | [p/word]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#word 137 | 138 | [p/any-token]: 
https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#any-token 139 | 140 | [p/eof]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#eof 141 | 142 | [p/group]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#group 143 | 144 | [p/alt]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#alt 145 | 146 | [p/option]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#option 147 | 148 | [p/between]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#between 149 | 150 | [p/times]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#times 151 | 152 | [p/*many-till]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*many-till 153 | 154 | [p/*sep-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-by 155 | 156 | [p/+sep-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-by 157 | 158 | [p/*sep-end-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-end-by 159 | 160 | [p/+sep-end-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-end-by 161 | 162 | [p/*sep-opt-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-opt-by 163 | 164 | [p/+sep-opt-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-opt-by 165 | 166 | [p/get-state]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#get-state 167 | 168 | [p/set-state]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#set-state 169 | 170 | [p/update-state]: 
https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#update-state 171 | 172 | [p/trace]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#trace 173 | 174 | [expr/*chain-left]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#*chain-left 175 | 176 | [expr/+chain-left]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#+chain-left 177 | 178 | [expr/*chain-right]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#*chain-right 179 | 180 | [expr/+chain-right]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#+chain-right 181 | 182 | [char/is]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#is 183 | 184 | [char/is-not]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#is-not 185 | 186 | [char/regex]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#regex 187 | 188 | [char/upper?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#upper? 189 | 190 | [char/lower?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#lower? 191 | 192 | [char/letter?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#letter? 193 | 194 | [char/number?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#number? 195 | 196 | [char/letter-or-number?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#letter-or-number? 197 | 198 | [char/white?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#white? 
199 | 200 | [char/newline]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#newline 201 | 202 | [char/str*]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#str* 203 | 204 | ## Examples 205 | 206 | * [HoneySQL SELECT](doc/demo/honeysql_select.clj) 207 | 208 | ## Performance 209 | 210 | See some benchmarks [here](doc/benchmarks/compare.clj). 211 | 212 | ## FAQ 213 | 214 | **What parser combinators are & are good for? How does it differ e.g. from 215 | Instaparse, which also parses text into data?** 216 | 217 | A parser combinator library is a library with functions that can be composed 218 | into a parser. Instaparse takes a grammar specification, but in a parser 219 | combinator library you build the specification from functions, rather than a 220 | DSL. 221 | 222 | **When should I pick parser combinators over EBNF? Do they offer the same, 223 | and it is only question of which one I prefer to learn or is there some distinct 224 | advantage over a DSL such as EBNF? Perhaps it is easier to describe more complex 225 | grammars b/c I can make my own helper functions, or something?** 226 | 227 | In general, parser combinators such as `parsesso` are for creating top-down 228 | (i.e. LL) parsers, with the ability to reuse common code (this lib). Parser 229 | Generators typically generate a finite state automaton for a bottom-up (LR) 230 | parser. Though nowadays there are also combinators for LR grammars and 231 | generators for LL ones (e.g. ANTLR). Which one you should use, depends on how 232 | hard your grammar is, and how fast the parser needs to be. Especially if the 233 | grammar has lot of non-trivial ambiguities then it might be easier with the more 234 | flexible combinators approach. 235 | 236 | ## Contributors 237 | 238 | - [Michiel Borkent](https://github.com/borkdude) 239 | + Compatibility with babashka. 240 | + Github CI configuration. 241 | + Clj-kondo configuration tips. 
242 | - [Jakub Holý](https://github.com/holyjak) 243 | + Questions and answers in FAQ. 244 | -------------------------------------------------------------------------------- /src/strojure/parsesso/parser.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.parser 2 | "Main namespace with parsers and their combinators." 3 | (:refer-clojure :exclude [for]) 4 | (:require [strojure.parsesso.impl.char :as char] 5 | [strojure.parsesso.impl.error :as error] 6 | [strojure.parsesso.impl.parser :as parser] 7 | [strojure.parsesso.impl.pos :as pos] 8 | [strojure.parsesso.impl.reply :as reply :include-macros true] 9 | [strojure.parsesso.impl.state :as state]) 10 | #?(:clj (:import (clojure.lang ISeq)) 11 | :cljs (:require-macros [strojure.parsesso.parser :refer [for do-parser]]))) 12 | 13 | #?(:clj (set! *warn-on-reflection* true) 14 | :cljs (set! *warn-on-infer* true)) 15 | 16 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 17 | 18 | (def ^{:arglists '([obj])} 19 | render 20 | "Returns string representation of the `obj` in parser error messages." 21 | error/render-object) 22 | 23 | (defmacro do-parser 24 | "Delays the evaluation of a parser that was forward (declare)d and 25 | it has not been defined yet. For use in (def)s of no-arg parsers, 26 | since the parser expression evaluates immediately." 27 | [& body] 28 | `(fn [state# context#] 29 | (parser/go (do ~@body) state# context#))) 30 | 31 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 32 | 33 | ;; ## Parsers ## 34 | 35 | (defn result 36 | "This parser always succeeds with value `x` without consuming any input. 37 | 38 | - Fails: never. 39 | - Consumes: never. 40 | " 41 | [x] 42 | (fn [state context] 43 | (reply/e-ok context state x))) 44 | 45 | (defn fail 46 | "This parser always fails with message `msg` without consuming any input. 
47 | 48 | - Fails: always. 49 | - Consumes: never. 50 | " 51 | ([msg] 52 | (fn [state context] 53 | (reply/e-err context (error/message state msg)))) 54 | ([] 55 | (fail nil))) 56 | 57 | (defn fail-unexpected 58 | "This parser always fails with an unexpected error message `msg` without 59 | consuming any input. 60 | 61 | - Fails: always. 62 | - Consumes: never. 63 | " 64 | [msg] 65 | (fn [state context] 66 | (reply/e-err context (error/unexpected state (or msg (delay (render msg))))))) 67 | 68 | (defn expecting 69 | "This parser behaves as parser `p`, but whenever the parser `p` fails _without 70 | consuming any input_, it replaces expect error messages with the expect error 71 | message `msg`. 72 | 73 | This is normally used at the end of a set of alternatives where we want to return 74 | an error message in terms of a higher level construct rather than returning 75 | all possible characters. For example, if the `expr` parser from the [[maybe]] 76 | example would fail, the error message is: '...: expecting expression'. Without 77 | the [[expecting]] combinator, the message would be like '...: expecting 78 | \"let\" or alphabetic character', which is less friendly. 79 | 80 | The parsers [[fail]], [[fail-unexpected]] and [[expecting]] are the three 81 | parsers used to generate error messages. Of these, only [[expecting]] is 82 | commonly used. 83 | " 84 | [p msg] 85 | (fn [state context] 86 | (letfn [(e-err [e] (reply/e-err context (error/expecting e msg)))] 87 | (parser/go p state (reply/assign context {reply/e-err e-err}))))) 88 | 89 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 90 | 91 | (defn bind 92 | "This parser applies parser `p` and then parser `(f x)` where x is a return 93 | value of the parser `p`. 94 | 95 | - Fails: when any of parsers `p` or `(f x)` fails. 96 | - Consumes: when any of parsers `p` or `(f x)` consumes some input. 
97 | " 98 | [p f] 99 | (fn [state context] 100 | (letfn [(c-ok-p [s x] 101 | ;; - if (f x) doesn't consume input, but is okay, we still return in the consumed 102 | ;; continuation 103 | ;; - if (f x) doesn't consume input, but errors, we return the error in the 104 | ;; 'consumed-err' continuation 105 | (parser/go (f x) s (reply/assign context {reply/e-ok (partial reply/c-ok context) 106 | reply/e-err (partial reply/c-err context)}))) 107 | (e-ok-p [s x] 108 | ;; - in these cases, (f x) can return as empty 109 | (parser/go (f x) s context))] 110 | (parser/go p state (reply/assign context {reply/c-ok c-ok-p 111 | reply/e-ok e-ok-p}))))) 112 | 113 | (defmacro for 114 | "Expands into nested bind forms and a function body. 115 | 116 | The pattern: 117 | 118 | (p/bind p (fn [x] 119 | (p/bind q (fn [y] 120 | ... 121 | (p/result (f x y ...)))))) 122 | 123 | can more conveniently be written as: 124 | 125 | (p/for [x p 126 | y q 127 | ...] 128 | (p/result (f x y ...))) 129 | " 130 | [[& bindings] & body] 131 | (let [[sym p :as pair] (take 2 bindings)] 132 | (assert (= 2 (count pair)) "Requires an even number of forms in bindings") 133 | (assert (some? body) "Requires some body") 134 | (if (= 2 (count bindings)) 135 | `(bind ~p (fn [~sym] ~@body)) 136 | `(bind ~p (fn [~sym] (for ~(drop 2 bindings) ~@body)))))) 137 | 138 | (defn after 139 | "This parser tries to apply the parsers in order, until the last of them succeeds. 140 | Returns the value of the last parser, discards result of all preceding 141 | parsers. 142 | 143 | - Fails: when any of tried parsers fails. 144 | - Consumes: when any of tried parsers consumes some input. 145 | " 146 | ([q p] 147 | (bind q (fn [_] p))) 148 | ([q qq p] 149 | (->> p (after (after q qq)))) 150 | ([q qq qqq & more] 151 | (reduce after (list* q qq qqq more)))) 152 | 153 | (defn value 154 | "This parser applies a series of functions to the result value of the parser `p`. 155 | 156 | - Fails: when `p` fails. 
157 | - Consumes: when `p` consumes some input. 158 | " 159 | ([p f] 160 | (bind p (fn [x] (result (f x))))) 161 | ([p f g] 162 | (bind p (fn [x] (result (g (f x)))))) 163 | ([p f g h] 164 | (bind p (fn [x] (result (h (g (f x))))))) 165 | ([p f g h & more] 166 | (bind p (fn [x] (result (reduce #(%2 %1) x (list* f g h more))))))) 167 | 168 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 169 | 170 | (defn maybe 171 | "This parser behaves like parser `p`, except that it pretends that it hasn't 172 | consumed any input when an error occurs. 173 | 174 | - Fails: when `p` fails. 175 | - Consumes: when `p` succeeds and consumes some input. 176 | 177 | This combinator is used whenever arbitrary look ahead is needed. Since it 178 | pretends that it hasn't consumed any input when `p` fails, the [[alt]] 179 | combinator will try its second alternative even when the first parser failed 180 | while consuming input. 181 | 182 | The [[maybe]] combinator can for example be used to distinguish identifiers 183 | and reserved words. Both reserved words and identifiers are a sequence of 184 | letters. Whenever we expect a certain reserved word where we can also expect 185 | an identifier we have to use the [[maybe]] combinator. Suppose we write: 186 | 187 | (def identifier 188 | (p/+many char/letter?)) 189 | 190 | (def let-expr 191 | (p/after (p/word \"let\") 192 | ...)) 193 | 194 | (def expr 195 | (-> (p/alt let-expr 196 | identifier) 197 | (p/expecting \"expression\"))) 198 | 199 | If the user writes \"lexical\", the parser fails with: `unexpected \"x\", 200 | expecting \"t\" of (word \"let\")`. Indeed, since the [[alt]] combinator only 201 | tries alternatives when the first alternative hasn't consumed input, the 202 | `identifier` parser is never tried (because the prefix \"le\" of the `(p/word 203 | \"let\")` parser is already consumed). 
The right behaviour can be obtained by 204 | adding the [[maybe]] combinator: 205 | 206 | (def let-expr 207 | (p/after (p/maybe (p/word \"let\")) 208 | ...)) 209 | " 210 | [p] 211 | (fn [state context] 212 | (parser/go p state (reply/assign context {reply/c-err (partial reply/e-err context)})))) 213 | 214 | (defn look-ahead 215 | "Parses `p` without consuming any input. If `p` fails and consumes some input, 216 | so does [[look-ahead]]. Combine with [[maybe]] if this is undesirable. 217 | 218 | - Fails: when `p` fails. 219 | - Consumes: when `p` fails and consumes some input. 220 | " 221 | [p] 222 | (fn [state context] 223 | (letfn [(e-ok [_ x] (reply/e-ok context state x))] 224 | (parser/go p state (reply/assign context {reply/c-ok e-ok, 225 | reply/e-ok e-ok}))))) 226 | 227 | (letfn 228 | [(not-followed-by* [q] 229 | (fn [x] 230 | (fn [state context] 231 | (letfn [(e-ok [_ _] (reply/e-err context (if-let [input (seq (state/input state))] 232 | (error/unexpected state (delay (render (first input)))) 233 | (error/sys-unexpected-eof state)))) 234 | (e-err [_] (reply/e-ok context state x))] 235 | (parser/go q state (reply/assign context {reply/c-ok e-ok 236 | reply/e-ok e-ok 237 | reply/c-err e-err 238 | reply/e-err e-err}))))))] 239 | (defn not-followed-by 240 | "This parser behaves like parser `p`, except that it only succeeds when parser 241 | `q` fails. This parser can be used to implement the 'longest match' rule. For 242 | example, when recognizing keywords (for example `let`), we want to make sure 243 | that a keyword is not followed by a legal identifier character, in which case 244 | the keyword is actually an identifier (for example `lets`). We can write this 245 | behaviour as follows: 246 | 247 | (-> (p/word \"let\") 248 | (p/not-followed-by char/letter-or-number?)) 249 | 250 | - Fails: 251 | - when `p` fails. 252 | - when `q` succeeds. 253 | - Consumes: 254 | - when `p` consumes some input. 
255 | " 256 | ([p q] 257 | (bind p (not-followed-by* q))) 258 | ([q] 259 | ((not-followed-by* q) nil)))) 260 | 261 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 262 | 263 | (defn *many 264 | "This parser applies the parser `p` _zero_ or more times. Returns a sequence 265 | of the returned values of `p`. 266 | 267 | - Fails: when `p` fails and consumes some input. 268 | - Consumes: when `p` consumes some input. 269 | 270 | Example: 271 | 272 | (def identifier 273 | (p/for [c char/letter? 274 | cs (p/*many (p/alt char/letter-or-number? 275 | (char/is \"_\")))] 276 | (p/result (cons c cs)))) 277 | " 278 | [p] 279 | (fn [state context] 280 | (letfn [(walk [xs s x] 281 | (let [xs (conj! xs x) 282 | e-err (fn [_] (reply/c-ok context s (seq (persistent! xs))))] 283 | (parser/go p s (reply/assign context {reply/c-ok (partial walk xs) 284 | reply/e-ok parser/e-ok-throw-empty-input 285 | reply/e-err e-err}))))] 286 | (parser/go p state (reply/assign context {reply/c-ok (partial walk (transient [])) 287 | reply/e-ok parser/e-ok-throw-empty-input 288 | reply/e-err (fn [_] (reply/e-ok context state nil))}))))) 289 | 290 | (defn +many 291 | "This parser applies the parser `p` _one_ or more times. Returns a sequence of 292 | the returned values of `p`. 293 | 294 | - Fails: when `p` does not succeed at least once. 295 | - Consumes: when `p` consumes some input. 296 | 297 | Example: 298 | 299 | (def word 300 | (p/+many char/letter?)) 301 | " 302 | [p] 303 | (for [x p, xs (*many p)] 304 | (result (cons x xs)))) 305 | 306 | (defn *skip 307 | "This parser applies the parser `p` _zero_ or more times, skipping its result. 308 | 309 | - Fails: when `p` fails and consumes some input. 310 | - Consumes: when `p` consumes some input. 
311 | 312 | Example: 313 | 314 | (def spaces 315 | (p/*skip char/white?)) 316 | " 317 | [p] 318 | (fn [state context] 319 | (letfn [(c-ok [s _] 320 | (parser/go p s (reply/assign context {reply/c-ok c-ok 321 | reply/e-ok parser/e-ok-throw-empty-input 322 | reply/e-err (fn [_] (reply/c-ok context s nil))})))] 323 | (parser/go p state (reply/assign context {reply/c-ok c-ok 324 | reply/e-ok parser/e-ok-throw-empty-input 325 | reply/e-err (fn [_] (reply/e-ok context state nil))}))))) 326 | 327 | (defn +skip 328 | "This parser applies the parser `p` _one_ or more times, skipping its result. 329 | 330 | - Fails: when `p` does not succeed at least once. 331 | - Consumes: when `p` consumes some input. 332 | " 333 | [p] 334 | (after p (*skip p))) 335 | 336 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 337 | 338 | (defn token 339 | "This parser accepts a token when `(pred token)` returns logical true, and 340 | optional expecting `msg`. 1-arity behaves as `pred` and can be used in 341 | predicate composition. 342 | 343 | - Fails: when `(pred token)` return logical false. 344 | - Consumes: when succeeds. 345 | " 346 | {:inline (fn [pred] `(token ~pred nil)) :inline-arities #{1}} 347 | ([pred] (token pred nil)) 348 | ([pred msg] 349 | (fn 350 | ;; Predicate behaviour. 351 | ([tok] (pred tok)) 352 | ;; Parser behaviour. 
353 | ([state context] 354 | (if-let [input (-> ^ISeq (state/input state) #?(:bb seq :clj .seq :cljs -seq :default seq))] 355 | (let [tok (#?(:bb first :clj .first :cljs -first :default first) input)] 356 | (if (pred tok) 357 | (reply/c-ok context (state/next-state state tok) tok) 358 | (reply/e-err context (cond-> (error/sys-unexpected state (delay (render tok))) 359 | msg (error/expecting msg))))) 360 | (reply/e-err context (cond-> (error/sys-unexpected-eof state) 361 | msg (error/expecting msg)))))))) 362 | 363 | (defn token-not 364 | "This parser accepts a token when `(pred token)` returns logical false, and 365 | optional expecting `msg`. 1-arity behaves as `(complement pred)` and can be 366 | used in predicate composition. 367 | 368 | - Fails: when `(pred token)` return logical true. 369 | - Consumes: when succeeds. 370 | " 371 | ([pred] 372 | (token (complement pred))) 373 | ([pred msg] 374 | (token (complement pred) msg))) 375 | 376 | (defn register-word-test 377 | "Associates keyword `k` with test-fn of the [[word]] parser." 378 | [k, f] 379 | (parser/register-word-test-fn k f)) 380 | 381 | (register-word-test :default =) 382 | (register-word-test :ic char/equals-ignorecase) 383 | 384 | (defn word 385 | "Parses a sequence of tokens given by `ts` and returns `ts`. The optional 386 | function `(test-fn word-token input-token)` is used to match tokens 387 | differently than simple equality. The `test-fn` can be referred by keyword 388 | registered using [[register-word-test]]. There are two predefined keywords 389 | registered: `:default` for `=` and `:ic` for case insensitive char comparison. 390 | 391 | - Fails: when any of tokens don't match the input. 392 | - Consumes: when at least first token matches the input. 
393 | 394 | Example: 395 | 396 | (def let-keyword (p/word \"let\")) 397 | 398 | (def let-keyword-ignorecase (p/word \"let\" :ic)) 399 | " 400 | {:inline (fn [tokens] `(word ~tokens =)) :inline-arities #{1}} 401 | ([tokens] (word tokens =)) 402 | ([tokens, test-fn] 403 | (let [test-fn (cond-> test-fn (keyword? test-fn) (parser/word-test-fn))] 404 | (fn [state context] 405 | (if-let [ws (seq tokens)] 406 | (loop [^ISeq ws ws 407 | ^ISeq input (seq (state/input state)) 408 | reply-err reply/e-err] 409 | (cond 410 | (not ws) 411 | (let [new-pos (reduce pos/next-pos (state/pos state) tokens) 412 | new-state (state/set-input-pos state input new-pos)] 413 | (reply/c-ok context new-state tokens)) 414 | (not input) 415 | (reply-err context (-> (error/sys-unexpected-eof state) 416 | (error/expecting (delay (render tokens))))) 417 | :else 418 | (let [w (#?(:bb first :clj .first :cljs -first :default first) ws) 419 | t (#?(:bb first :clj .first :cljs -first :default first) input)] 420 | (if (test-fn w t) 421 | (recur (#?(:bb next :clj .next :cljs -next :default next) ws) 422 | (#?(:bb next :clj .next :cljs -next :default next) input) 423 | reply/c-err) 424 | (reply-err context (-> (error/sys-unexpected state (delay (render t))) 425 | (error/expecting (delay (render tokens))))))))) 426 | (reply/e-ok context state tokens)))))) 427 | 428 | (def any-token 429 | "This parser accepts any kind of token. Returns the accepted token. 430 | 431 | - Fails: at the end of input. 432 | - Consumes: when succeeds. 433 | " 434 | (token any?)) 435 | 436 | (def ^{:arglists '([] [x])} 437 | eof 438 | "This parser only succeeds with value `x` at the end of the input. 439 | 440 | - Fails: when input is not completely consumed. 441 | - Consumes: never. 
442 | " 443 | (letfn [(eof* [x] 444 | (fn 445 | ([] eof) 446 | ([x] (eof* x)) 447 | ([state context] 448 | (if-let [input (seq (state/input state))] 449 | (reply/e-err context (-> (error/unexpected state (delay (render (first input)))) 450 | (error/expecting "end of input"))) 451 | (reply/e-ok context state x)))))] 452 | (eof* nil))) 453 | 454 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 455 | 456 | ;; ## Combinators ## 457 | 458 | (defn group* 459 | "This parser tries to apply parsers of `ps` in order until all of them 460 | succeed. Returns a sequence of values returned by every parser. 461 | 462 | - Fails: when any of tried parsers fails. 463 | - Consumes: when any of tried parsers consumes some input. 464 | " 465 | [ps] 466 | (if-let [p (first ps)] 467 | (for [x p, xs (group* (rest ps))] 468 | (result (cons x xs))) 469 | (result nil))) 470 | 471 | (defn group 472 | "This parser tries to apply parsers in order until all of them succeed. 473 | Returns a sequence of values returned by every parser. 474 | 475 | - Fails: when any of tried parsers fails. 476 | - Consumes: when any of tried parsers consumes some input. 477 | " 478 | [p q & ps] 479 | (group* (cons p (cons q ps)))) 480 | 481 | (defn alt 482 | "This parser tries to apply the parsers in order, until one of them succeeds. 483 | Returns the value of the succeeding parser. 484 | 485 | - Fails: 486 | - when any of tried parsers fails consuming some input. 487 | - when all tried parsers fail without consuming any input. 488 | - Consumes: 489 | - when any of tried parsers consumes some input. 490 | 491 | The parser first applies `p`. If it succeeds, the value of `p` is returned. If 492 | `p` fails _without consuming any input_, parser `q` is tried and so on. 493 | 494 | The parser is called _predictive_ since `q` is only tried when parser `p` 495 | didn't consume any input (i.e. the look ahead is 1).
This non-backtracking 496 | behaviour allows for both an efficient implementation of the parser 497 | combinators and the generation of good error messages. 498 | " 499 | ([p q] 500 | (fn [state context] 501 | (letfn [(e-err-p [e] 502 | (letfn [(e-ok-q [s x] (reply/e-ok context s x)) 503 | (e-err-q [ee] (reply/e-err context (error/merge-errors e ee)))] 504 | (parser/go q state (reply/assign context {reply/e-ok e-ok-q 505 | reply/e-err e-err-q}))))] 506 | (parser/go p state (reply/assign context {reply/e-err e-err-p}))))) 507 | ([p q qq] 508 | (-> p (alt q) (alt qq))) 509 | ([p q qq & more] 510 | (reduce alt (list* p q qq more)))) 511 | 512 | (defn option 513 | "This parser tries to apply parser `p`. If `p` fails without consuming input, 514 | it returns the value `x` (or `nil`), otherwise the value returned by `p`. 515 | 516 | - Fails: when `p` fails and consumes some input. 517 | - Consumes: when `p` consumes some input. 518 | " 519 | ([p] (option p nil)) 520 | ([p x] 521 | (alt p (result x)))) 522 | 523 | (defn between 524 | "Parses `open`, followed by `p` and `close`. Returns the value returned by `p`. 525 | 526 | - Fails: when any of the parsers fails. 527 | - Consumes: in all cases except when `open` fails without consuming any input. 528 | 529 | Example: 530 | 531 | (defn braces [p] 532 | (-> p (p/between (char/is \"{\") 533 | (char/is \"}\")))) 534 | " 535 | ([p around] (between p around around)) 536 | ([p open close] 537 | (for [_ open, x p, _ close] 538 | (result x)))) 539 | 540 | (defn times 541 | "Parses `n` occurrences of `p`. If `n` is less than or equal to zero, the parser 542 | is equal to `(p/result nil)`. Returns a sequence of `n` values returned by `p`." 543 | [n p] 544 | (group* (repeat n p))) 545 | 546 | (defn *many-till 547 | "This parser applies parser `p` _zero_ or more times until parser `end` 548 | succeeds. Returns a sequence of values returned by `p`. 549 | 550 | - Fails: 551 | - when `p` fails.
552 | - when `end` does not succeed before end of input. 553 | - Consumes: 554 | - when `p` or `end` consumes some input. 555 | 556 | Example: 557 | 558 | (def simple-comment 559 | (p/after (p/word \"<!--\") 560 | (p/*many-till p/any-token (p/maybe (p/word \"-->\"))))) 561 | 562 | Note the overlapping parsers [[any-token]] and `(p/word \"-->\")`, and 563 | therefore the use of the [[maybe]] combinator. 564 | " 565 | [p end] 566 | (letfn [(scan [] (alt (after end (result nil)) 567 | (for [x p, xs (scan)] 568 | (result (cons x xs)))))] 569 | (scan))) 570 | 571 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 572 | 573 | (defn +sep-by 574 | "Parses _one_ or more occurrences of `p`, separated by `sep`. Returns a 575 | sequence of values returned by `p`." 576 | [p sep] 577 | (for [x p, xs (*many (after sep p))] 578 | (result (cons x xs)))) 579 | 580 | (defn *sep-by 581 | "Parses _zero_ or more occurrences of `p`, separated by `sep`. Returns a 582 | sequence of values returned by `p`. 583 | 584 | (defn comma-sep [p] 585 | (p/*sep-by p (p/after (char/is \",\") 586 | (p/*skip char/white?)))) 587 | " 588 | [p sep] 589 | (option (+sep-by p sep))) 590 | 591 | (defn +sep-end-by 592 | "Parses _one_ or more occurrences of `p`, separated and ended by `sep`. 593 | Returns a sequence of values returned by `p`." 594 | [p sep] 595 | (+many (for [x p, _ sep] 596 | (result x)))) 597 | 598 | (defn *sep-end-by 599 | "Parses _zero_ or more occurrences of `p`, separated and ended by `sep`. 600 | Returns a sequence of values returned by `p`." 601 | [p sep] 602 | (option (+sep-end-by p sep))) 603 | 604 | (defn +sep-opt-by 605 | "Parses _one_ or more occurrences of `p`, separated and optionally ended by 606 | `sep`. Returns a sequence of values returned by `p`."
607 | [p sep] 608 | (for [x p] 609 | (alt (for [_ sep, xs (option (+sep-opt-by p sep))] 610 | (result (cons x xs))) 611 | (result [x])))) 612 | 613 | (defn *sep-opt-by 614 | "Parses _zero_ or more occurrences of `p`, separated and optionally ended by 615 | `sep`. Returns a sequence of values returned by `p`." 616 | [p sep] 617 | (option (+sep-opt-by p sep))) 618 | 619 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 620 | 621 | ;; ## Parser state combinators ## 622 | 623 | (defn get-state 624 | "This parser returns the parser state field `:input`, `:pos` or `:user`. 625 | Without `field` it returns the parser state record itself." 626 | {:arglists '([] [:input] [:pos] [:user])} 627 | ([] 628 | (fn [state context] 629 | (reply/e-ok context state state))) 630 | ([field] 631 | (fn [state context] 632 | (reply/e-ok context state (field state))))) 633 | 634 | (defn update-state 635 | "This parser applies function `f` to the parser state field `:input`, `:pos` 636 | or `:user` and returns modified value. Without `field` it applies `f` to the 637 | parser state record itself. Suppose that we want to count identifiers in a 638 | source, we could use the user state as: 639 | 640 | (p/for [x identifier 641 | _ (p/update-state :user inc)] 642 | (p/result x))" 643 | {:arglists '([f] [:input, f] [:pos, f] [:user, f])} 644 | ([f] 645 | (fn [state context] 646 | (let [s (f state)] 647 | (reply/e-ok context s s)))) 648 | ([field f] 649 | (fn [state context] 650 | (let [v (cond-> (f (field state)) 651 | (= :input field) (state/conform-input))] 652 | (reply/e-ok context (assoc state field v) v))))) 653 | 654 | (defn set-state 655 | "This parser sets the parser state field `:input`, `:pos` or `:user` to `x`. 656 | Without `field` it sets the parser state record itself to `state`." 
657 | {:arglists '([state] [:input, new-input] [:pos, new-pos] [:user, new-user-state])} 658 | ([state] 659 | (update-state (constantly state))) 660 | ([field x] 661 | (update-state field (constantly x)))) 662 | 663 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 664 | 665 | (defn trace 666 | "This parser prints the parser state (position, remaining input and user 667 | state) at the time it is invoked. When `p` is provided it then continues to 668 | apply parser `p`, and if `p` fails will indicate that the label has been 669 | backtracked. It is intended to be used for debugging parsers by inspecting 670 | their intermediate states. 671 | 672 | - Fails: when `p` fails. 673 | - Consumes: when `p` consumes some input. 674 | 675 | Examples: 676 | 677 | (p/parse (p/after (char/is \"aeiou\") 678 | (p/trace \"test-label\")) 679 | \"atest\") 680 | 681 | > test-label: at line 1, column 2 682 | > - input: (\\t \\e \\s \\t) 683 | > - user: nil 684 | 685 | (p/parse (p/after (char/is \"aeiou\") 686 | (p/trace \"test-label\" (char/is \"nope\"))) 687 | \"atest\") 688 | 689 | > test-label: at line 1, column 2 690 | > - input: (\\t \\e \\s \\t) 691 | > - user: nil 692 | > test-label: backtracked 693 | 694 | > error at line 1, column 2: 695 | > unexpected \"t\" 696 | > expecting character of \"nope\" 697 | " 698 | ([label] 699 | (fn [state context] 700 | (println (str label ": at " (state/pos state) 701 | "\n - input: " (pr-str (take 20 (state/input state))) 702 | "\n - user: " (pr-str (state/user state)))) 703 | (reply/e-ok context state nil))) 704 | ([label p] 705 | (after (trace label) 706 | (alt p, (do-parser (println (str label ": backtracked")) 707 | (fail)))))) 708 | 709 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 710 | 711 | (defn parse* 712 | "Executes parser `p` given `input` sequence of tokens, returns reply record. 713 | See [[parse]] for available `opts`." 
714 | ([p input] 715 | (parser/run p (state/init-state input (pos/init-pos nil input) nil))) 716 | ([p input opts] 717 | (parser/run p (state/init-state input (pos/init-pos opts input) (:user-state opts))))) 718 | 719 | (defn parse 720 | "Executes parser `p` given `input` sequence of tokens, returns result value or 721 | throws exception on parsing error. 722 | 723 | Options: 724 | 725 | - `:pos` − The instance of InputPos or keyword for `pos/init-pos` to init 726 | parser pos. By default, pos is initialized to TextPos for string 727 | input or first token of char type, or IndexPos otherwise. 728 | 729 | - TextPos options: 730 | - `:tab` − tab size, default: 8. 731 | - `:line` − line number, default: 1. 732 | - `:col` − column number, default: 1. 733 | 734 | - `:user-state` − Initial value of user state. 735 | " 736 | {:arglists '([p input] 737 | [p input {:keys [pos user-state] :as options}] 738 | [p input {:keys [tab line col user-state] :as options}])} 739 | ([p input] 740 | (-> (parse* p input) (reply/value))) 741 | ([p input opts] 742 | (-> (parse* p input opts) (reply/value)))) 743 | 744 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 745 | -------------------------------------------------------------------------------- /test/strojure/parsesso/parser_test.cljc: -------------------------------------------------------------------------------- 1 | (ns strojure.parsesso.parser-test 2 | (:require [clojure.string :as string] 3 | [clojure.test :as test :refer [deftest testing]] 4 | [strojure.parsesso.parser :as p])) 5 | 6 | #_(test/run-tests) 7 | 8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 9 | 10 | (defn- p 11 | "Parses test input using given parser. Returns custom map with test result." 
12 | [parser input] 13 | (let [result (p/parse* parser input)] 14 | (if-let [error (:error result)] 15 | (-> (select-keys result [:consumed]) 16 | (assoc :error (-> (str error) (string/split-lines)))) 17 | (select-keys result [:consumed :value])))) 18 | 19 | (defn- tok 20 | [& cs] 21 | (p/token (set cs))) 22 | 23 | (defn- fail-consumed 24 | "Returns parser which fails when `parser` succeeds consuming some input, and behaves as `parser` otherwise." 25 | [parser] 26 | (p/alt (p/for [x parser] (p/fail (str "Test failure after parsing " x))) 27 | parser)) 28 | 29 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 30 | 31 | (deftest result-t 32 | (test/are [expr result] (= result expr) 33 | 34 | (p (p/result :A) 35 | []) 36 | {:consumed false, :value :A} 37 | 38 | (p (p/result :A) 39 | [:B]) 40 | {:consumed false, :value :A} 41 | 42 | (p (fail-consumed (p/result :A)) 43 | []) 44 | {:consumed false, :value :A} 45 | 46 | )) 47 | 48 | (deftest fail-t 49 | (test/are [expr result] (= result expr) 50 | 51 | (p (p/fail "Test failure") 52 | []) 53 | {:consumed false, :error ["error at index 0:" 54 | "Test failure"]} 55 | 56 | (p (p/fail "Test failure") 57 | [:A]) 58 | {:consumed false, :error ["error at index 0:" 59 | "Test failure"]} 60 | 61 | (p (p/fail nil) 62 | []) 63 | {:consumed false, :error ["error at index 0:"]} 64 | 65 | (p (p/fail) 66 | []) 67 | {:consumed false, :error ["error at index 0:"]} 68 | 69 | )) 70 | 71 | (deftest fail-unexpected-t 72 | (test/are [expr result] (= result expr) 73 | 74 | (p (p/fail-unexpected "Boom") 75 | []) 76 | {:consumed false, :error ["error at index 0:" 77 | "unexpected Boom"]} 78 | 79 | (p (-> (p/fail-unexpected "Boom") 80 | (p/expecting "description")) 81 | []) 82 | {:consumed false, :error ["error at index 0:" 83 | "unexpected Boom" 84 | "expecting description"]} 85 | 86 | (p (p/fail-unexpected nil) 87 | []) 88 | {:consumed false, :error ["error at index 0:" 89 | "unexpected nil"]} 90 | 91 | )) 92 | 93 | (deftest expecting-t
94 | (test/are [expr result] (= result expr) 95 | 96 | (p (-> (p/fail "Test failure") 97 | (p/expecting "Expect")) 98 | []) 99 | {:consumed false, :error ["error at index 0:" 100 | "expecting Expect" 101 | "Test failure"]} 102 | 103 | (p (-> (p/fail "Test failure") 104 | (p/expecting (delay "Expect"))) 105 | []) 106 | {:consumed false, :error ["error at index 0:" 107 | "expecting Expect" 108 | "Test failure"]} 109 | 110 | (p (-> (p/fail "Test failure") 111 | (p/expecting nil)) 112 | []) 113 | {:consumed false, :error ["error at index 0:" 114 | "Test failure"]} 115 | 116 | (p (-> (p/fail "Test failure") 117 | (p/expecting "Inner") 118 | (p/expecting "Outer")) 119 | []) 120 | {:consumed false, :error ["error at index 0:" 121 | "expecting Outer" 122 | "Test failure"]} 123 | 124 | (p (-> (p/fail "Test failure") 125 | (p/expecting "Inner") 126 | (p/expecting nil)) 127 | []) 128 | {:consumed false, :error ["error at index 0:" 129 | "expecting Inner" 130 | "Test failure"]} 131 | 132 | )) 133 | 134 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 135 | 136 | (deftest bind-t 137 | (test/are [expr result] (= result expr) 138 | 139 | (p (p/bind (tok :A) p/result) 140 | [:A]) 141 | {:consumed true, :value :A} 142 | 143 | (p (p/bind (tok :A) (fn [_] (p/fail "Oops"))) 144 | [:A]) 145 | {:consumed true, :error ["error at index 1:" 146 | "Oops"]} 147 | 148 | (p (p/bind (tok :A) p/result) 149 | [:B]) 150 | {:consumed false, :error ["error at index 0:" 151 | "unexpected :B"]} 152 | 153 | (p (p/bind (tok :A) (fn [_] (p/fail "Oops"))) 154 | [:B]) 155 | {:consumed false, :error ["error at index 0:" 156 | "unexpected :B"]} 157 | 158 | (p (p/bind (tok :A) (fn [_] (tok :B))) 159 | [:A :B]) 160 | {:consumed true, :value :B} 161 | 162 | (p (p/bind (tok :A) (fn [_] (tok :B))) 163 | [:B :A]) 164 | {:consumed false, :error ["error at index 0:" 165 | "unexpected :B"]} 166 | 167 | (p (p/bind (tok :A) (fn [_] (tok :B))) 168 | [:A :A]) 169 | 
{:consumed true, :error ["error at index 1:" 170 | "unexpected :A"]} 171 | 172 | )) 173 | 174 | (deftest after-t 175 | (test/are [expr result] (= result expr) 176 | 177 | (p (p/after (tok :A) (tok :B)) 178 | [:A :B]) 179 | {:consumed true, :value :B} 180 | 181 | (p (p/after (tok :A) (tok :B)) 182 | [:A :A]) 183 | {:consumed true, :error ["error at index 1:" 184 | "unexpected :A"]} 185 | 186 | (p (p/after (tok :A) (tok :B)) 187 | [:A]) 188 | {:consumed true, :error ["error at index 1:" 189 | "unexpected end of input"]} 190 | 191 | (p (p/after (fail-consumed (tok :A)) (tok :B)) 192 | [:A :B]) 193 | {:consumed true, :error ["error at index 1:" 194 | "Test failure after parsing :A"]} 195 | 196 | (p (p/after (tok :A) (fail-consumed (tok :B))) 197 | [:A :B]) 198 | {:consumed true, :error ["error at index 2:" 199 | "Test failure after parsing :B"]} 200 | 201 | (p (p/after (tok :A) (tok :B) (tok :C)) 202 | [:A :B :C]) 203 | {:consumed true, :value :C} 204 | 205 | )) 206 | 207 | (deftest value-t 208 | (test/are [expr result] (= result expr) 209 | 210 | (p (p/value (tok :A) name) 211 | [:A]) 212 | {:consumed true, :value "A"} 213 | 214 | (p (p/value (p/token number?) inc inc) 215 | [1]) 216 | {:consumed true, :value 3} 217 | 218 | (p (p/value (p/token number?) 
inc inc inc str) 219 | [1]) 220 | {:consumed true, :value "4"} 221 | 222 | (p (p/value (tok :A) name) 223 | [:B]) 224 | {:consumed false, :error ["error at index 0:" 225 | "unexpected :B"]} 226 | 227 | (p (p/value (tok :A) name) 228 | []) 229 | {:consumed false, :error ["error at index 0:" 230 | "unexpected end of input"]} 231 | 232 | (p (p/value (fail-consumed (tok :A)) name) 233 | [:A]) 234 | {:consumed true, :error ["error at index 1:" 235 | "Test failure after parsing :A"]} 236 | 237 | (p (p/value (fail-consumed (tok :A)) name) 238 | [:B]) 239 | {:consumed false, :error ["error at index 0:" 240 | "unexpected :B"]} 241 | 242 | (p (p/value (fail-consumed (tok :A)) name) 243 | []) 244 | {:consumed false, :error ["error at index 0:" 245 | "unexpected end of input"]} 246 | 247 | )) 248 | 249 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 250 | 251 | (deftest maybe-t 252 | (test/are [expr result] (= result expr) 253 | 254 | (p (p/maybe (tok :A)) 255 | [:A]) 256 | {:consumed true, :value :A} 257 | 258 | (p (p/maybe (tok :A)) 259 | [:B]) 260 | {:consumed false, :error ["error at index 0:" 261 | "unexpected :B"]} 262 | 263 | (p (p/maybe (tok :A)) 264 | []) 265 | {:consumed false, :error ["error at index 0:" 266 | "unexpected end of input"]} 267 | 268 | (p (p/maybe (fail-consumed (tok :A))) 269 | [:A]) 270 | {:consumed false, :error ["error at index 1:" 271 | "Test failure after parsing :A"]} 272 | 273 | (p (p/maybe (fail-consumed (tok :A))) 274 | [:B]) 275 | {:consumed false, :error ["error at index 0:" 276 | "unexpected :B"]} 277 | 278 | (p (p/maybe (fail-consumed (tok :A))) 279 | []) 280 | {:consumed false, :error ["error at index 0:" 281 | "unexpected end of input"]} 282 | 283 | )) 284 | 285 | (deftest look-ahead-t 286 | (test/are [expr result] (= result expr) 287 | 288 | (p (p/look-ahead (tok :A)) 289 | [:A]) 290 | {:consumed false, :value :A} 291 | 292 | (p (p/look-ahead (tok :A)) 293 | [:B]) 294 | {:consumed 
false, :error ["error at index 0:" 295 | "unexpected :B"]} 296 | 297 | (p (p/look-ahead (tok :A)) 298 | []) 299 | {:consumed false, :error ["error at index 0:" 300 | "unexpected end of input"]} 301 | 302 | (p (p/look-ahead (fail-consumed (tok :A))) 303 | [:A]) 304 | {:consumed true, :error ["error at index 1:" 305 | "Test failure after parsing :A"]} 306 | 307 | (p (p/look-ahead (fail-consumed (tok :A))) 308 | [:B]) 309 | {:consumed false, :error ["error at index 0:" 310 | "unexpected :B"]} 311 | 312 | (p (p/look-ahead (fail-consumed (tok :A))) 313 | []) 314 | {:consumed false, :error ["error at index 0:" 315 | "unexpected end of input"]} 316 | 317 | )) 318 | 319 | (deftest not-followed-by-t 320 | (testing "not-followed-by [p q]" 321 | (test/are [expr result] (= result expr) 322 | 323 | (p (p/not-followed-by (p/result :X) 324 | (tok :A)) 325 | [:B]) 326 | {:consumed false, :value :X} 327 | 328 | (p (p/not-followed-by (tok :X) 329 | (tok :A)) 330 | [:X :B]) 331 | {:consumed true, :value :X} 332 | 333 | (p (p/not-followed-by (p/result :X) 334 | (p/after (tok :A) (tok :B))) 335 | [:A :A]) 336 | {:consumed false, :value :X} 337 | 338 | (p (p/not-followed-by (tok :X) 339 | (p/after (tok :A) (tok :B))) 340 | [:X :A :A]) 341 | {:consumed true, :value :X} 342 | 343 | (p (p/not-followed-by (p/result :X) 344 | (tok :A)) 345 | []) 346 | {:consumed false, :value :X} 347 | 348 | (p (p/not-followed-by (tok :X) 349 | (tok :A)) 350 | [:X]) 351 | {:consumed true, :value :X} 352 | 353 | (p (p/not-followed-by (p/result :X) 354 | p/any-token) 355 | []) 356 | {:consumed false, :value :X} 357 | 358 | (p (p/not-followed-by (tok :X) 359 | p/any-token) 360 | [:X]) 361 | {:consumed true, :value :X} 362 | 363 | (p (p/not-followed-by (p/result :X) 364 | (tok :A)) 365 | [:A]) 366 | {:consumed false, :error ["error at index 0:" 367 | "unexpected :A"]} 368 | 369 | (p (p/not-followed-by (tok :X) 370 | (tok :A)) 371 | [:X :A]) 372 | {:consumed true, :error ["error at index 1:" 373 | "unexpected 
:A"]} 374 | 375 | (p (p/not-followed-by (p/result :X) 376 | (p/after (tok :A) (tok :B))) 377 | [:A :B]) 378 | {:consumed false, :error ["error at index 0:" 379 | "unexpected :A"]} 380 | 381 | (p (p/not-followed-by (tok :X) 382 | (p/after (tok :A) (tok :B))) 383 | [:X :A :B]) 384 | {:consumed true, :error ["error at index 1:" 385 | "unexpected :A"]} 386 | 387 | (p (p/not-followed-by (p/result :X) 388 | p/any-token) 389 | [:A]) 390 | {:consumed false, :error ["error at index 0:" 391 | "unexpected :A"]} 392 | 393 | (p (p/not-followed-by (tok :X) 394 | p/any-token) 395 | [:X :A]) 396 | {:consumed true, :error ["error at index 1:" 397 | "unexpected :A"]} 398 | 399 | (p (p/not-followed-by (p/result :X) 400 | (p/eof)) 401 | []) 402 | {:consumed false, :error ["error at index 0:" 403 | "unexpected end of input"]} 404 | 405 | (p (p/not-followed-by (tok :X) 406 | (p/eof)) 407 | [:X]) 408 | {:consumed true, :error ["error at index 1:" 409 | "unexpected end of input"]} 410 | 411 | )) 412 | 413 | (testing "not-followed-by [q]" 414 | (test/are [expr result] (= result expr) 415 | 416 | (p (p/not-followed-by (tok :A)) 417 | [:B]) 418 | {:consumed false, :value nil} 419 | 420 | (p (p/not-followed-by (p/after (tok :A) (tok :B))) 421 | [:A :A]) 422 | {:consumed false, :value nil} 423 | 424 | (p (p/not-followed-by (tok :A)) 425 | []) 426 | {:consumed false, :value nil} 427 | 428 | (p (p/not-followed-by p/any-token) 429 | []) 430 | {:consumed false, :value nil} 431 | 432 | (p (p/not-followed-by (tok :A)) 433 | [:A]) 434 | {:consumed false, :error ["error at index 0:" 435 | "unexpected :A"]} 436 | 437 | (p (p/not-followed-by (p/after (tok :A) (tok :B))) 438 | [:A :B]) 439 | {:consumed false, :error ["error at index 0:" 440 | "unexpected :A"]} 441 | 442 | (p (p/not-followed-by p/any-token) 443 | [:A]) 444 | {:consumed false, :error ["error at index 0:" 445 | "unexpected :A"]} 446 | 447 | (p (p/not-followed-by p/eof) 448 | []) 449 | {:consumed false, :error ["error at index 0:" 450 | 
"unexpected end of input"]} 451 | 452 | ))) 453 | 454 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 455 | 456 | (deftest *many-t 457 | (test/are [expr result] (= result expr) 458 | 459 | (p (p/*many (tok :A :B :C)) 460 | [:A :B :C :D :E :F]) 461 | {:consumed true, :value [:A :B :C]} 462 | 463 | (p (p/*many (fail-consumed (tok :A :B :C))) 464 | [:A :B :C :D :E :F]) 465 | {:consumed true, :error ["error at index 1:" 466 | "Test failure after parsing :A"]} 467 | 468 | (p (p/*many (tok :D :E :F)) 469 | [:A :B :C :D :E :F]) 470 | {:consumed false, :value nil} 471 | 472 | (p (p/*many (tok :A :B :C)) 473 | []) 474 | {:consumed false, :value nil} 475 | 476 | (p (p/*many (tok :A)) 477 | (repeat 10000 :A)) 478 | {:consumed true, :value (repeat 10000 :A)} 479 | 480 | )) 481 | 482 | (deftest +many-t 483 | (test/are [expr result] (= result expr) 484 | 485 | (p (p/+many (tok :A :B :C)) 486 | [:A :B :C :D :E :F]) 487 | {:consumed true, :value [:A :B :C]} 488 | 489 | (p (p/+many (tok :D :E :F)) 490 | [:A :B :C :D :E :F]) 491 | {:consumed false, :error ["error at index 0:" 492 | "unexpected :A"]} 493 | 494 | (p (p/+many (tok :A :B :C)) 495 | []) 496 | {:consumed false, :error ["error at index 0:" 497 | "unexpected end of input"]} 498 | 499 | (p (p/+many (tok :A)) 500 | (repeat 10000 :A)) 501 | {:consumed true, :value (repeat 10000 :A)} 502 | 503 | )) 504 | 505 | (deftest *skip-t 506 | (test/are [expr result] (= result expr) 507 | 508 | (p (p/*skip (tok :A)) 509 | [:A :A :A :B :B :B]) 510 | {:consumed true, :value nil} 511 | 512 | (p (p/*skip (fail-consumed (tok :A))) 513 | [:A :A :A :B :B :B]) 514 | {:consumed true, :error ["error at index 1:" 515 | "Test failure after parsing :A"]} 516 | 517 | (p (p/*skip (tok :A)) 518 | [:B :B :B]) 519 | {:consumed false, :value nil} 520 | 521 | (p (p/*skip (tok :A)) 522 | []) 523 | {:consumed false, :value nil} 524 | 525 | ) 526 | ) 527 | 528 | (deftest +skip-t 529 | (test/are [expr 
result] (= result expr) 530 | 531 | (p (p/+skip (tok :A)) 532 | [:A :A :A :B :B :B]) 533 | {:consumed true, :value nil} 534 | 535 | (p (p/+skip (tok :A)) 536 | [:B :B :B]) 537 | {:consumed false, :error ["error at index 0:" 538 | "unexpected :B"]} 539 | 540 | (p (p/+skip (tok :A)) 541 | []) 542 | {:consumed false, :error ["error at index 0:" 543 | "unexpected end of input"]} 544 | 545 | )) 546 | 547 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 548 | 549 | (deftest token-t 550 | (test/are [expr result] (= result expr) 551 | 552 | (p (p/token #{:A}) 553 | [:A]) 554 | {:consumed true, :value :A} 555 | 556 | (p (p/token #{:A}) 557 | [:B]) 558 | {:consumed false, :error ["error at index 0:" 559 | "unexpected :B"]} 560 | 561 | (p (p/token #{:A}) 562 | []) 563 | {:consumed false, :error ["error at index 0:" 564 | "unexpected end of input"]} 565 | 566 | (p (fail-consumed (p/token #{:A})) 567 | [:A]) 568 | {:consumed true, :error ["error at index 1:" 569 | "Test failure after parsing :A"]} 570 | 571 | (p (fail-consumed (p/token #{:A})) 572 | [:B]) 573 | {:consumed false, :error ["error at index 0:" 574 | "unexpected :B"]} 575 | 576 | (p (fail-consumed (p/token #{:A})) 577 | []) 578 | {:consumed false, :error ["error at index 0:" 579 | "unexpected end of input"]} 580 | 581 | )) 582 | 583 | (deftest token-not-t 584 | (test/are [expr result] (= result expr) 585 | 586 | (p (p/token-not #{:A}) 587 | [:B]) 588 | {:consumed true, :value :B} 589 | 590 | (p (p/token-not #{:A}) 591 | [:A]) 592 | {:consumed false, :error ["error at index 0:" 593 | "unexpected :A"]} 594 | 595 | (p (p/token-not #{:A}) 596 | []) 597 | {:consumed false, :error ["error at index 0:" 598 | "unexpected end of input"]} 599 | 600 | (p (fail-consumed (p/token-not #{:A})) 601 | [:B]) 602 | {:consumed true, :error ["error at index 1:" 603 | "Test failure after parsing :B"]} 604 | 605 | (p (fail-consumed (p/token-not #{:A})) 606 | [:A]) 607 | {:consumed 
false, :error ["error at index 0:" 608 | "unexpected :A"]} 609 | 610 | (p (fail-consumed (p/token-not #{:A})) 611 | []) 612 | {:consumed false, :error ["error at index 0:" 613 | "unexpected end of input"]} 614 | 615 | )) 616 | 617 | (deftest word-t 618 | (testing "default matching" 619 | (test/are [expr result] (= result expr) 620 | 621 | (p (p/word [:A :B :C]) 622 | [:A :B :C]) 623 | {:consumed true, :value [:A :B :C]} 624 | 625 | (p (p/word [:A :B :C]) 626 | [:A :B]) 627 | {:consumed true, :error ["error at index 0:" 628 | "unexpected end of input" 629 | "expecting [:A :B :C]"]} 630 | 631 | (p (p/word [:A :B :C]) 632 | []) 633 | {:consumed false, :error ["error at index 0:" 634 | "unexpected end of input" 635 | "expecting [:A :B :C]"]} 636 | 637 | (p (p/word [:A :B :C]) 638 | [:A :B :X]) 639 | {:consumed true, :error ["error at index 0:" 640 | "unexpected :X" 641 | "expecting [:A :B :C]"]} 642 | 643 | (p (p/word [:A :B :C]) 644 | [:X :Y :Z]) 645 | {:consumed false, :error ["error at index 0:" 646 | "unexpected :X" 647 | "expecting [:A :B :C]"]} 648 | 649 | (p (p/word [:ns/A :ns/B :ns/C] 650 | (fn [w t] (= (name w) (name t)))) 651 | [:A :B :C]) 652 | {:consumed true, :value [:ns/A :ns/B :ns/C]} 653 | 654 | )) 655 | 656 | (testing "case insensitive matching" 657 | (test/are [expr result] (= result expr) 658 | 659 | (p (p/word "abc" :ic) 660 | "abc") 661 | {:consumed true, :value "abc"} 662 | 663 | (p (p/word "abc" :ic) 664 | "ABC") 665 | {:consumed true, :value "abc"} 666 | 667 | (p (p/word "ABC" :ic) 668 | "abc") 669 | {:consumed true, :value "ABC"} 670 | 671 | (p (p/word "abc" :ic) 672 | "abd") 673 | {:consumed true, :error ["error at line 1, column 1:" 674 | "unexpected \"d\"" 675 | "expecting \"abc\""]} 676 | 677 | (p (p/word "abc" :ic) 678 | "ab") 679 | {:consumed true, :error ["error at line 1, column 1:" 680 | "unexpected end of input" 681 | "expecting \"abc\""]} 682 | 683 | ))) 684 | 685 | (deftest any-token-t 686 | (test/are [expr result] (= result expr) 
687 | 688 | (p p/any-token 689 | [:A]) 690 | {:consumed true, :value :A} 691 | 692 | (p p/any-token 693 | []) 694 | {:consumed false, :error ["error at index 0:" 695 | "unexpected end of input"]} 696 | 697 | )) 698 | 699 | (deftest eof-t 700 | (test/are [expr result] (= result expr) 701 | 702 | (p p/eof 703 | []) 704 | {:consumed false, :value nil} 705 | 706 | (p (p/eof :ok) 707 | []) 708 | {:consumed false, :value :ok} 709 | 710 | (p p/eof 711 | [:A]) 712 | {:consumed false, :error ["error at index 0:" 713 | "unexpected :A" 714 | "expecting end of input"]} 715 | 716 | )) 717 | 718 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 719 | 720 | (deftest group*-t 721 | (test/are [expr result] (= result expr) 722 | 723 | (p (p/group* [(tok :A) (tok :B) (tok :C)]) 724 | [:A :B :C]) 725 | '{:consumed true, :value (:A :B :C)} 726 | 727 | (p (p/group* [(tok :A) (tok :B) (tok :C)]) 728 | [:B :C]) 729 | {:consumed false, :error ["error at index 0:" 730 | "unexpected :B"]} 731 | 732 | (p (p/group* [(fail-consumed (tok :A)) (tok :B) (tok :C)]) 733 | [:A :B :C]) 734 | {:consumed true, :error ["error at index 1:" 735 | "Test failure after parsing :A"]} 736 | 737 | (p (p/group* []) 738 | [:A :B :C]) 739 | {:consumed false, :value nil} 740 | 741 | (p (p/group* nil) 742 | [:A :B :C]) 743 | {:consumed false, :value nil} 744 | 745 | )) 746 | 747 | (deftest group-t 748 | (test/are [expr result] (= result expr) 749 | 750 | (p (p/group (tok :A) (tok :B) (tok :C)) 751 | [:A :B :C]) 752 | '{:consumed true, :value (:A :B :C)} 753 | 754 | (p (p/group (tok :A) (tok :B) (tok :C)) 755 | [:B :C]) 756 | {:consumed false, :error ["error at index 0:" 757 | "unexpected :B"]} 758 | 759 | (p (p/group (fail-consumed (tok :A)) (tok :B) (tok :C)) 760 | [:A :B :C]) 761 | {:consumed true, :error ["error at index 1:" 762 | "Test failure after parsing :A"]} 763 | 764 | )) 765 | 766 | (deftest alt-t 767 | (test/are [expr result] (= result expr) 768 | 769 | 
(p (p/alt (tok :A) 770 | (tok :B)) 771 | [:A]) 772 | {:consumed true, :value :A} 773 | 774 | (p (p/alt (tok :A) 775 | (tok :B)) 776 | [:B]) 777 | {:consumed true, :value :B} 778 | 779 | (p (p/alt (tok :A) 780 | (tok :B)) 781 | [:C]) 782 | {:consumed false, :error ["error at index 0:" 783 | "unexpected :C"]} 784 | 785 | (p (p/alt (tok :A) 786 | (tok :B)) 787 | []) 788 | {:consumed false, :error ["error at index 0:" 789 | "unexpected end of input"]} 790 | 791 | (p (p/alt (fail-consumed (tok :A)) 792 | (tok :B)) 793 | [:A]) 794 | {:consumed true, :error ["error at index 1:" 795 | "Test failure after parsing :A"]} 796 | 797 | (p (p/alt (fail-consumed (tok :A)) 798 | (tok :B)) 799 | [:B]) 800 | {:consumed true, :value :B} 801 | 802 | (p (p/alt (fail-consumed (tok :A)) 803 | (tok :B)) 804 | [:C]) 805 | {:consumed false, :error ["error at index 0:" 806 | "unexpected :C"]} 807 | 808 | (p (p/alt (fail-consumed (tok :A)) 809 | (tok :B)) 810 | []) 811 | {:consumed false, :error ["error at index 0:" 812 | "unexpected end of input"]} 813 | 814 | (p (p/alt (tok :A) 815 | (fail-consumed (tok :B))) 816 | [:A]) 817 | {:consumed true, :value :A} 818 | 819 | (p (p/alt (tok :A) 820 | (fail-consumed (tok :B))) 821 | [:B]) 822 | {:consumed true, :error ["error at index 1:" 823 | "Test failure after parsing :B"]} 824 | 825 | (p (p/alt (tok :A) 826 | (fail-consumed (tok :B))) 827 | [:C]) 828 | {:consumed false, :error ["error at index 0:" 829 | "unexpected :C"]} 830 | 831 | (p (p/alt (tok :A) 832 | (fail-consumed (tok :B))) 833 | []) 834 | {:consumed false, :error ["error at index 0:" 835 | "unexpected end of input"]} 836 | 837 | (p (p/alt (p/expecting (tok :A) :A) 838 | (p/expecting (tok :B) :B)) 839 | [:C]) 840 | {:consumed false, :error ["error at index 0:" 841 | "unexpected :C" 842 | "expecting :A or :B"]} 843 | 844 | )) 845 | 846 | (deftest option-t 847 | (testing "The `option` without default." 
848 | (test/are [expr result] (= result expr) 849 | 850 | (p (p/option (tok :A)) 851 | [:A]) 852 | {:consumed true, :value :A} 853 | 854 | (p (p/option (tok :A)) 855 | [:B]) 856 | {:consumed false, :value nil} 857 | 858 | (p (p/option (tok :A)) 859 | []) 860 | {:consumed false, :value nil} 861 | 862 | (p (p/option (fail-consumed (tok :A))) 863 | [:A]) 864 | {:consumed true, :error ["error at index 1:" 865 | "Test failure after parsing :A"]} 866 | 867 | (p (p/option (fail-consumed (tok :A))) 868 | [:B]) 869 | {:consumed false, :value nil} 870 | 871 | (p (p/option (fail-consumed (tok :A))) 872 | []) 873 | {:consumed false, :value nil} 874 | 875 | )) 876 | 877 | (testing "The `option` with default value." 878 | (test/are [expr result] (= result expr) 879 | 880 | (p (p/option (tok :A) :X) 881 | [:A]) 882 | {:consumed true, :value :A} 883 | 884 | (p (p/option (tok :A) :X) 885 | [:B]) 886 | {:consumed false, :value :X} 887 | 888 | (p (p/option (tok :A) :X) 889 | []) 890 | {:consumed false, :value :X} 891 | 892 | (p (p/option (fail-consumed (tok :A)) :X) 893 | [:A]) 894 | {:consumed true, :error ["error at index 1:" 895 | "Test failure after parsing :A"]} 896 | 897 | (p (p/option (fail-consumed (tok :A)) :X) 898 | [:B]) 899 | {:consumed false, :value :X} 900 | 901 | (p (p/option (fail-consumed (tok :A)) :X) 902 | []) 903 | {:consumed false, :value :X} 904 | 905 | ))) 906 | 907 | (deftest between-t 908 | (test/are [expr result] (= result expr) 909 | 910 | (p (p/between (tok :A) (tok :L) (tok :R)) 911 | [:L :A :R]) 912 | {:consumed true, :value :A} 913 | 914 | (p (p/between (tok :A) (tok :L) (tok :R)) 915 | [:R :A :L]) 916 | {:consumed false, :error ["error at index 0:" 917 | "unexpected :R"]} 918 | 919 | (p (p/between (tok :A) (tok :L) (tok :R)) 920 | [:L :A]) 921 | {:consumed true, :error ["error at index 2:" 922 | "unexpected end of input"]} 923 | 924 | (p (p/between (tok :A) (tok :L) (tok :R)) 925 | [:A :R]) 926 | {:consumed false, :error ["error at index 0:" 927 | 
"unexpected :A"]} 928 | 929 | (p (p/between (tok :A) (tok :L) (tok :R)) 930 | [:A]) 931 | {:consumed false, :error ["error at index 0:" 932 | "unexpected :A"]} 933 | 934 | (p (p/between (tok :A) (tok :L) (tok :R)) 935 | []) 936 | {:consumed false, :error ["error at index 0:" 937 | "unexpected end of input"]} 938 | 939 | (p (p/between (tok :A) (tok :I)) 940 | [:I :A :I]) 941 | {:consumed true, :value :A} 942 | 943 | (p (p/between (tok :A) (tok :I)) 944 | [:I :A]) 945 | {:consumed true, :error ["error at index 2:" 946 | "unexpected end of input"]} 947 | 948 | (p (p/between (tok :A) (tok :I)) 949 | [:A :I]) 950 | {:consumed false, :error ["error at index 0:" 951 | "unexpected :A"]} 952 | 953 | (p (p/between (tok :A) (tok :I)) 954 | [:A]) 955 | {:consumed false, :error ["error at index 0:" 956 | "unexpected :A"]} 957 | 958 | (p (p/between (tok :A) (tok :I)) 959 | []) 960 | {:consumed false, :error ["error at index 0:" 961 | "unexpected end of input"]} 962 | 963 | )) 964 | 965 | (deftest times-t 966 | (test/are [expr result] (= result expr) 967 | 968 | (p (p/times 3 (tok :A1 :A2 :A3)) 969 | [:A1 :A2 :A3]) 970 | {:consumed true, :value '(:A1 :A2 :A3)} 971 | 972 | (p (p/times 3 (tok :A1 :A2 :A3)) 973 | [:A1 :A2 :A3 :A4]) 974 | {:consumed true, :value '(:A1 :A2 :A3)} 975 | 976 | (p (p/times 3 (tok :A1 :A2 :A3)) 977 | [:A1 :A2 :A3 :B]) 978 | {:consumed true, :value '(:A1 :A2 :A3)} 979 | 980 | (p (p/times 3 (tok :A1 :A2 :A3)) 981 | [:A1 :A2]) 982 | {:consumed true, :error ["error at index 2:" 983 | "unexpected end of input"]} 984 | 985 | (p (p/times 3 (tok :A1 :A2 :A3)) 986 | [:A1 :A2 :B]) 987 | {:consumed true, :error ["error at index 2:" 988 | "unexpected :B"]} 989 | 990 | (p (p/times 3 (tok :A1 :A2 :A3)) 991 | [:B :A1 :A2 :A3]) 992 | {:consumed false, :error ["error at index 0:" 993 | "unexpected :B"]} 994 | 995 | (p (p/times 3 (tok :A1 :A2 :A3)) 996 | [:B :A1]) 997 | {:consumed false, :error ["error at index 0:" 998 | "unexpected :B"]} 999 | 1000 | (p (p/times 3 (tok :A1 
:A2 :A3)) 1001 | []) 1002 | {:consumed false, :error ["error at index 0:" 1003 | "unexpected end of input"]} 1004 | 1005 | (p (p/times 0 (tok :A1 :A2 :A3)) 1006 | [:A1 :A2 :A3]) 1007 | {:consumed false, :value nil} 1008 | 1009 | (p (p/times -3 (tok :A1 :A2 :A3)) 1010 | [:A1 :A2 :A3]) 1011 | {:consumed false, :value nil} 1012 | 1013 | )) 1014 | 1015 | (deftest *many-till-t 1016 | (test/are [expr result] (= result expr) 1017 | 1018 | (p (p/*many-till (tok :A1 :A2 :A3) 1019 | (tok :END)) 1020 | [:A1 :A2 :A3 :END]) 1021 | {:consumed true, :value '(:A1 :A2 :A3)} 1022 | 1023 | (p (p/*many-till (tok :A1 :A2 :A3) 1024 | (tok :END)) 1025 | [:A1 :A2 :A3 :B :END]) 1026 | {:consumed true, :error ["error at index 3:" 1027 | "unexpected :B"]} 1028 | 1029 | (p (p/*many-till (tok :A1 :A2 :A3) 1030 | (tok :END)) 1031 | [:B :END]) 1032 | {:consumed false, :error ["error at index 0:" 1033 | "unexpected :B"]} 1034 | 1035 | (p (p/*many-till (tok :A1 :A2 :A3) 1036 | (tok :END)) 1037 | [:A1 :A2 :A3]) 1038 | {:consumed true, :error ["error at index 3:" 1039 | "unexpected end of input"]} 1040 | 1041 | (p (p/*many-till (fail-consumed (tok :A1 :A2 :A3)) 1042 | (tok :END)) 1043 | [:A1 :A2 :A3 :END]) 1044 | {:consumed true, :error ["error at index 1:" 1045 | "Test failure after parsing :A1"]} 1046 | 1047 | (p (p/*many-till (tok :A1 :A2 :A3) 1048 | (tok :END)) 1049 | [:END]) 1050 | {:consumed true, :value nil} 1051 | 1052 | (p (p/*many-till (p/alt (tok :A1 :A2 :A3) 1053 | (p/*many-till (tok :B1 :B2 :B3) 1054 | (tok :END))) 1055 | (tok :END)) 1056 | [:A1 :A2 :A3 :B1 :B2 :B3 :END :A1 :A2 :A3 :END]) 1057 | {:consumed true, :value '(:A1 :A2 :A3 (:B1 :B2 :B3) :A1 :A2 :A3)} 1058 | 1059 | (p (p/*many-till (tok :A1 :A2 :A3) 1060 | (tok :END)) 1061 | (concat (take 10000 (cycle [:A1 :A2 :A3])) [:END])) 1062 | {:consumed true, :value (take 10000 (cycle [:A1 :A2 :A3]))} 1063 | 1064 | )) 1065 | 1066 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,, 1067 | 
;; `+sep-by`: one or more items separated by `:S`. A separator must be
;; followed by another item; parsing stops quietly before a non-separator.
(deftest +sep-by-t
  (test/are [expr result] (= result expr)

    (p (p/+sep-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :value '(:A :A :A)}

    ;; Dangling separator at the end of input.
    (p (p/+sep-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :error ["error at index 6:" "unexpected end of input"]}

    (p (p/+sep-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :value '(:A :A :A)}

    ;; At least one item is required.
    (p (p/+sep-by (tok :A) (tok :S)) [])
    {:consumed false, :error ["error at index 0:" "unexpected end of input"]}

    (p (p/+sep-by (tok :A) (tok :S)) [:B])
    {:consumed false, :error ["error at index 0:" "unexpected :B"]}

    (p (p/+sep-by (tok :A) (tok :S)) [:S])
    {:consumed false, :error ["error at index 0:" "unexpected :S"]}))

;; `*sep-by`: like `+sep-by`, but zero items succeed with nil.
(deftest *sep-by-t
  (test/are [expr result] (= result expr)

    (p (p/*sep-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :error ["error at index 6:" "unexpected end of input"]}

    (p (p/*sep-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :value '(:A :A :A)}

    ;; No leading item: success without consuming.
    (p (p/*sep-by (tok :A) (tok :S)) [])
    {:consumed false, :value nil}

    (p (p/*sep-by (tok :A) (tok :S)) [:B])
    {:consumed false, :value nil}

    (p (p/*sep-by (tok :A) (tok :S)) [:S])
    {:consumed false, :value nil}))

;; `+sep-end-by`: one or more items, each of which must be followed by `:S`.
(deftest +sep-end-by-t
  (test/are [expr result] (= result expr)

    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :value '(:A :A :A)}

    ;; A further item without its terminating separator.
    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S :A])
    {:consumed true, :error ["error at index 7:" "unexpected end of input"]}

    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S :B])
    {:consumed true, :value '(:A :A :A)}

    ;; The last item is not followed by a separator.
    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :error ["error at index 5:" "unexpected end of input"]}

    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :A])
    {:consumed true, :error ["error at index 5:" "unexpected :A"]}

    (p (p/+sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :error ["error at index 5:" "unexpected :B"]}

    ;; At least one item is required.
    (p (p/+sep-end-by (tok :A) (tok :S)) [])
    {:consumed false, :error ["error at index 0:" "unexpected end of input"]}

    (p (p/+sep-end-by (tok :A) (tok :S)) [:B])
    {:consumed false, :error ["error at index 0:" "unexpected :B"]}

    (p (p/+sep-end-by (tok :A) (tok :S)) [:S])
    {:consumed false, :error ["error at index 0:" "unexpected :S"]}))

;; `*sep-end-by`: like `+sep-end-by`, but zero items succeed with nil.
(deftest *sep-end-by-t
  (test/are [expr result] (= result expr)

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S :A])
    {:consumed true, :error ["error at index 7:" "unexpected end of input"]}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :S :B])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :error ["error at index 5:" "unexpected end of input"]}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :A])
    {:consumed true, :error ["error at index 5:" "unexpected :A"]}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :error ["error at index 5:" "unexpected :B"]}

    ;; No leading item: success without consuming.
    (p (p/*sep-end-by (tok :A) (tok :S)) [])
    {:consumed false, :value nil}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:B])
    {:consumed false, :value nil}

    (p (p/*sep-end-by (tok :A) (tok :S)) [:S])
    {:consumed false, :value nil}))

;; `+sep-opt-by`: one or more items separated by `:S`, where the separator
;; after the last item is optional.
(deftest +sep-opt-by-t
  (test/are [expr result] (= result expr)

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :value '(:A :A :A)}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S :A])
    {:consumed true, :value '(:A :A :A :A)}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S :B])
    {:consumed true, :value '(:A :A :A)}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :value '(:A :A :A)}

    ;; An item that is not preceded by a separator is left unparsed.
    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :A])
    {:consumed true, :value '(:A :A :A)}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :value '(:A :A :A)}

    ;; At least one item is required.
    (p (p/+sep-opt-by (tok :A) (tok :S)) [])
    {:consumed false, :error ["error at index 0:" "unexpected end of input"]}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:B])
    {:consumed false, :error ["error at index 0:" "unexpected :B"]}

    (p (p/+sep-opt-by (tok :A) (tok :S)) [:S])
    {:consumed false, :error ["error at index 0:" "unexpected :S"]}))

;; `*sep-opt-by`: like `+sep-opt-by`, but zero items succeed with nil.
(deftest *sep-opt-by-t
  (test/are [expr result] (= result expr)

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S :A])
    {:consumed true, :value '(:A :A :A :A)}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :S :B])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :A])
    {:consumed true, :value '(:A :A :A)}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:A :S :A :S :A :B])
    {:consumed true, :value '(:A :A :A)}

    ;; No leading item: success without consuming.
    (p (p/*sep-opt-by (tok :A) (tok :S)) [])
    {:consumed false, :value nil}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:B])
    {:consumed false, :value nil}

    (p (p/*sep-opt-by (tok :A) (tok :S)) [:S])
    {:consumed false, :value nil}))

;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

;; `get-state` yields the whole parser state, or a single field of it when
;; given a key (`:input`, `:user`). These rows inspect the raw reply returned
;; by `p/parse*`.
(deftest get-state-t
  (test/are [expr result] (= result expr)

    ;; The value is the state itself: both carry the same remaining input.
    ((juxt (comp :input :value) (comp :input :state))
     (p/parse* (p/get-state) [:A]))
    ['(:A) '(:A)]

    (:value (p/parse* (p/get-state :input) [:A]))
    '(:A)

    (:value (p/parse* (p/after (p/set-state :user ::state) (p/get-state :user)) [:A]))
    ::state))

;; `set-state` replaces the whole state, or one field of it when given a key.
(deftest set-state-t
  (test/are [expr result] (= result expr)

    (:state (p/parse* (p/set-state ::state) [:A]))
    ::state

    (:input (:state (p/parse* (p/set-state :input [:B]) [:A])))
    '(:B)

    ;; A nil input reads back as an empty sequence.
    (:input (:state (p/parse* (p/set-state :input nil) [:A])))
    '()

    (:user (:state (p/parse* (p/set-state :user ::state) [:A])))
    ::state))

;; `update-state` mirrors `set-state`, taking a function instead of a value.
(deftest update-state-t
  (test/are [expr result] (= result expr)

    (:state (p/parse* (p/update-state (constantly ::state)) [:A]))
    ::state

    (:input (:state (p/parse* (p/update-state :input (constantly [:B])) [:A])))
    '(:B)

    (:input (:state (p/parse* (p/update-state :input (constantly nil)) [:A])))
    '()

    (:user (:state (p/parse* (p/update-state :user (constantly ::state)) [:A])))
    ::state))

;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

;; `trace` prints the label, the position, the remaining input and the user
;; state to `*out*`. Each row captures that output and compares it line by
;; line; the parse result itself is not checked here.
(deftest trace-t
  (testing "trace state"
    (test/are [expr result] (= result expr)

      (string/split-lines
        (with-out-str
          (p (p/for [_ (p/trace "a")
                     a (tok :A)
                     _ (p/trace "b")
                     b (tok :B)]
               (p/result [a b]))
             [:A :B :C])))
      ["a: at index 0"
       " - input: (:A :B :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B :C)"
       " - user: nil"]

      (string/split-lines
        (with-out-str
          (p (p/for [_ (p/trace "a")
                     a (tok :A)
                     _ (p/trace "b")
                     b (tok :B)]
               (p/result [a b]))
             [:A :B])))
      ["a: at index 0"
       " - input: (:A :B)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B)"
       " - user: nil"]

      ;; Tracing after each token instead of before it.
      (string/split-lines
        (with-out-str
          (p (p/for [a (tok :A)
                     _ (p/trace "a")
                     b (tok :B)
                     _ (p/trace "b")]
               (p/result [a b]))
             [:A :B])))
      ["a: at index 1"
       " - input: (:B)"
       " - user: nil"
       "b: at index 2"
       " - input: ()"
       " - user: nil"]))

  (testing "trace parser"
    (test/are [expr result] (= result expr)

      (string/split-lines
        (with-out-str
          (p (p/for [a (p/trace "a" (tok :A))
                     b (p/trace "b" (tok :B))]
               (p/result [a b]))
             [:A :B :C])))
      ["a: at index 0"
       " - input: (:A :B :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B :C)"
       " - user: nil"]

      (string/split-lines
        (with-out-str
          (p (p/for [a (p/trace "a" (tok :A))
                     b (p/trace "b" (tok :B))]
               (p/result [a b]))
             [:A :B])))
      ["a: at index 0"
       " - input: (:A :B)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B)"
       " - user: nil"]

      ;; The wrapped parser fails: an extra "backtracked" line is printed.
      (string/split-lines
        (with-out-str
          (p (p/for [a (p/trace "a" (tok :A))
                     b (p/trace "b" (tok :B))]
               (p/result [a b]))
             [:B :C])))
      ["a: at index 0"
       " - input: (:B :C)"
       " - user: nil"
       "a: backtracked"]

      (string/split-lines
        (with-out-str
          (p (p/for [a (p/trace "a" (tok :A))
                     b (p/trace "b" (tok :B))]
               (p/result [a b]))
             [:A :C])))
      ["a: at index 0"
       " - input: (:A :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:C)"
       " - user: nil"
       "b: backtracked"])))

;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

;; `parse` returns the bare value on success and throws on failure; the
;; exception message is the rendered error text.
(deftest parse-t
  (test/are [expr result] (= result expr)

    (p/parse (p/result :ok) [])
    :ok

    ;; The reader conditional picks the catch-all clause for each platform.
    (try
      (p/parse (p/fail "Error") [])
      (catch #?@(:clj [Exception e] :default [:default e])
        (ex-message e)))
    "error at index 0:\nError"))

;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,

--------------------------------------------------------------------------------