├── deps.edn
├── resources
└── clj-kondo.exports
│ └── com.github.strojure
│ └── parsesso
│ └── config.edn
├── .idea
├── codeStyles
│ ├── codeStyleConfig.xml
│ └── Project.xml
├── cursive-test-integration.xml
└── ClojureProjectResolveSettings.xml
├── .gitignore
├── bb.edn
├── .clj-kondo
└── config.edn
├── test
├── dev
│ └── node_repl.cljc
└── strojure
│ └── parsesso
│ ├── expr_test.cljc
│ ├── char_test.cljc
│ └── parser_test.cljc
├── .github
└── workflows
│ └── tests.yml
├── project.clj
├── UNLICENSE
├── CHANGELOG.md
├── src
└── strojure
│ └── parsesso
│ ├── impl
│ ├── parser.cljc
│ ├── state.cljc
│ ├── pos.cljc
│ ├── char.cljc
│ ├── reply.cljc
│ └── error.cljc
│ ├── unicode.clj
│ ├── expr.cljc
│ ├── char.cljc
│ └── parser.cljc
├── doc
├── demo
│ └── honeysql_select.clj
└── benchmarks
│ └── compare.clj
└── README.md
/deps.edn:
--------------------------------------------------------------------------------
1 | {:paths ["src"]}
2 |
--------------------------------------------------------------------------------
/resources/clj-kondo.exports/com.github.strojure/parsesso/config.edn:
--------------------------------------------------------------------------------
1 | {:lint-as {strojure.parsesso.parser/for clojure.core/let}}
2 |
--------------------------------------------------------------------------------
/.idea/codeStyles/codeStyleConfig.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
--------------------------------------------------------------------------------
/.idea/cursive-test-integration.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | pom.xml
2 | pom.xml.asc
3 | *.jar
4 | *.class
5 | *.iml
6 | /lib/
7 | /classes/
8 | /target/
9 | /checkouts/
10 | /test/user
11 | /cljs-test-runner-out
12 | .idea/
13 | .lein-deps-sum
14 | .lein-repl-history
15 | .lein-plugins/
16 | .lein-failures
17 | .nrepl-port
18 | .cache/
19 | .calva/
20 | .cljs_node_repl/
21 | .cpcache/
--------------------------------------------------------------------------------
/bb.edn:
--------------------------------------------------------------------------------
1 | {:deps {com.github.strojure/parsesso {:local/root "."}}
2 | :tasks
3 | {test:bb {:extra-paths ["test"]
4 | :extra-deps {com.cognitect/test-runner {:git/url "https://github.com/cognitect-labs/test-runner"
5 | :sha "a522ab2851a2aa5bf9c22a942b45287a3a019310"}}
6 | :task cognitect.test-runner/-main}}}
7 |
--------------------------------------------------------------------------------
/.idea/ClojureProjectResolveSettings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 | PROJECT
7 |
8 |
--------------------------------------------------------------------------------
/.clj-kondo/config.edn:
--------------------------------------------------------------------------------
1 | {:linters {:missing-docstring {:level :warning}
2 | :redundant-fn-wrapper {:level :warning}
3 | :shadowed-var {:level :warning
4 | :exclude []
5 | :suggest {}}
6 | :unknown-require-option {:level :off}
7 | :unsorted-required-namespaces {:level :warning}}
8 | :lint-as {cljs.core/defrecord clojure.core/defrecord}
9 | :config-paths ["../resources/clj-kondo.exports/com.github.strojure/parsesso"]
10 | :config-in-comment {:linters {:redundant-expression {:level :off}
11 | :unresolved-namespace {:level :off}
12 | :unresolved-symbol {:level :off}
13 | :duplicate-require {:level :off}}}}
14 |
--------------------------------------------------------------------------------
/test/dev/node_repl.cljc:
--------------------------------------------------------------------------------
1 | (ns dev.node-repl
2 | "ClojureScript Node REPL."
3 | (:require [cljs.repl :as repl]
4 | [cljs.repl.node :as node]))
5 |
6 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
7 |
8 | (defn- start ; Runs `cljs.repl` on a `cljs.repl.node` env; the quit prompt prints the REPL title first.
9 |   []
10 |   (let [quit-prompt (fn []
11 |                       (repl/repl-title)
12 |                       (repl/repl-quit-prompt))]
13 |     (repl/repl (node/repl-env) :quit-prompt quit-prompt)))
14 |
15 | (def -main
16 | "Main entry point: alias of the private `start` function."
17 | start)
18 |
19 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
20 |
21 | (comment
22 | (start)
23 | :cljs/quit
24 | )
25 |
26 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
27 |
--------------------------------------------------------------------------------
/.idea/codeStyles/Project.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
18 |
19 |
20 |
21 |
22 |
23 |
--------------------------------------------------------------------------------
/.github/workflows/tests.yml:
--------------------------------------------------------------------------------
1 | name: tests
2 |
3 | on:
4 | workflow_dispatch: { }
5 | push:
6 | branches: [ default ]
7 | paths: [ "src/**", "test/**", "project.clj", "*.edn" ]
8 | pull_request:
9 | paths: [ "src/**", "test/**", "project.clj", "*.edn" ]
10 |
11 | jobs:
12 | tests:
13 | runs-on: ubuntu-latest
14 | steps:
15 | - name: Checkout
16 | uses: actions/checkout@v3
17 |
18 | - name: Prepare java
19 | uses: actions/setup-java@v3
20 | with:
21 | distribution: 'zulu'
22 | java-version: '11'
23 |
24 | - name: Install clojure tools
25 | uses: DeLaGuardo/setup-clojure@10.1
26 | with:
27 | bb: latest
28 | lein: latest
29 |
30 | - name: Cache clojure dependencies
31 | uses: actions/cache@v3
32 | with:
33 | path: |
34 | ~/.m2/repository
35 | ~/.gitlibs
36 | ~/.deps.clj
37 | key: cljdeps-${{ hashFiles('project.clj', 'bb.edn') }}
38 | restore-keys: cljdeps-
39 |
40 | - run: lein deps
41 |
42 | - run: lein test
43 |
44 | - run: lein cljs-test
45 |
46 | - run: bb test:bb
47 |
--------------------------------------------------------------------------------
/project.clj:
--------------------------------------------------------------------------------
1 | (defproject com.github.strojure/parsesso "1.2.3-SNAPSHOT"
2 | :description "Parser combinators library for Clojure(Script)."
3 | :url "https://github.com/strojure/parsesso"
4 | :license {:name "The Unlicense" :url "https://unlicense.org"}
5 |
6 | :dependencies []
7 |
8 | :profiles {:provided {:dependencies [[org.clojure/clojure "1.11.1"]
9 | [org.clojure/clojurescript "1.11.60"]]}
10 | :dev,,,,, {:dependencies [;; clojurescript tests
11 | [com.google.guava/guava "31.1-jre"]
12 | [olical/cljs-test-runner "3.8.0"]
13 | ;; inspiration libs
14 | [org.blancas/kern "1.1.0"]
15 | [rm-hull/jasentaa "0.2.5"]
16 | [the/parsatron "0.0.8"]]
17 | :source-paths ["doc"]}}
18 |
19 | :aliases {"cljs-test" ["run" "-m" "cljs-test-runner.main"]}
20 |
21 | :clean-targets ["target" "cljs-test-runner-out"]
22 |
23 | :deploy-repositories [["clojars" {:url "https://clojars.org/repo" :sign-releases false}]])
24 |
--------------------------------------------------------------------------------
/UNLICENSE:
--------------------------------------------------------------------------------
1 | This is free and unencumbered software released into the public domain.
2 |
3 | Anyone is free to copy, modify, publish, use, compile, sell, or
4 | distribute this software, either in source code form or as a compiled
5 | binary, for any purpose, commercial or non-commercial, and by any
6 | means.
7 |
8 | In jurisdictions that recognize copyright laws, the author or authors
9 | of this software dedicate any and all copyright interest in the
10 | software to the public domain. We make this dedication for the benefit
11 | of the public at large and to the detriment of our heirs and
12 | successors. We intend this dedication to be an overt act of
13 | relinquishment in perpetuity of all present and future rights to this
14 | software under copyright law.
15 |
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19 | IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR
20 | OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
21 | ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22 | OTHER DEALINGS IN THE SOFTWARE.
23 |
24 | For more information, please refer to <https://unlicense.org>
25 |
--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
1 | # Changelog
2 |
3 | All notable changes to this project will be documented in this file.
4 |
5 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/).
6 |
7 | ## `1.2.3-SNAPSHOT`
8 |
9 | Release date `UNRELEASED`
10 |
11 | ## `1.2.2+295`
12 |
13 | Release date `2023-06-06`
14 |
15 | - (fix): allow destructuring in `p/for` [#8]
16 |
17 | [#8]: https://github.com/strojure/parsesso/issues/8
18 |
19 | ## `1.2.1+292`
20 |
21 | Release date `2023-05-28`
22 |
23 | - (docs): fix :arglists of `parser/parse` [#7]
24 |
25 | [#7]: https://github.com/strojure/parsesso/issues/7
26 |
27 | ## `1.2.0+287`
28 |
29 | Release date `2023-05-25`
30 |
31 | - (feat pos): allow specifying initial line/col for :text pos
32 | - (fix): cannot pass custom `InputPos` [#6]
33 |
34 | [#6]: https://github.com/strojure/parsesso/issues/6
35 |
36 | ## `1.1.2-283`
37 |
38 | Release date `2023-05-17`
39 |
40 | - (fix): `expecting` adds a message instead of replacing [#5]
41 |
42 | [#5]: https://github.com/strojure/parsesso/issues/5
43 |
44 | ## `1.1.1-274`
45 |
46 | Release date `2023-03-08`
47 |
48 | - (chore project) Implement `cljs-test` lein alias.
49 | - (fix cljs) `parser/update-state` for nil :input.
50 | - (chore) Change license to Unlicense.
51 |
52 | ## `1.1.0-258`
53 |
54 | Release date `2023-03-04`
55 |
56 | - feat: Make code compatible with `bb` and other platforms.
57 | - build: Add CI config to run lein test + bb test:bb.
58 |
59 | ## `1.0.253`
60 |
61 | Release date `2023-03-04`
62 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/parser.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.parser
2 | {:no-doc true}
3 | (:require [strojure.parsesso.impl.reply :as r]))
4 |
5 | #?(:clj (set! *warn-on-reflection* true)
6 | :cljs (set! *warn-on-infer* true))
7 |
8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9 |
10 | (deftype Continue [f]) ; Deferred step: `run` calls the zero-arg fn `f` and loops while the result is another `Continue`.
11 |
12 | (defn go
13 |   "Returns a `Continue` whose thunk calls parser `p` with `state` and `context`."
14 |   [p state context]
15 |   (->Continue #(p state context)))
16 |
17 | (defn run
18 |   "Runs parser `p` on `state` with a fresh context, unwinding `Continue` steps until a final reply."
19 |   [p state]
20 |   (loop [reply (go p state (r/new-context))]
21 |     (if-not (instance? Continue reply)
22 |       reply
23 |       (recur ((.-f ^Continue reply))))))
24 |
25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
26 |
27 | (defn e-ok-throw-empty-input
28 |   "Always throws `ex-info`; used in `many` combinator for a parser accepting an empty input."
29 |   [_ _]
30 |   (throw (ex-info "Combinator is applied to a parser that accepts an empty input." {})))
31 |
32 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
33 |
34 | (def ^:private word-test-fn! ; registry: keyword -> test-fn
35 |   (atom {}))
36 |
37 | (defn register-word-test-fn
38 |   "Associates keyword `k` with test-fn `f` of the [[word]] parser. Asserts that `k` is a keyword."
39 |   [k, f]
40 |   (assert (keyword? k) "Requires keyword as word test-fn ID") ; predicate `keyword?`; the constructor `keyword` is truthy for strings and symbols too
41 |   (swap! word-test-fn! assoc k f))
42 |
43 | (defn word-test-fn
44 |   "Looks up the test-fn registered for the keyword `k`; throws `ex-info` when there is none."
45 |   [k]
46 |   (if-let [f (get @word-test-fn! k)] f
47 |     (throw (ex-info (str "The word test-fn is not registered:" k) {}))))
48 |
49 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
50 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/state.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.state
2 | {:no-doc true}
3 | (:require [strojure.parsesso.impl.pos :as pos])
4 | #?(:clj (:import (clojure.lang ISeq))))
5 |
6 | #?(:clj (set! *warn-on-reflection* true)
7 | :cljs (set! *warn-on-infer* true))
8 |
9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10 |
11 | (defrecord State [input pos user]) ; Parser state: parsing input, input position, user state.
12 |
13 | (defn conform-input
14 |   "Coerces `input` to a seq; nil or empty input becomes the empty list, never nil."
15 |   [input]
16 |   (if-let [tokens (seq input)] tokens ()))
17 |
18 | (defn init-state
19 |   "Creates a parser `State` from `input` (conformed to a non-nil seq), `pos` and `user` state."
20 |   [input pos user]
21 |   (->State (conform-input input) pos user))
22 |
23 | (defn next-state
24 |   "Advances `state` past parsed token `tok`: drops the input head, moves pos via `pos/next-pos`; optional `user-fn` updates the user state."
25 |   ([^State state, tok]
26 |    ;; User state is carried over unchanged.
27 |    (let [tail (#?(:bb rest :clj .more :cljs -rest :default rest) ^ISeq (.-input state))]
28 |      (->State tail (pos/next-pos (.-pos state) tok) (.-user state))))
29 |   ([^State state, tok, user-fn]
30 |    ;; Same step, with `user-fn` applied to the user state.
31 |    (let [tail (#?(:bb rest :clj .more :cljs -rest :default rest) ^ISeq (.-input state))]
32 |      (->State tail (pos/next-pos (.-pos state) tok) (user-fn (.-user state))))))
33 |
34 | (defn set-input-pos
35 |   "Returns a `State` with `input` (conformed to a non-nil seq) and `pos` replaced; user state is kept."
36 |   [^State state, input, pos]
37 |   (->State (conform-input input) pos (.-user state)))
38 |
39 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
40 |
41 | (defn input
42 |   "Returns the input stored in the parser `state`."
43 |   [state]
44 |   (let [^State s state] (.-input s)))
45 |
46 | (defn pos
47 |   "Returns the input position stored in the parser `state`."
48 |   [state]
49 |   (let [^State s state] (.-pos s)))
50 |
51 | (defn user
52 |   "Returns the user state stored in the parser `state`."
53 |   [state]
54 |   (let [^State s state] (.-user s)))
55 |
56 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
57 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/unicode.clj:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.unicode
2 | "Unicode char parsers using `java.lang.Character`. Clojure only."
3 | (:require [strojure.parsesso.parser :as p]))
4 |
5 | (set! *warn-on-reflection* true)
6 |
7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8 |
9 | (def lower?
10 |   "Parser and predicate matching a lower-case letter, as decided by
11 |   `Character/isLowerCase`."
12 |   (p/token (fn [c] (Character/isLowerCase ^char c))
13 |            "lower-case letter"))
14 |
15 | (def upper?
16 |   "Parser and predicate matching an upper-case letter, as decided by
17 |   `Character/isUpperCase`."
18 |   (p/token (fn [c] (Character/isUpperCase ^char c))
19 |            "upper-case letter"))
20 |
21 | (def title?
22 |   "Parser and predicate matching a title-case letter, as decided by
23 |   `Character/isTitleCase`."
24 |   (p/token (fn [c] (Character/isTitleCase ^char c))
25 |            "title-case letter"))
26 |
27 | (def digit?
28 |   "Parser and predicate matching a digit character, as decided by
29 |   `Character/isDigit`."
30 |   (p/token (fn [c] (Character/isDigit ^char c))
31 |            "digit"))
32 |
33 | (def defined?
34 |   "Parser and predicate matching a character defined in Unicode, as decided
35 |   by `Character/isDefined`."
36 |   (p/token (fn [c] (Character/isDefined ^char c))
37 |            "unicode defined character"))
38 |
39 | (def letter?
40 |   "Parser and predicate matching a letter character, as decided by
41 |   `Character/isLetter`."
42 |   (p/token (fn [c] (Character/isLetter ^char c))
43 |            "letter"))
44 |
45 | (def letter-or-digit?
46 |   "Parser and predicate matching a letter or digit character, as decided by
47 |   `Character/isLetterOrDigit`."
48 |   (p/token (fn [c] (Character/isLetterOrDigit ^char c))
49 |            "letter or digit"))
50 |
51 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
52 |
53 | (def space?
54 |   "Parser and predicate matching a Unicode space character, as decided by
55 |   `Character/isSpaceChar`."
56 |   (p/token (fn [c] (Character/isSpaceChar ^char c))
57 |            "space character"))
58 |
59 | (def white?
60 |   "Parser and predicate matching a white space character, as decided by
61 |   `Character/isWhitespace`."
62 |   (p/token (fn [c] (Character/isWhitespace ^char c))
63 |            "whitespace character"))
64 |
65 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
66 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/expr.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.expr
2 | "Parser combinators for expressions."
3 | (:require [strojure.parsesso.parser :as p]))
4 |
5 | #?(:clj (set! *warn-on-reflection* true)
6 | :cljs (set! *warn-on-infer* true))
7 |
8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9 |
10 | (defn +chain-left
11 |   "Parser for _one_ or more `p` separated by `op`. Every `op` yields a function
12 |   of two arguments; these functions are applied to the values of `p` with _left_
13 |   associativity and the final value is returned. Can be used, for example, to
14 |   eliminate the left recursion typical for expression grammars.
15 |
16 |       (def mulop (p/alt (p/after (char/is \\*) (p/result *))
17 |                         (p/after (char/is \\/) (p/result /))))
18 |
19 |       (def addop (p/alt (p/after (char/is \\+) (p/result +))
20 |                         (p/after (char/is \\-) (p/result -))))
21 |
22 |       (def expr (+chain-left term addop))
23 |       (def term (+chain-left factor mulop))
24 |       (def factor (p/alt (parens expr) integer))
25 |   "
26 |   [p op]
27 |   (letfn [(fold-more [acc]
28 |             (p/alt (p/for [f op, operand p]
29 |                      (fold-more (f acc operand)))
30 |                    (p/result acc)))]
31 |     (p/for [head p]
32 |       (fold-more head))))
33 |
34 | (defn *chain-left
35 |   "Like [[+chain-left]] but accepts _zero_ occurrences of `p` as well: parses
36 |   zero or more `p` separated by `op` and returns the _left_ associative
37 |   application of the functions returned by `op` to the values of `p`, or the
38 |   value `x` if there are no occurrences of `p`."
39 |   [p op x]
40 |   (-> (+chain-left p op) (p/option x)))
41 |
42 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
43 |
44 | (defn +chain-right
45 |   "Parser for _one_ or more `p` separated by `op`. The functions returned by
46 |   `op` are applied to the values of `p` with _right_ associativity and the
47 |   final value is returned."
48 |   [p op]
49 |   (letfn [(fold-more [left]
50 |             (p/alt (p/for [f op, right (chain)]
51 |                      (p/result (f left right)))
52 |                    (p/result left)))
53 |           (chain []
54 |             (p/for [left p]
55 |               (fold-more left)))]
56 |     (chain)))
57 |
58 | (defn *chain-right
59 |   "Like [[+chain-right]] but accepts _zero_ occurrences of `p` as well: parses
60 |   zero or more `p` separated by `op` and returns the _right_ associative
61 |   application of the functions returned by `op` to the values of `p`, or the
62 |   value `x` if there are no occurrences of `p`."
63 |   [p op x]
64 |   (-> (+chain-right p op) (p/option x)))
65 |
66 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
67 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/pos.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.pos
2 | {:no-doc true})
3 |
4 | #?(:clj (set! *warn-on-reflection* true)
5 | :cljs (set! *warn-on-infer* true))
6 |
7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8 |
9 | (defprotocol InputPos
10 | (next-pos [pos token]
11 | "Returns new source pos after the `token` is consumed at `pos`.")
12 | (compare-pos [pos1 pos2]
13 | "Comparator of two positions. Returns negative, zero or positive number like `compare`."))
14 |
15 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
16 |
17 | (defmulti init-pos
18 |   "Creates the initial InputPos for parser options `opts` and `input`; dispatches on the `:pos` option."
19 |   (fn [opts _input] (get opts :pos)))
20 |
21 | (defmethod init-pos :default
22 |   [{custom-pos :pos} _]
23 |   ;; An unknown keyword is an error; any other `:pos` value is returned as the position itself.
24 |   (if (keyword? custom-pos) (throw (ex-info (str "Cannot init input position for: " custom-pos) {}))
25 |     custom-pos))
26 |
27 | (defmethod init-pos nil
28 |   [opts input]
29 |   ;; No explicit :pos, so guess: a string or a seq starting with a char gets text pos, anything else index pos.
30 |   (let [text? (or (string? input) (char? (first input)))
31 |         init (get-method init-pos (if text? :text :sequence))]
32 |     (init opts input)))
33 |
34 | (defmethod init-pos :disabled ; `:pos :disabled` yields nil as the position for any options and input
35 | [_ _]
36 | nil)
37 |
38 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
39 |
40 | (extend-protocol InputPos
41 |   nil ; nil position: next pos stays nil, all positions compare as equal
42 |   (next-pos [_ _] nil)
43 |   (compare-pos [_ _] 0)
44 |   #?(:clj Number :cljs number) ; plain number as position: incremented per token
45 |   (next-pos [n _] (inc n))
46 |   (compare-pos [n other] (compare n other)))
47 |
48 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
49 |
50 | (defrecord IndexPos [^long i] ; position as an index `i`
51 | InputPos
52 | (next-pos [_ _] (IndexPos. (unchecked-inc i))) ; any token advances the index by one
53 | (compare-pos [_ pos] (compare i (:i pos))) ; positions are ordered by index
54 | Object
55 | (toString [_] (str "index " i)))
56 |
57 | (defmethod init-pos :sequence ; index position, starting from 0
58 |   [_ _]
59 |   (->IndexPos 0))
60 |
61 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
62 |
63 | (defn- compare*
64 |   ;; Like `compare`, but yields nil instead of 0 so that comparisons can be chained with `or`.
65 |   [x y]
66 |   (let [order (compare x y)]
67 |     (if (zero? order) nil order)))
68 |
69 | (defrecord TextPos [tab, ^long line, ^long col] ; text position; `tab` is the tab stop width
70 | InputPos
71 | (next-pos [pos c]
72 | (case c \tab
73 | (update pos :col #(-> % (+ tab) (- (mod (dec %) tab)))) ; move to the next tab stop (columns 1, tab+1, 2*tab+1, ...)
74 | \newline
75 | (TextPos. tab (unchecked-inc line) 1) ; next line, first column
76 | ;; default
77 | (TextPos. tab line (unchecked-inc col))))
78 | (compare-pos [_ pos]
79 | (or (compare* line (:line pos)) ; lines first, columns only when lines are equal
80 | (compare* col (:col pos))
81 | 0))
82 | Object
83 | (toString [_] (str "line " line ", column " col)))
84 |
85 | (defmethod init-pos :text ; text position; defaults: tab width 8, line 1, column 1
86 |   [{:keys [tab line col]} _]
87 |   (TextPos. (or tab 8) (or line 1) (or col 1)))
88 |
89 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
90 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/char.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.char
2 | {:no-doc true}
3 | (:require [clojure.string :as string])
4 | #?(:cljs (:import [goog.string StringBuffer])))
5 |
6 | #?(:clj (set! *warn-on-reflection* true)
7 | :cljs (set! *warn-on-infer* true))
8 |
9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10 |
11 | (defn equals-ignorecase
12 |   "Tests chars `c1` and `c2` for equality, first as is and then lower-cased."
13 |   [c1 c2]
14 |   (if (= c1 c2)
15 |     true
16 |     #?(:bb
17 |        (let [lower-1 (string/lower-case c1)]
18 |          (= lower-1 (string/lower-case c2)))
19 |        :clj
20 |        (.equals ^Object (Character/toLowerCase ^char c1)
21 |                 (Character/toLowerCase ^char c2))
22 |        :default
23 |        (= (string/lower-case c1) (string/lower-case c2)))))
24 |
25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
26 |
27 | (def ^:private string-pred-fn! ; registry: keyword -> function building a predicate from `s`
28 |   (atom {}))
29 |
30 | (defn register-string-pred-fn
31 |   "Associates keyword `k` with predicate function `f` of the `is` and `is-not`
32 |   parsers. Asserts that `k` is a keyword."
33 |   [k, f]
34 |   (assert (keyword? k) "Requires keyword as `is` test-fn ID") ; predicate `keyword?`; the constructor `keyword` is truthy for strings and symbols too
35 |   (swap! string-pred-fn! assoc k f))
36 |
37 | (defn string-pred-fn
38 |   "Builds the predicate for string of characters `s` using the function registered for keyword `k`; throws `ex-info` when `k` is not registered."
39 |   [k s]
40 |   (let [make-pred (get @string-pred-fn! k)]
41 |     (when-not make-pred (throw (ex-info (str "The `is` predicate function is not registered:" k) {})))
42 |     (make-pred s)))
43 |
44 | (defn string-pred-default
45 |   "Returns the default (case-sensitive) predicate `(fn [c] ...)` of `is` and `is-not` parsers testing that char `c` is in `s`."
46 |   [s]
47 |   #?(:bb
48 |      #(string/index-of s %)
49 |      :clj
50 |      (if-not (char? s)
51 |        (fn [c] (not (neg? (.indexOf ^String s ^int (.charValue ^Character c)))))
52 |        (fn [c] (.equals ^Character s c)))
53 |      :default
54 |      #(string/index-of s %)))
55 |
56 | (defn string-pred-ignorecase
57 |   "Case-insensitive predicate for `is` and `is-not` parsers: returns `(fn [c] ...)` which is truthy when lower-cased `c` occurs in lower-cased `s` (a string or a single char)."
58 |   [s]
59 |   (let [s (string/lower-case (str s))] ; `str` first: a single char is not a CharSequence accepted by `string/lower-case` on JVM
60 |     (fn [c] #?(:bb
61 |                (string/index-of s (string/lower-case c))
62 |                :clj
63 |                (<= 0 (.indexOf ^String s ^int (Character/toLowerCase ^char c)))
64 |                :default
65 |                (string/index-of s (string/lower-case c))))))
66 |
67 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
68 |
69 | (defn str*
70 |   "Concatenates parsed characters and strings, including those in (possibly
71 |   nested) sequential collections, into a single string."
72 |   ([x]
73 |    (let [sb #?(:clj (StringBuilder.) :cljs (StringBuffer.))]
74 |      (str (str* sb x))))
75 |   ([sb x]
76 |    (if-not (sequential? x)
77 |      #?(:clj (.append ^StringBuilder sb (str x))
78 |         :cljs (.append ^StringBuffer sb (str x)))
79 |      (reduce str* sb x))))
80 |
81 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
82 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/reply.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.reply
2 | {:no-doc true})
3 |
4 | #?(:clj (set! *warn-on-reflection* true)
5 | :cljs (set! *warn-on-infer* true))
6 |
7 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
8 |
9 | (deftype Context [cok, eok, cerr, eerr]) ; Reply fns: consumed-ok, empty-ok, consumed-error, empty-error.
10 |
11 | (defn c-ok
12 |   "Consumed-ok reply: passes `state` and result value `x` to the `cok` function of `context`."
13 |   [^Context context, state, x]
14 |   (let [reply (.-cok context)] (reply state x)))
15 |
16 | (defn e-ok
17 |   "Empty-ok reply: passes `state` and result value `x` to the `eok` function of `context`."
18 |   [^Context context, state, x]
19 |   (let [reply (.-eok context)] (reply state x)))
20 |
21 | (defn c-err
22 |   "Consumed-error reply: passes parser `error` to the `cerr` function of `context`."
23 |   [^Context context, error]
24 |   (let [fail (.-cerr context)] (fail error)))
25 |
26 | (defn e-err
27 |   "Empty-error reply: passes parser `error` to the `eerr` function of `context`."
28 |   [^Context context, error]
29 |   (let [fail (.-eerr context)] (fail error)))
30 |
31 | (defn assign*
32 |   "Builds a new context from `context` with the given reply functions replaced; a nil argument keeps the current function. Backs the `assign` macro."
33 |   [^Context context, -c-ok, -e-ok, -c-err, -e-err]
34 |   (let [cok (or -c-ok (.-cok context))
35 |         eok (or -e-ok (.-eok context))
36 |         cerr (or -c-err (.-cerr context))
37 |         eerr (or -e-err (.-eerr context))]
38 |     (->Context cok eok cerr eerr)))
39 |
40 | (defmacro assign
41 | "Expands to `assign*` call replacing the context functions listed in map literal `m`, keyed by the reply functions `c-ok`, `e-ok`, `c-err`, `e-err`."
42 | [context m]
43 | (assert (map? m))
44 | (let [m (update-keys m (comp eval eval))] ; resolve key forms to the reply fn values at macroexpansion time
45 | (assert (every? #{c-ok e-ok c-err e-err} (keys m)))
46 | `(assign* ~context ~(m c-ok) ~(m e-ok) ~(m c-err) ~(m e-err)))) ; absent keys expand to nil = keep current fn
47 |
48 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
49 |
50 | (defrecord Result [value consumed state]) ; Successful reply: result value, consumed flag, parser state.
51 |
52 | (defrecord Failure [consumed error]) ; Failed reply: consumed flag and parser error.
53 |
54 | (defn result?
55 |   "Tests whether `reply` is a `Result`, i.e. a parsing result with value."
56 |   {:inline (fn [form] `(instance? Result ~form))}
57 |   [reply]
58 |   (instance? Result reply))
59 |
60 | (defn error?
61 |   "Tests whether `reply` is a `Failure`, i.e. a parser error."
62 |   {:inline (fn [form] `(instance? Failure ~form))}
63 |   [reply]
64 |   (instance? Failure reply))
65 |
66 | (defn value
67 |   "Extracts the value of a `Result` reply; throws `ex-info` for a `Failure` or any other reply."
68 |   [reply]
69 |   (if (result? reply)
70 |     (:value reply)
71 |     (throw (if (error? reply) (ex-info (str (:error reply)) reply)
72 |                               (ex-info "Invalid parser reply" {::reply reply})))))
73 |
74 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
75 |
76 | (defn new-context
77 |   "Creates the initial context whose reply functions build `Result` and `Failure` records."
78 |   []
79 |   (->Context (fn c-ok [state v] (->Result v true state))
80 |              (fn e-ok [state v] (->Result v false state))
81 |              (fn c-err [err] (->Failure true err))
82 |              (fn e-err [err] (->Failure false err))))
83 |
84 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
85 |
--------------------------------------------------------------------------------
/doc/demo/honeysql_select.clj:
--------------------------------------------------------------------------------
1 | (ns demo.honeysql-select
2 | "Demo: Parse SQL SELECT query to HoneySQL data structures."
3 | {:clj-kondo/config '{:linters {:missing-docstring {:level :off}}}}
4 | (:require [strojure.parsesso.char :as char]
5 | [strojure.parsesso.parser :as p]))
6 |
7 | (set! *warn-on-reflection* true)
8 |
9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10 |
11 | (comment
12 | "SELECT u.username, s.name FROM user AS u, status AS s WHERE (u.statusid = s.id) AND (u.id = ?)"
13 |
14 | {:select [:u.username :s.name]
15 | :from [[:user :u] [:status :s]]
16 | :where [:and [:= :u.statusid :s.id]
17 | [:= :u.id 9]]}
18 |
19 | "SELECT username, name FROM user, status WHERE (user.statusid = status.id) AND (user.id = ?)"
20 |
21 | {:select [:username :name]
22 | :from [:user :status]
23 | :where [:and [:= :user.statusid :status.id]
24 | [:= :user.id 9]]})
25 |
26 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
27 |
28 | (def *space (p/*skip char/white?)) ; skips `char/white?` chars; presumably zero or more, following the `*`/`+` naming of this library
29 |
30 | (def +space (p/+skip char/white?)) ; skips `char/white?` chars; presumably one or more, following the `*`/`+` naming of this library
31 |
32 | (defn comma-sep
33 |   "Parses occurrences of `p` separated by commas (a comma between `*space`)."
34 |   [p]
35 |   (let [comma (p/between (char/is \,) *space)]
36 |     (p/+sep-by p (p/maybe comma))))
37 |
38 | (def table-name
39 |   "Parser of a table name made of letters, returned as keyword like `:table`."
40 |   (p/value (p/+many char/letter?)
41 |            char/str* keyword))
42 |
43 | (def column-name
44 |   "Parser of a column name, returned as keyword like `:column` or `:table.column`."
45 |   (p/value (p/group (p/option (p/maybe (p/group (p/+many char/letter?)
46 |                                                 (char/is \.))))
47 |                     (p/+many char/letter?))
48 |            char/str* keyword))
49 |
50 | (comment
51 | (p/parse column-name "username") #_=> :username
52 | (p/parse column-name "u.username") #_=> :u.username
53 | (p/parse column-name "u.u.username") #_=> :u.u
54 | )
55 |
56 | (def as-expr
57 |   "Parser of an alias after the AS keyword, returned as keyword like `:alias`."
58 |   (let [as-kw (p/maybe (p/between (p/word "as" :ic) +space))]
59 |     (p/after as-kw (p/value (p/+many char/letter?)
60 |                             char/str* keyword))))
61 |
62 | (comment
63 | (p/parse as-expr " AS name") #_=> :name
64 | )
65 |
66 | (defn with-as
67 |   "Wraps `p` to accept an optional alias: the value is like `:name` or `[:name :alias]`."
68 |   [p]
69 |   (p/value (p/group p (p/option as-expr))
70 |            (fn [[x as-name]] (if as-name [x as-name] x))))
71 |
72 | (comment
73 | (p/parse (with-as column-name) "u.username") #_=> :u.username
74 | (p/parse (with-as column-name) "u.username AS name") #_=> [:u.username :name]
75 | )
76 |
77 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
78 |
79 | (def select-statement
80 |   "Parser of SQL SELECT statement, returns a map like `{:select [...] :from [...] ...}`."
81 |   (p/for [_ (p/maybe (p/after (p/word "select" :ic) +space))
82 |           select (comma-sep (with-as column-name))
83 |           _ (p/between (p/word "from" :ic) +space)
84 |           from (comma-sep (with-as table-name))]
85 |     (let [result {:select (vec select)
86 |                   :from (vec from)}]
87 |       (p/result result))))
88 |
89 | (comment
90 | (def -q "SELECT username, u.name AS x FROM user AS u, status")
91 | (p/parse select-statement -q)
92 | #_=> {:select [:username [:u.name :x]],
93 | :from [[:user :u] :status]}
94 | )
95 |
96 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
97 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/char.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.char
2 |   "Basic functions for parsing sequences of characters."
3 |   (:refer-clojure :exclude [newline number?])
4 |   (:require #?@(:bb [[clojure.string :as string]] :clj [] :default [[clojure.string :as string]])
5 |             [strojure.parsesso.impl.char :as impl]
6 |             [strojure.parsesso.parser :as p]))
7 |
8 | #?(:clj (set! *warn-on-reflection* true)
9 | :cljs (set! *warn-on-infer* true))
10 |
11 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12 |
13 | (defn register-string-pred
14 |   "Associates keyword `k` with the function `f` producing character predicates
15 |   for the [[is]] and [[is-not]] parsers; `k` is then accepted as their `pred-k`."
16 |   [k, f]
17 |   (impl/register-string-pred-fn k f))
18 |
19 | (register-string-pred :default impl/string-pred-default)
20 | (register-string-pred :ic impl/string-pred-ignorecase)
21 |
22 | (defn is
23 |   "Builds a token parser (usable as predicate too) accepting any character
24 |   listed in `s`, where `s` is a string of characters or a single character.
25 |   With `pred-k` the matching predicate is resolved by that keyword instead of
26 |   the default one; custom keywords are added via [[register-string-pred]].
27 |   Keywords registered in this namespace: `:default` and `:ic` (ignore case).
28 |   The expected-item message is built lazily as rendered `s` + ` character`.
29 |
30 |       (def control-char (char/is \"EX\"))
31 |
32 |       (def control-char-ignorecase (char/is \"ex\" :ic))
33 |   "
34 |   ([s]
35 |    (let [expected (delay (str (p/render s) " character"))]
36 |      (p/token (impl/string-pred-default s) expected)))
37 |   ([s, pred-k]
38 |    (let [expected (delay (str (p/render s) " character"))]
39 |      (p/token (impl/string-pred-fn pred-k s) expected))))
40 |
41 | (defn is-not
42 |   "Inverse of [[is]]: builds a token parser (and predicate) accepting any
43 |   character that is _not_ among the characters of `s` (string or single
44 |   character). Optional `pred-k` selects the matching predicate as in [[is]]."
45 |   ([s]
46 |    (p/token (comp not (is s))
47 |             (delay (str "not " (p/render s) " character"))))
48 |   ([s, pred-k]
49 |    (p/token (comp not (is s pred-k))
50 |             (delay (str "not " (p/render s) " character")))))
51 |
52 | (defn regex
53 |   "Builds a token parser (and predicate) accepting a character whose string
54 |   form is matched by `re-find` with the regex pattern `re`."
55 |   [re]
56 |   (p/token #(re-find re (str %))
57 |            (delay (str "character matching regex " (p/render re)))))
58 |
59 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
60 |
61 | (def upper?
62 |   "Token parser, also callable as predicate, for upper-case ASCII letter `A`..`Z`."
63 |   (p/token #?(:bb
64 |               (fn [ch] (re-find #"[A-Z]" (str ch)))
65 |               :clj
66 |               (fn [ch] (let [code (unchecked-int (.charValue ^Character ch))]
67 |                          (and (>= code 65) (>= 90 code))))
68 |               :default
69 |               (fn [ch] (re-find #"[A-Z]" (str ch))))
70 |            "upper-case ascii letter"))
71 |
72 | (def lower?
73 |   "Token parser, also callable as predicate, for lower-case ASCII letter `a`..`z`."
74 |   (p/token #?(:bb
75 |               (fn [ch] (re-find #"[a-z]" (str ch)))
76 |               :clj
77 |               (fn [ch] (let [code (unchecked-int (.charValue ^Character ch))]
78 |                          (and (>= code 97) (>= 122 code))))
79 |               :default
80 |               (fn [ch] (re-find #"[a-z]" (str ch))))
81 |            "lower-case ascii letter"))
82 |
83 | (def letter?
84 |   "Token parser, also callable as predicate, for ASCII letter in any case."
85 |   (p/token #?(:bb
86 |               (fn [ch] (re-find #"[a-zA-Z]" (str ch)))
87 |               :clj
88 |               (fn [ch] (or (upper? ch) (lower? ch)))
89 |               :default
90 |               (fn [ch] (re-find #"[a-zA-Z]" (str ch))))
91 |            "ascii letter"))
92 |
93 | (def number?
94 |   "Token parser, also callable as predicate, for ASCII decimal digit `0`..`9`."
95 |   (p/token #?(:bb
96 |               (fn [ch] (re-find #"[0-9]" (str ch)))
97 |               :clj
98 |               (fn [ch] (let [code (unchecked-int (.charValue ^Character ch))]
99 |                          (and (>= code 48) (>= 57 code))))
100 |               :default
101 |               (fn [ch] (re-find #"[0-9]" (str ch))))
102 |            "ascii number"))
103 |
104 | (def letter-or-number?
105 |   "Token parser, also callable as predicate, for ASCII letter or decimal digit."
106 |   (p/token #?(:bb
107 |               (fn [ch] (re-find #"[a-zA-Z0-9]" (str ch)))
108 |               :clj
109 |               (fn [ch] (or (letter? ch) (number? ch)))
110 |               :default
111 |               (fn [ch] (re-find #"[a-zA-Z0-9]" (str ch))))
112 |            "ascii letter or number"))
113 |
114 | (def white?
115 |   "Token parser, also callable as predicate, for whitespace: space, `\\n`, `\\r`, `\\t`, `\\f` (on JVM what `Character/isSpace` accepts)."
116 |   (p/token #?(:bb
117 |               (fn [ch] (string/index-of " \n\r\t\f" ch))
118 |               :clj
119 |               (fn [ch] (Character/isSpace ch))
120 |               :default
121 |               (fn [ch] (string/index-of " \n\r\t\f" ch)))
122 |            "whitespace character"))
123 |
124 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
125 |
126 | (def newline
127 |   "Matches end of line in LF or CRLF form; the parsed value is the `\\newline` character in both cases."
128 |   (let [lf (is \newline), cr (is \return)]
129 |     (p/alt lf (p/after cr lf))))
130 |
131 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
132 |
133 | (defn str*
134 |   "Builds string from (possibly nested) collections of parsed characters and
135 |   strings, delegating to `impl/str*`. To be used with [[strojure.parsesso.parser/value]]."
136 |   [x]
137 |   (impl/str* x))
138 |
139 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
140 |
--------------------------------------------------------------------------------
/test/strojure/parsesso/expr_test.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.expr-test
2 | (:require [clojure.string :as string]
3 | [clojure.test :as test :refer [deftest]]
4 | [strojure.parsesso.expr :as expr]
5 | [strojure.parsesso.parser :as p]))
6 |
7 | #_(test/run-tests)
8 |
9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10 |
11 | (defn- p
12 |   "Runs `parser` over `input` and reduces the reply to a comparable map: `:consumed` with `:value`, or with `:error` lines."
13 |   [parser input]
14 |   (let [reply (p/parse* parser input)
15 |         failure (:error reply)]
16 |     (if-not failure
17 |       (select-keys reply [:consumed :value])
18 |       (assoc (select-keys reply [:consumed]) :error (string/split-lines (str failure))))))
19 |
20 | (defn- tok
21 |   "Token parser using the set of the given items `cs` as predicate."
22 |   [& cs] (p/token (into #{} cs)))
23 |
24 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
25 |
26 | (deftest +chain-left-t
27 |   (test/are [expr result] (= result expr)
28 |     ;; left-associative evaluation: (8 - 2) / 2
29 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
30 |                          (tok + - * /))
31 |        [8 - 2 / 2])
32 |     {:consumed true, :value 3}
33 |     ;; expected value is 8 - 2: the trailing `2` has no operator before it
34 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
35 |                          (tok + - * /))
36 |        [8 - 2 2])
37 |     {:consumed true, :value 6}
38 |     ;; single operand
39 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
40 |                          (tok + - * /))
41 |        [1])
42 |     {:consumed true, :value 1}
43 |     ;; operator in place of the first operand
44 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
45 |                          (tok + - * /))
46 |        [+])
47 |     {:consumed false, :error ["error at index 0:"
48 |                               (str "unexpected " (p/render +))]}
49 |     ;; `0` is not among the operand tokens
50 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
51 |                          (tok + - * /))
52 |        [0])
53 |     {:consumed false, :error ["error at index 0:"
54 |                               "unexpected 0"]}
55 |     ;; empty input
56 |     (p (expr/+chain-left (tok 1 2 3 3 4 5 6 7 8 9)
57 |                          (tok + - * /))
58 |        [])
59 |     {:consumed false, :error ["error at index 0:"
60 |                               "unexpected end of input"]}
61 |
62 |     ))
63 |
64 | (deftest *chain-left-t
65 |   (test/are [expr result] (= result expr)
66 |     ;; left-associative evaluation: (8 - 2) / 2
67 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
68 |                          (tok + - * /)
69 |                          0)
70 |        [8 - 2 / 2])
71 |     {:consumed true, :value 3}
72 |     ;; expected value is 8 - 2: the trailing `2` has no operator before it
73 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
74 |                          (tok + - * /)
75 |                          0)
76 |        [8 - 2 2])
77 |     {:consumed true, :value 6}
78 |     ;; single operand
79 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
80 |                          (tok + - * /)
81 |                          0)
82 |        [1])
83 |     {:consumed true, :value 1}
84 |     ;; no operand at the start: the third argument `0` is the value, nothing consumed
85 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
86 |                          (tok + - * /)
87 |                          0)
88 |        [+])
89 |     {:consumed false, :value 0}
90 |     ;; input `0` is not among the operand tokens: the third argument is the value
91 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
92 |                          (tok + - * /)
93 |                          0)
94 |        [0])
95 |     {:consumed false, :value 0}
96 |     ;; empty input: the third argument is the value
97 |     (p (expr/*chain-left (tok 1 2 3 3 4 5 6 7 8 9)
98 |                          (tok + - * /)
99 |                          0)
100 |        [])
101 |     {:consumed false, :value 0}
102 |
103 |     ))
104 |
105 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
106 |
107 | (deftest +chain-right-t
108 |   (test/are [expr result] (= result expr)
109 |     ;; right-associative evaluation: 8 - (2 / 2)
110 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
111 |                           (tok + - * /))
112 |        [8 - 2 / 2])
113 |     {:consumed true, :value 7}
114 |     ;; expected value is 8 - 2: the trailing `2` has no operator before it
115 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
116 |                           (tok + - * /))
117 |        [8 - 2 2])
118 |     {:consumed true, :value 6}
119 |     ;; single operand
120 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
121 |                           (tok + - * /))
122 |        [1])
123 |     {:consumed true, :value 1}
124 |     ;; operator in place of the first operand
125 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
126 |                           (tok + - * /))
127 |        [+])
128 |     {:consumed false, :error ["error at index 0:"
129 |                               (str "unexpected " (p/render +))]}
130 |     ;; `0` is not among the operand tokens
131 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
132 |                           (tok + - * /))
133 |        [0])
134 |     {:consumed false, :error ["error at index 0:"
135 |                               "unexpected 0"]}
136 |     ;; empty input
137 |     (p (expr/+chain-right (tok 1 2 3 3 4 5 6 7 8 9)
138 |                           (tok + - * /))
139 |        [])
140 |     {:consumed false, :error ["error at index 0:"
141 |                               "unexpected end of input"]}
142 |
143 |     ))
144 |
145 | (deftest *chain-right-t
146 |   (test/are [expr result] (= result expr)
147 |     ;; right-associative evaluation: 8 - (2 / 2)
148 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
149 |                           (tok + - * /)
150 |                           0)
151 |        [8 - 2 / 2])
152 |     {:consumed true, :value 7}
153 |     ;; expected value is 8 - 2: the trailing `2` has no operator before it
154 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
155 |                           (tok + - * /)
156 |                           0)
157 |        [8 - 2 2])
158 |     {:consumed true, :value 6}
159 |     ;; single operand
160 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
161 |                           (tok + - * /)
162 |                           0)
163 |        [1])
164 |     {:consumed true, :value 1}
165 |     ;; no operand at the start: the third argument `0` is the value, nothing consumed
166 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
167 |                           (tok + - * /)
168 |                           0)
169 |        [+])
170 |     {:consumed false, :value 0}
171 |     ;; input `0` is not among the operand tokens: the third argument is the value
172 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
173 |                           (tok + - * /)
174 |                           0)
175 |        [0])
176 |     {:consumed false, :value 0}
177 |     ;; empty input: the third argument is the value
178 |     (p (expr/*chain-right (tok 1 2 3 3 4 5 6 7 8 9)
179 |                           (tok + - * /)
180 |                           0)
181 |        [])
182 |     {:consumed false, :value 0}
183 |
184 |     ))
185 |
186 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
187 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/impl/error.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.impl.error
2 | {:no-doc true}
3 | (:require [clojure.string :as string]
4 | [strojure.parsesso.impl.pos :as pos]
5 | [strojure.parsesso.impl.state :as state]))
6 |
7 | #?(:clj (set! *warn-on-reflection* true)
8 | :cljs (set! *warn-on-infer* true))
9 |
10 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
11 |
12 | (defprotocol IRenderObject
13 |   (render-object [obj]
14 |     "Returns string representation of the `obj` in parser error messages."))
15 | ;; Default renderings per platform.
16 | #?(:clj
17 |    (extend-protocol IRenderObject
18 |      nil,,,,,, (render-object [x] (pr-str x))
19 |      Object,,, (render-object [x] (pr-str x))
20 |      Character (render-object [c] (pr-str (str c)))) ;; chars are printed as one-char strings
21 |
22 |    :cljs
23 |    (extend-protocol IRenderObject
24 |      nil,,,,, (render-object [x] (pr-str x))
25 |      object,, (render-object [x] (pr-str x))
26 |      string,, (render-object [x] (pr-str x))
27 |      function (render-object [x] (pr-str x))
28 |      number,, (render-object [x] (str x)))) ;; numbers use `str` instead of `pr-str`
29 |
30 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
31 |
32 | (declare render-messages)
33 |
34 | (defrecord ParseError [pos messages] ;; `pos` - error position, `messages` - seq of `[type msg]` pairs
35 |   Object
36 |   (toString [_] ;; "error at <pos>:" line followed by the rendered messages
37 |     (str "error at " pos ":\n" (render-messages messages))))
38 |
39 | (defn- new-error
40 |   [state typ msg] ;; error at the position of `state` holding the single message `[typ msg]`
41 |   (->ParseError (state/pos state) (cons [typ msg] nil)))
42 |
43 | (defn sys-unexpected
44 |   "Returns “unexpected input” parser error (`::sys-unexpected` type) with message `msg` at position of `state`."
45 |   [state msg]
46 |   (new-error state ::sys-unexpected msg))
47 |
48 | (defn sys-unexpected-eof
49 |   "Returns “unexpected input” parser error in case of end of input: `::sys-unexpected` type with nil message."
50 |   [state]
51 |   (new-error state ::sys-unexpected nil))
52 |
53 | (defn unexpected
54 |   "Returns “unexpected item” parser error (`::unexpected` type) with message `msg` at position of `state`."
55 |   [state msg]
56 |   (new-error state ::unexpected msg))
57 |
58 | (defn expecting
59 |   "Replaces all “expected item” messages of the parser error `err` by the single
60 |   message `msg`, returning a new error. With nil `msg` the `err` is returned as is."
61 |   [^ParseError err, msg]
62 |   (if-not msg
63 |     err
64 |     (let [expecting? (fn [m] (= ::expecting (first m)))
65 |           others (remove expecting? (.-messages err))]
66 |       (ParseError. (.-pos err) (cons [::expecting msg] others)))))
67 |
68 | (defn message
69 |   "Returns parser error with some general parser message `msg` (`::message` type),
70 |   generated by the `fail` combinator."
71 |   [state msg]
72 |   (new-error state ::message msg))
73 |
74 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
75 |
76 | (defn merge-errors
77 |   "Combines two parser errors: one with messages wins over one without, then the greater position wins, else messages are joined."
78 |   [e1 e2]
79 |   (let [m1 (:messages e1), m2 (:messages e2)]
80 |     (cond
81 |       (and m1 (nil? m2)) e1 ;; only `e1` is meaningful
82 |       (and m2 (nil? m1)) e2 ;; only `e2` is meaningful
83 |       :else
84 |       (let [pos1 (:pos e1), order (int (pos/compare-pos pos1 (:pos e2)))]
85 |         ;; the error whose position compares greater (the longest match) is selected
86 |         (case order 1 e1, -1 e2, (ParseError. pos1 (into m1 m2)))))))
87 |
88 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
89 |
90 | (defn- comma-sep
91 |   [-or xs] ;; joins non-blank items like "a, b or c"; nil when there are none
92 |   (let [items (into [] (comp (map str) (remove empty?)) xs)]
93 |     (cond
94 |       (empty? items) nil
95 |       (= 1 (count items)) (peek items)
96 |       :else (str (string/join ", " (pop items)) " " -or " " (peek items)))))
97 |
98 | (defn- render-many
99 |   [xs -or prefix] ;; renders messages `xs` as "<prefix> a, b <-or> c"; nil when there is nothing to show
100 |   ;; Blank-only messages yield nil instead of a dangling "<prefix> " line.
101 |   (when-let [text (some->> xs (map (comp force second))
102 |                            (comma-sep -or))]
103 |     (cond->> text prefix (str prefix " "))))
104 |
105 | (defn render-messages
106 |   "The standard function for showing error messages. Formats a list of error
107 |   messages in English. The resulting string will be formatted like:
108 |
109 |   - unexpected _{list of UnExpect messages, or the first SysUnExpect message if there are none}_
110 |   - expecting _{comma separated list of Expect messages}_
111 |   - _{comma separated list of Message messages}_
112 |
113 |   Example:
114 |
115 |       unexpected UnExpect1 or UnExpect2
116 |       expecting Expect1, Expect2 or Expect3
117 |       Message1 or Message2
118 |   "
119 |   {:arglists '([{:keys [unknown expecting unexpected end-of-input or] :as dict}, messages]
120 |                [messages])}
121 |   ([messages] (render-messages nil messages))
122 |   ([dict messages]
123 |    (let [dict (->> dict (merge {:unknown "unknown parse error" ;; entries of `dict` override these defaults
124 |                                 :expecting "expecting"
125 |                                 :unexpected "unexpected"
126 |                                 :end-of-input "end of input"
127 |                                 :or "or"}))]
128 |      (if messages
129 |        (let [xs (->> messages
130 |                      (map #(update % 1 force)) ;; realize delayed message texts
131 |                      (distinct)
132 |                      (reverse)
133 |                      (group-by first))] ;; message type -> seq of `[type text]`
134 |          (->> [(when-let [[[_ msg]] (and (not (xs ::unexpected)) ;; system message only without explicit ones
135 |                                          (xs ::sys-unexpected))]
136 |                  (str (dict :unexpected) " " (or (not-empty msg) ;; nil/empty text stands for end of input
137 |                                                  (dict :end-of-input))))
138 |                (render-many (xs ::unexpected) (dict :or) (dict :unexpected))
139 |                (render-many (xs ::expecting) (dict :or) (dict :expecting))
140 |                (render-many (xs ::message) (dict :or) nil)]
141 |               (filter some?) ;; skip the parts which have nothing to show
142 |               (string/join "\n")))
143 |        (dict :unknown)))))
144 |
145 | (comment
146 | (->> (list [::message "Message2"]
147 | [::message (delay "Message1")]
148 | [::message "Message1"]
149 | [::expecting "Expect3"]
150 | [::expecting ""]
151 | [::expecting "Expect2"]
152 | [::expecting (delay "Expect2")]
153 | [::expecting "Expect1"]
154 | [::unexpected (delay "UnExpect2")]
155 | [::unexpected "UnExpect1"]
156 | [::sys-unexpected "SysUnExpect"]
157 | [::sys-unexpected nil])
158 | (render-messages)
159 | (println))
160 | )
161 |
162 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
163 |
--------------------------------------------------------------------------------
/test/strojure/parsesso/char_test.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.char-test
2 | (:require [clojure.string :as string]
3 | [clojure.test :as test :refer [deftest testing]]
4 | [strojure.parsesso.char :as char]
5 | [strojure.parsesso.parser :as p]))
6 |
7 | #_(test/run-tests)
8 |
9 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
10 |
11 | (defn- p
12 |   "Runs `parser` over `input` and reduces the reply to a comparable map: `:consumed` with `:value`, or with `:error` lines."
13 |   [parser input]
14 |   (let [reply (p/parse* parser input)
15 |         failure (:error reply)]
16 |     (if-not failure
17 |       (select-keys reply [:consumed :value])
18 |       (assoc (select-keys reply [:consumed]) :error (string/split-lines (str failure))))))
19 |
20 | (defn- c
21 |   "Cross-platform char: returns `s` unchanged on `:cljs`, the first character of `s` elsewhere."
22 |   [s]
23 |   #?(:cljs s, :default (first s)))
24 |
25 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
26 |
27 | (deftest is-t
28 | (testing "default matching"
29 | (test/are [expr result] (= result expr)
30 |
31 | (p (char/is "abc")
32 | "a")
33 | {:consumed true, :value (c "a")}
34 |
35 | (p (char/is "abc")
36 | "b")
37 | {:consumed true, :value (c "b")}
38 |
39 | (p (char/is "abc")
40 | "c")
41 | {:consumed true, :value (c "c")}
42 |
43 | (p (char/is "abc")
44 | "d")
45 | {:consumed false, :error ["error at line 1, column 1:"
46 | "unexpected \"d\""
47 | "expecting \"abc\" character"]}
48 |
49 | (p (char/is "abc")
50 | "")
51 | {:consumed false, :error ["error at line 1, column 1:"
52 | "unexpected end of input"
53 | "expecting \"abc\" character"]}
54 |
55 | (p (char/is "a")
56 | "d")
57 | {:consumed false, :error ["error at line 1, column 1:"
58 | "unexpected \"d\""
59 | "expecting \"a\" character"]}
60 |
61 | (p (char/is "a")
62 | "")
63 | {:consumed false, :error ["error at line 1, column 1:"
64 | "unexpected end of input"
65 | "expecting \"a\" character"]}
66 |
67 | ))
68 |
69 | (testing "case-insensitive matching"
70 | (test/are [expr result] (= result expr)
71 |
72 | (p (char/is "abc" :ic)
73 | "a")
74 | {:consumed true, :value (c "a")}
75 |
76 | (p (char/is "abc" :ic)
77 | "A")
78 | {:consumed true, :value (c "A")}
79 |
80 | (p (char/is "ABC" :ic)
81 | "a")
82 | {:consumed true, :value (c "a")}
83 |
84 | (p (char/is "abc" :ic)
85 | "d")
86 | {:consumed false, :error ["error at line 1, column 1:"
87 | "unexpected \"d\""
88 | "expecting \"abc\" character"]}
89 |
90 | )))
91 |
92 | (deftest is-not-t
93 | (testing "default matching"
94 | (test/are [expr result] (= result expr)
95 |
96 | (p (char/is-not "abc")
97 | "x")
98 | {:consumed true, :value (c "x")}
99 |
100 | (p (char/is-not "abc")
101 | "a")
102 | {:consumed false, :error ["error at line 1, column 1:"
103 | "unexpected \"a\""
104 | "expecting not \"abc\" character"]}
105 |
106 | (p (char/is-not "abc")
107 | "")
108 | {:consumed false, :error ["error at line 1, column 1:"
109 | "unexpected end of input"
110 | "expecting not \"abc\" character"]}
111 |
112 | (p (char/is-not "a")
113 | "a")
114 | {:consumed false, :error ["error at line 1, column 1:"
115 | "unexpected \"a\""
116 | "expecting not \"a\" character"]}
117 |
118 | (p (char/is-not "a")
119 | "")
120 | {:consumed false, :error ["error at line 1, column 1:"
121 | "unexpected end of input"
122 | "expecting not \"a\" character"]}
123 |
124 | ))
125 |
126 | (testing "case insensitive matching"
127 | (test/are [expr result] (= result expr)
128 |
129 | (p (char/is-not "abc" :ic)
130 | "x")
131 | {:consumed true, :value (c "x")}
132 |
133 | (p (char/is-not "abc" :ic)
134 | "a")
135 | {:consumed false, :error ["error at line 1, column 1:"
136 | "unexpected \"a\""
137 | "expecting not \"abc\" character"]}
138 |
139 | (p (char/is-not "abc" :ic)
140 | "A")
141 | {:consumed false, :error ["error at line 1, column 1:"
142 | "unexpected \"A\""
143 | "expecting not \"abc\" character"]}
144 |
145 | (p (char/is-not "a" :ic)
146 | "a")
147 | {:consumed false, :error ["error at line 1, column 1:"
148 | "unexpected \"a\""
149 | "expecting not \"a\" character"]}
150 |
151 | (p (char/is-not "a" :ic)
152 | "A")
153 | {:consumed false, :error ["error at line 1, column 1:"
154 | "unexpected \"A\""
155 | "expecting not \"a\" character"]}
156 |
157 | )))
158 |
159 | (deftest regex-t
160 | (test/are [expr result] (= result expr)
161 |
162 | (p (p/*many (char/regex #"[a-z]"))
163 | "abc")
164 | {:consumed true, :value (seq "abc")}
165 |
166 | (p (char/regex #"[a-z]")
167 | "A")
168 | {:consumed false, :error ["error at line 1, column 1:"
169 | "unexpected \"A\""
170 | "expecting character matching regex #\"[a-z]\""]}
171 |
172 | (p (char/regex #"[a-z]")
173 | "")
174 | {:consumed false, :error ["error at line 1, column 1:"
175 | "unexpected end of input"
176 | "expecting character matching regex #\"[a-z]\""]}
177 |
178 | ))
179 |
180 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
181 |
182 | (deftest letter?-t
183 | (test/are [expr result] (= result expr)
184 |
185 | (p char/letter?
186 | "a")
187 | {:consumed true, :value (c "a")}
188 |
189 | (p char/letter?
190 | "1")
191 | {:consumed false, :error ["error at line 1, column 1:"
192 | "unexpected \"1\""
193 | "expecting ascii letter"]}
194 |
195 | (p char/letter?
196 | "")
197 | {:consumed false, :error ["error at line 1, column 1:"
198 | "unexpected end of input"
199 | "expecting ascii letter"]}
200 |
201 | ))
202 |
203 | (deftest upper?-t
204 | (test/are [expr result] (= result expr)
205 |
206 | (p (p/*many char/upper?)
207 | "ABC")
208 | {:consumed true, :value (seq "ABC")}
209 |
210 | (p char/upper?
211 | "a")
212 | {:consumed false, :error ["error at line 1, column 1:"
213 | "unexpected \"a\""
214 | "expecting upper-case ascii letter"]}
215 |
216 | (p char/upper?
217 | "")
218 | {:consumed false, :error ["error at line 1, column 1:"
219 | "unexpected end of input"
220 | "expecting upper-case ascii letter"]}
221 |
222 | ))
223 |
224 | (deftest lower?-t
225 | (test/are [expr result] (= result expr)
226 |
227 | (p (p/*many char/lower?)
228 | "abc")
229 | {:consumed true, :value (seq "abc")}
230 |
231 | (p char/lower?
232 | "A")
233 | {:consumed false, :error ["error at line 1, column 1:"
234 | "unexpected \"A\""
235 | "expecting lower-case ascii letter"]}
236 |
237 | (p char/lower?
238 | "")
239 | {:consumed false, :error ["error at line 1, column 1:"
240 | "unexpected end of input"
241 | "expecting lower-case ascii letter"]}
242 |
243 | ))
244 |
245 | (deftest number?-t
246 | (test/are [expr result] (= result expr)
247 |
248 | (p (p/*many char/number?)
249 | "01234567890")
250 | {:consumed true, :value (seq "01234567890")}
251 |
252 | (p char/number?
253 | "a")
254 | {:consumed false, :error ["error at line 1, column 1:"
255 | "unexpected \"a\""
256 | "expecting ascii number"]}
257 |
258 | (p char/number?
259 | "")
260 | {:consumed false, :error ["error at line 1, column 1:"
261 | "unexpected end of input"
262 | "expecting ascii number"]}
263 |
264 | ))
265 |
266 | (deftest letter-or-number?-t
267 | (test/are [expr result] (= result expr)
268 |
269 | (p (p/*many char/letter-or-number?)
270 | "12345abcABC")
271 | {:consumed true, :value (seq "12345abcABC")}
272 |
273 | (p char/letter-or-number?
274 | "-")
275 | {:consumed false, :error ["error at line 1, column 1:"
276 | "unexpected \"-\""
277 | "expecting ascii letter or number"]}
278 |
279 | (p char/letter-or-number?
280 | "")
281 | {:consumed false, :error ["error at line 1, column 1:"
282 | "unexpected end of input"
283 | "expecting ascii letter or number"]}
284 |
285 | ))
286 |
287 | (deftest white?-t
288 | (test/are [expr result] (= result expr)
289 |
290 | (p (p/*many char/white?)
291 | " \t\r\n")
292 | {:consumed true, :value (seq " \t\r\n")}
293 |
294 | (p char/white?
295 | "a")
296 | {:consumed false, :error ["error at line 1, column 1:"
297 | "unexpected \"a\""
298 | "expecting whitespace character"]}
299 |
300 | (p char/white?
301 | "")
302 | {:consumed false, :error ["error at line 1, column 1:"
303 | "unexpected end of input"
304 | "expecting whitespace character"]}
305 |
306 | ))
307 |
308 | (deftest newline-t
309 | (test/are [expr result] (= result expr)
310 |
311 | (p char/newline
312 | "\n")
313 | {:consumed true, :value (c "\n")}
314 |
315 | (p char/newline
316 | "\r\n")
317 | {:consumed true, :value (c "\n")}
318 |
319 | (p char/newline
320 | "\ra")
321 | {:consumed true, :error ["error at line 1, column 2:"
322 | "unexpected \"a\""
323 | "expecting \"\\n\" character"]}
324 |
325 | (p char/newline
326 | "\r")
327 | {:consumed true, :error ["error at line 1, column 2:"
328 | "unexpected end of input"
329 | "expecting \"\\n\" character"]}
330 |
331 | (p char/newline
332 | "a")
333 | {:consumed false, :error ["error at line 1, column 1:"
334 | "unexpected \"a\""
335 | "expecting \"\\n\" character or \"\\r\" character"]}
336 |
337 | (p char/newline
338 | "")
339 | {:consumed false, :error ["error at line 1, column 1:"
340 | "unexpected end of input"
341 | "expecting \"\\n\" character or \"\\r\" character"]}
342 |
343 | ))
344 |
345 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
346 |
347 | (deftest str*-t
348 | (test/are [expr result] (= result expr)
349 |
350 | (p (-> (char/is "abc")
351 | (p/value char/str*))
352 | "abc")
353 | {:consumed true, :value "a"}
354 |
355 | (p (-> (p/group (p/+many (char/is "abc"))
356 | (p/+many (char/is "123")))
357 | (p/value char/str*))
358 | "abc123")
359 | {:consumed true, :value "abc123"}
360 |
361 | (p (-> (p/*many (char/is "abc"))
362 | (p/value char/str*))
363 | "123")
364 | {:consumed false, :value ""}
365 |
366 | (p (-> (char/is "abc")
367 | (p/value char/str*))
368 | "123")
369 | {:consumed false, :error ["error at line 1, column 1:"
370 | "unexpected \"1\""
371 | "expecting \"abc\" character"]}
372 |
373 | ))
374 |
375 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
376 |
--------------------------------------------------------------------------------
/doc/benchmarks/compare.clj:
--------------------------------------------------------------------------------
1 | (ns benchmarks.compare
2 | "Some benchmarks between parsesso, kern and parsatron. There is no jasentaa
3 | here because it is very slow."
4 | (:require [blancas.kern.core :as k]
5 | [strojure.parsesso.char :as char]
6 | [strojure.parsesso.parser :as p]
7 | [the.parsatron :as t]))
8 |
9 | (set! *warn-on-reflection* true)
10 |
11 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
12 |
13 | (defn- t-run
14 |   [p input] ;; runs parsatron parser `p` via `t/run-parser` on input state starting at `(t/->SourcePos 1 1)`
15 |   (let [start (t/->SourcePos 1 1)] (t/run-parser p (t/->InputState input start))))
16 |
17 | (def ^:private -input-10000 (repeat 10000 :a)) ;; sequence of 10000 `:a` items used as long benchmark input
18 |
19 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
20 |
21 | ;; ## Return value without parsing ##
22 |
23 | (p/parse (p/result :x) [])
24 | ; Execution time mean : 175,877194 ns
25 | ; Execution time std-deviation : 34,075714 ns
26 | ; Execution time lower quantile : 153,729903 ns ( 2,5%)
27 | ; Execution time upper quantile : 217,875203 ns (97,5%)
28 |
29 | (k/parse (k/return :x) [])
30 | ; Execution time mean : 233,470315 ns
31 | ; Execution time std-deviation : 66,244027 ns
32 | ; Execution time lower quantile : 178,201399 ns ( 2,5%)
33 | ; Execution time upper quantile : 326,518209 ns (97,5%)
34 |
35 | (t/run (t/always :x) [])
36 | ; Execution time mean : 168,392753 ns
37 | ; Execution time std-deviation : 68,636364 ns
38 | ; Execution time lower quantile : 123,449569 ns ( 2,5%)
39 | ; Execution time upper quantile : 252,628602 ns (97,5%)
40 |
41 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
42 |
43 | ;; ## Fail immediately without parsing ##
44 |
45 | (p/parse* (p/fail :x) [])
46 | ; Execution time mean : 188,952263 ns
47 | ; Execution time std-deviation : 17,000877 ns
48 | ; Execution time lower quantile : 172,453755 ns ( 2,5%)
49 | ; Execution time upper quantile : 210,153699 ns (97,5%)
50 |
51 | (k/parse (k/fail :x) [])
52 | ; Execution time mean : 386,590746 ns
53 | ; Execution time std-deviation : 156,097460 ns
54 | ; Execution time lower quantile : 266,519628 ns ( 2,5%)
55 | ; Execution time upper quantile : 640,785168 ns (97,5%)
56 |
57 | (t-run (t/never) [])
58 | ; Execution time mean : 841,250545 ns
59 | ; Execution time std-deviation : 206,671857 ns
60 | ; Execution time lower quantile : 703,388694 ns ( 2,5%)
61 | ; Execution time upper quantile : 1,115857 µs (97,5%)
62 |
63 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
64 |
65 | ;; ## Parse token ##
66 |
67 | (p/parse (p/token #(= \a %)) "abc")
68 | ; Execution time mean : 280,963465 ns
69 | ; Execution time std-deviation : 16,328760 ns
70 | ; Execution time lower quantile : 268,625666 ns ( 2,5%)
71 | ; Execution time upper quantile : 307,169162 ns (97,5%)
72 |
73 | (k/parse (k/satisfy #(= \a %)) "abc")
74 | ; Execution time mean : 245,984170 ns
75 | ; Execution time std-deviation : 13,553994 ns
76 | ; Execution time lower quantile : 235,005603 ns ( 2,5%)
77 | ; Execution time upper quantile : 268,329750 ns (97,5%)
78 |
79 | (t/run (t/token #(= \a %)) "abc")
80 | ; Execution time mean : 557,024259 ns
81 | ; Execution time std-deviation : 14,359373 ns
82 | ; Execution time lower quantile : 541,631508 ns ( 2,5%)
83 | ; Execution time upper quantile : 578,875966 ns (97,5%)
84 |
85 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
86 |
87 | ;; ## Parse word ##
88 |
89 | (p/parse (p/word "abc") "abc")
90 | ; Execution time mean : 492,578323 ns
91 | ; Execution time std-deviation : 23,516467 ns
92 | ; Execution time lower quantile : 471,832427 ns ( 2,5%)
93 | ; Execution time upper quantile : 531,601202 ns (97,5%)
94 |
95 | (k/parse (k/token* "abc") "abc")
96 | ; Execution time mean : 4,020720 µs
97 | ; Execution time std-deviation : 429,048420 ns
98 | ; Execution time lower quantile : 3,767589 µs ( 2,5%)
99 | ; Execution time upper quantile : 4,754242 µs (97,5%)
100 |
101 | (t/run (t/string "abc") "abc")
102 | ; Execution time mean : 2,212562 µs
103 | ; Execution time std-deviation : 91,094400 ns
104 | ; Execution time lower quantile : 2,126279 µs ( 2,5%)
105 | ; Execution time upper quantile : 2,342896 µs (97,5%)
106 |
107 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
108 |
109 | ;; ## Parse word, case-insensitive ##
110 |
111 | (p/parse (p/word "abc" :ic) "ABC")
112 | ; Execution time mean : 631,199580 ns
113 | ; Execution time std-deviation : 9,939793 ns
114 | ; Execution time lower quantile : 618,951019 ns ( 2,5%)
115 | ; Execution time upper quantile : 641,954344 ns (97,5%)
116 |
117 | (k/parse (k/token- "abc") "ABC")
118 | ; Execution time mean : 5,063223 µs
119 | ; Execution time std-deviation : 212,754488 ns
120 | ; Execution time lower quantile : 4,915983 µs ( 2,5%)
121 | ; Execution time upper quantile : 5,412170 µs (97,5%)
122 |
123 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
124 |
125 | ;; ## Parse long word ##
126 |
127 | (p/parse (p/word -input-10000) -input-10000)
128 | ; Execution time mean : 190,951777 µs
129 | ; Execution time std-deviation : 15,767078 µs
130 | ; Execution time lower quantile : 177,441117 µs ( 2,5%)
131 | ; Execution time upper quantile : 209,664373 µs (97,5%)
132 |
133 | (comment
134 | (k/parse (k/token* -input-10000) -input-10000))
135 | ; Execution error (StackOverflowError) at blancas.kern.core/>>=$fn
136 |
137 | (t/run (t/string -input-10000) -input-10000)
138 | ; Execution time mean : 5,677465 ms
139 | ; Execution time std-deviation : 961,844848 µs
140 | ; Execution time lower quantile : 4,976587 ms ( 2,5%)
141 | ; Execution time upper quantile : 6,805795 ms (97,5%)
142 |
143 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
144 |
145 | ;; ## Parse letters ##
146 |
147 | (p/parse (p/*many char/letter?) "abc")
148 | ; Execution time mean : 975,326535 ns
149 | ; Execution time std-deviation : 65,828611 ns
150 | ; Execution time lower quantile : 915,594047 ns ( 2,5%)
151 | ; Execution time upper quantile : 1,059000 µs (97,5%)
152 |
153 | (k/parse (k/many k/letter) "abc")
154 | ; Execution time mean : 1,911586 µs
155 | ; Execution time std-deviation : 511,124107 ns
156 | ; Execution time lower quantile : 1,646502 µs ( 2,5%)
157 | ; Execution time upper quantile : 2,783604 µs (97,5%)
158 |
159 | (t/run (t/many (t/letter)) "abc")
160 | ; Execution time mean : 2,599675 µs
161 | ; Execution time std-deviation : 576,904794 ns
162 | ; Execution time lower quantile : 2,193151 µs ( 2,5%)
163 | ; Execution time upper quantile : 3,354449 µs (97,5%)
164 |
165 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
166 |
167 | ;; ## Parse letters as string ##
168 |
169 | (p/parse (-> (p/*many char/letter?) (p/value char/str*)) "abc")
170 | ; Execution time mean : 1,514160 µs
171 | ; Execution time std-deviation : 104,898493 ns
172 | ; Execution time lower quantile : 1,439323 µs ( 2,5%)
173 | ; Execution time upper quantile : 1,680704 µs (97,5%)
174 |
175 | (k/parse (k/<+> (k/many k/letter)) "abc")
176 | ; Execution time mean : 5,568215 µs
177 | ; Execution time std-deviation : 145,037838 ns
178 | ; Execution time lower quantile : 5,459951 µs ( 2,5%)
179 | ; Execution time upper quantile : 5,810555 µs (97,5%)
180 |
181 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
182 |
183 | ;; ## Parse `many` for long input ##
184 |
185 | (p/parse (p/*many (p/token #(= :a %))) -input-10000)
186 | ; Execution time mean : 1,311809 ms
187 | ; Execution time std-deviation : 96,377398 µs
188 | ; Execution time lower quantile : 1,223376 ms ( 2,5%)
189 | ; Execution time upper quantile : 1,426319 ms (97,5%)
190 |
191 | (k/parse (k/many (k/satisfy #(= :a %))) -input-10000)
192 | ; Execution time mean : 1,928105 ms
193 | ; Execution time std-deviation : 62,373984 µs
194 | ; Execution time lower quantile : 1,868339 ms ( 2,5%)
195 | ; Execution time upper quantile : 2,024112 ms (97,5%)
196 |
197 | (t/run (t/many (t/token #(= :a %))) -input-10000)
198 | ; Execution time mean : 1,066323 sec
199 | ; Execution time std-deviation : 159,363140 ms
200 | ; Execution time lower quantile : 984,876092 ms ( 2,5%)
201 | ; Execution time upper quantile : 1,341844 sec (97,5%)
202 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
203 |
204 | ;; ## Skip `many` for long input ##
205 |
206 | (p/parse (p/*skip (p/token #(= :a %))) -input-10000)
207 | ; Execution time mean : 1,043996 ms
208 | ; Execution time std-deviation : 252,158552 µs
209 | ; Execution time lower quantile : 893,890237 µs ( 2,5%)
210 | ; Execution time upper quantile : 1,465919 ms (97,5%)
211 |
212 | (k/parse (k/skip-many (k/satisfy #(= :a %))) -input-10000)
213 | ; Execution time mean : 1,416146 ms
214 | ; Execution time std-deviation : 35,717820 µs
215 | ; Execution time lower quantile : 1,379739 ms ( 2,5%)
216 | ; Execution time upper quantile : 1,451345 ms (97,5%)
217 |
218 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
219 |
220 | ;; ## The `alt` combinator ##
221 |
222 | (p/parse (p/alt (p/fail "a")
223 | (p/fail "b")
224 | (p/result :x)) [])
225 | ; Execution time mean : 573,340067 ns
226 | ; Execution time std-deviation : 46,346310 ns
227 | ; Execution time lower quantile : 511,933832 ns ( 2,5%)
228 | ; Execution time upper quantile : 624,550670 ns (97,5%)
229 |
230 | (k/parse (k/<|> (k/fail "a")
231 | (k/fail "b")
232 | (k/return :x)) [])
233 | ; Execution time mean : 1,754808 µs
234 | ; Execution time std-deviation : 148,221426 ns
235 | ; Execution time lower quantile : 1,618505 µs ( 2,5%)
236 | ; Execution time upper quantile : 1,924351 µs (97,5%)
237 |
238 | (t/run (t/choice (t/never)
239 | (t/never)
240 | (t/always :x)) [])
241 | ; Execution time mean : 697,151006 ns
242 | ; Execution time std-deviation : 165,879602 ns
243 | ; Execution time lower quantile : 570,024598 ns ( 2,5%)
244 | ; Execution time upper quantile : 961,147185 ns (97,5%)
245 |
246 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
247 |
248 | ;; ## Wrap with `expecting` ##
249 |
250 | (p/parse (-> (p/result :x) (p/expecting "x")) [])
251 | ; Execution time mean : 212,033445 ns
252 | ; Execution time std-deviation : 20,071125 ns
253 | ; Execution time lower quantile : 196,685023 ns ( 2,5%)
254 | ; Execution time upper quantile : 238,117212 ns (97,5%)
255 |
256 | (k/parse (k/<?> (k/return :x) "x") [])
257 | ; Execution time mean : 222,587325 ns
258 | ; Execution time std-deviation : 16,924812 ns
259 | ; Execution time lower quantile : 205,615791 ns ( 2,5%)
260 | ; Execution time upper quantile : 240,220579 ns (97,5%)
261 |
262 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
263 |
264 | ;; ## Test for the end of input ##
265 |
266 | (p/parse* p/eof " ")
267 | ; Execution time mean : 231,661354 ns
268 | ; Execution time std-deviation : 25,008376 ns
269 | ; Execution time lower quantile : 209,952763 ns ( 2,5%)
270 | ; Execution time upper quantile : 262,847436 ns (97,5%)
271 |
272 | (k/parse k/eof " ")
273 | ; Execution time mean : 1,428015 µs
274 | ; Execution time std-deviation : 81,057937 ns
275 | ; Execution time lower quantile : 1,352623 µs ( 2,5%)
276 | ; Execution time upper quantile : 1,560179 µs (97,5%)
277 |
278 | (t-run (t/eof) " ")
279 | ; Execution time mean : 882,705676 ns
280 | ; Execution time std-deviation : 46,738939 ns
281 | ; Execution time lower quantile : 837,580307 ns ( 2,5%)
282 | ; Execution time upper quantile : 948,317437 ns (97,5%)
283 |
284 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
285 |
286 | (p/parse (p/after (p/word "<!--")
287 | (p/*many-till p/any-token (p/maybe (p/word "-->"))))
288 | "<!-- comment -->")
289 | ; Execution time mean : 7,450434 µs
290 | ; Execution time std-deviation : 607,080144 ns
291 | ; Execution time lower quantile : 6,900613 µs ( 2,5%)
292 | ; Execution time upper quantile : 8,221736 µs (97,5%)
293 |
294 | (k/parse (k/>> (k/token* "<!--")
295 | (k/many-till k/any-char (k/<:> (k/token* "-->"))))
296 | "<!-- comment -->")
297 | ; Execution time mean : 84,653453 µs
298 | ; Execution time std-deviation : 2,870985 µs
299 | ; Execution time lower quantile : 81,222728 µs ( 2,5%)
300 | ; Execution time upper quantile : 87,938498 µs (97,5%)
301 |
302 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
303 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # parsesso
2 |
3 | [Parser combinators](https://en.wikipedia.org/wiki/Parser_combinator) for
4 | Clojure(Script).
5 |
6 | [Clojars Project](https://clojars.org/com.github.strojure/parsesso)
7 | 
8 |
9 | [cljdoc](https://cljdoc.org/d/com.github.strojure/parsesso)
10 | [ClojureScript compatible](https://clojurescript.org/)
11 | [babashka compatible](https://book.babashka.org#badges)
12 | [tests](https://github.com/strojure/parsesso/actions/workflows/tests.yml)
13 |
14 | ## Motivation
15 |
16 | * Idiomatic and convenient API for parser combinators in Clojure and
17 | ClojureScript.
18 |
19 | ## Inspiration
20 |
21 | * [haskell/parsec](https://github.com/haskell/parsec)
22 | * [blancas/kern](https://github.com/blancas/kern)
23 | * [youngnh/parsatron](https://github.com/youngnh/parsatron)
24 | * [rm-hull/jasentaa](https://github.com/rm-hull/jasentaa)
25 |
26 | ## Documentation
27 |
28 | Since there is no comprehensive documentation on how to use `parsesso` yet, here
29 | are other resources for getting familiar with the idea of parser combinators in Clojure:
30 |
31 | - [Kern documentation wiki](https://github.com/blancas/kern/wiki).
32 |
33 | ## Cheat sheet
34 |
35 | | Parsesso | Parsec[1],[2],[3] | Kern[4] | Parsatron[5] |
36 | |---------------------------------------|---------------------------------|-------------------------|-------------------------|
37 | | [p/do-parser] | | `fwd` | `defparser` |
38 | | [p/result] | `return` | `return` | `always` |
39 | | [p/fail] | `fail` | `fail` | `never` |
40 | | [p/fail-unexpected] | `unexpected` | `unexpected` | |
41 | | [p/expecting] | `<?>`, `label` | `<?>`, `expect` | |
42 | | [p/bind] | `>>=` | `>>=` | `bind` |
43 | | [p/for] | `do` | `bind` | `let->>` |
44 | | [p/after] | `>>` | `>>` | `>>`, `nxt` |
45 | | [p/value] | `fmap` | `<$>` | |
46 | | [p/maybe] | `try` | `<:>` | `attempt` |
47 | | [p/look-ahead] | `lookAhead` | `look-ahead` | `lookahead` |
48 | | [p/not-followed-by] | `notFollowedBy` | `not-followed-by` | |
49 | | [p/*many] | `many` | `many` | `many` |
50 | | [p/+many] | `many1` | `many1` | `many1` |
51 | | [p/*skip] | `skipMany` | `skip-many` | |
52 | | [p/+skip] | `skipMany1` | `skip-many1` | |
53 | | [p/token] | `token`, `satisfy` | `satisfy` | `token` |
54 | | [p/token-not] | | | |
55 | | [p/word] | `tokens`, `string` | `token*` | `string` |
56 | | [p/any-token] | `anyToken`,`anyChar` | `any-char` | `any-char` |
57 | | [p/eof] | `eof` | `eof` | `eof` |
58 | | [p/group] | `<*>` | `<*>` | |
59 | | [p/alt] | `<\|>`, `choice` | `<\|>` | `choice` |
60 | | [p/option] | `option`, `optional` | `option`, `optional` | |
61 | | [p/between] | `between` | `between` | `between` |
62 | | [p/times] | `count` | `times` | `times` |
63 | | [p/*many-till] | `manyTill` | `many-till` | |
64 | | [p/*sep-by] | `sepBy` | `sep-by` | |
65 | | [p/+sep-by] | `sepBy1` | `sep-by1` | |
66 | | [p/*sep-end-by] | `endBy` | `end-by` | |
67 | | [p/+sep-end-by] | `endBy1` | `end-by1` | |
68 | | [p/*sep-opt-by] | `sepEndBy` | `sep-end-by` | |
69 | | [p/+sep-opt-by] | `sepEndBy1` | `sep-end-by1` | |
70 | | [p/get-state] | `getParserState`... | input, pos, user state | |
71 | | [p/set-state] | `setParserState`... | input, pos, user state | |
72 | | [p/update-state] | `updateParserState`... | user state | |
73 | | [p/trace] | `parserTrace`, `parserTraced` | | |
74 | | [expr/*chain-left] | `chainl` | `chainl` | |
75 | | [expr/+chain-left] | `chainl1` | `chainl1` | |
76 | | [expr/*chain-right] | `chainr` | `chainr` | |
77 | | [expr/+chain-right] | `chainr1` | `chainr1` | |
78 | | [char/is] | `char`, `oneOf` | `sym*`, `one-of*` | `char` |
79 | | [char/is-not] | `noneOf` | `none-of*` | |
80 | | [char/regex] | | | |
81 | | [char/upper?] | `upper` | `upper` (unicode) | |
82 | | [char/lower?] | `lower` | `lower` (unicode) | |
83 | | [char/letter?] | `letter` | `letter` (unicode) | `letter` (unicode) |
84 | | [char/number?] | `digit` | `digit` (unicode) | `digit` (unicode) |
85 | | [char/letter-or-number?] | `alphaNum` | `alpha-num` (unicode) | |
86 | | [char/white?] | `space` | `white-space` (unicode) | |
87 | | [char/newline] | `endOfLine` | `new-line*` | |
88 | | [char/str*] | | `<+>` | |
89 |
90 | [1]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Prim.hs
91 |
92 | [2]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Combinator.hs
93 |
94 | [3]: https://github.com/haskell/parsec/blob/master/src/Text/Parsec/Char.hs
95 |
96 | [4]: https://github.com/blancas/kern/blob/master/src/main/clojure/blancas/kern/core.clj
97 |
98 | [5]: https://github.com/youngnh/parsatron/blob/master/src/clj/the/parsatron.clj
99 |
100 | [p/do-parser]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#do-parser
101 |
102 | [p/result]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#result
103 |
104 | [p/fail]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#fail
105 |
106 | [p/fail-unexpected]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#fail-unexpected
107 |
108 | [p/expecting]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#expecting
109 |
110 | [p/bind]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#bind
111 |
112 | [p/for]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#for
113 |
114 | [p/after]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#after
115 |
116 | [p/value]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#value
117 |
118 | [p/maybe]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#maybe
119 |
120 | [p/look-ahead]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#look-ahead
121 |
122 | [p/not-followed-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#not-followed-by
123 |
124 | [p/*many]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*many
125 |
126 | [p/+many]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+many
127 |
128 | [p/*skip]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*skip
129 |
130 | [p/+skip]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+skip
131 |
132 | [p/token]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#token
133 |
134 | [p/token-not]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#token-not
135 |
136 | [p/word]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#word
137 |
138 | [p/any-token]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#any-token
139 |
140 | [p/eof]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#eof
141 |
142 | [p/group]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#group
143 |
144 | [p/alt]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#alt
145 |
146 | [p/option]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#option
147 |
148 | [p/between]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#between
149 |
150 | [p/times]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#times
151 |
152 | [p/*many-till]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*many-till
153 |
154 | [p/*sep-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-by
155 |
156 | [p/+sep-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-by
157 |
158 | [p/*sep-end-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-end-by
159 |
160 | [p/+sep-end-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-end-by
161 |
162 | [p/*sep-opt-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#*sep-opt-by
163 |
164 | [p/+sep-opt-by]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#+sep-opt-by
165 |
166 | [p/get-state]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#get-state
167 |
168 | [p/set-state]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#set-state
169 |
170 | [p/update-state]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#update-state
171 |
172 | [p/trace]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.parser#trace
173 |
174 | [expr/*chain-left]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#*chain-left
175 |
176 | [expr/+chain-left]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#+chain-left
177 |
178 | [expr/*chain-right]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#*chain-right
179 |
180 | [expr/+chain-right]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.expr#+chain-right
181 |
182 | [char/is]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#is
183 |
184 | [char/is-not]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#is-not
185 |
186 | [char/regex]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#regex
187 |
188 | [char/upper?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#upper?
189 |
190 | [char/lower?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#lower?
191 |
192 | [char/letter?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#letter?
193 |
194 | [char/number?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#number?
195 |
196 | [char/letter-or-number?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#letter-or-number?
197 |
198 | [char/white?]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#white?
199 |
200 | [char/newline]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#newline
201 |
202 | [char/str*]: https://cljdoc.org/d/com.github.strojure/parsesso/CURRENT/api/strojure.parsesso.char#str*
203 |
204 | ## Examples
205 |
206 | * [HoneySQL SELECT](doc/demo/honeysql_select.clj)
207 |
208 | ## Performance
209 |
210 | See some benchmarks [here](doc/benchmarks/compare.clj).
211 |
212 | ## FAQ
213 |
214 | **What are parser combinators, and what are they good for? How do they differ,
215 | e.g., from Instaparse, which also parses text into data?**
216 |
217 | A parser combinator library is a library with functions that can be composed
218 | into a parser. Instaparse takes a grammar specification, but in a parser
219 | combinator library you build the specification from functions, rather than a
220 | DSL.
221 |
222 | **When should I pick parser combinators over EBNF? Do they offer the same
223 | capabilities, so that it is only a question of which one I prefer to learn, or is
224 | there some distinct advantage over a DSL such as EBNF? Perhaps it is easier to
225 | describe more complex grammars because I can write my own helper functions?**
226 |
227 | In general, parser combinators such as `parsesso` are for creating top-down
228 | (i.e. LL) parsers, with the ability to reuse common code (this lib). Parser
229 | Generators typically generate a finite state automaton for a bottom-up (LR)
230 | parser. Though nowadays there are also combinators for LR grammars and
231 | generators for LL ones (e.g. ANTLR). Which one you should use depends on how
232 | hard your grammar is, and how fast the parser needs to be. Especially if the
233 | grammar has a lot of non-trivial ambiguities, it might be easier with the more
234 | flexible combinator approach.
235 |
236 | ## Contributors
237 |
238 | - [Michiel Borkent](https://github.com/borkdude)
239 | + Compatibility with babashka.
240 | + Github CI configuration.
241 | + Clj-kondo configuration tips.
242 | - [Jakub Holý](https://github.com/holyjak)
243 | + Questions and answers in FAQ.
244 |
--------------------------------------------------------------------------------
/src/strojure/parsesso/parser.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.parser
2 | "Main namespace with parsers and their combinators."
3 | (:refer-clojure :exclude [for])
4 | (:require [strojure.parsesso.impl.char :as char]
5 | [strojure.parsesso.impl.error :as error]
6 | [strojure.parsesso.impl.parser :as parser]
7 | [strojure.parsesso.impl.pos :as pos]
8 | [strojure.parsesso.impl.reply :as reply :include-macros true]
9 | [strojure.parsesso.impl.state :as state])
10 | #?(:clj (:import (clojure.lang ISeq))
11 | :cljs (:require-macros [strojure.parsesso.parser :refer [for do-parser]])))
12 |
13 | #?(:clj (set! *warn-on-reflection* true)
14 | :cljs (set! *warn-on-infer* true))
15 |
16 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
17 |
18 | (def ^{:arglists '([obj])} render
19 |   "Produces the string form of `obj` that is shown in parser error
20 |   messages."
21 |   error/render-object)
22 |
23 | (defmacro do-parser
24 |   "Wraps `body` in a parser whose parser expression is evaluated only when
25 |   that parser is run. Use it in (def)s of no-arg parsers that refer to a
26 |   forward (declare)d parser which has not been defined yet."
27 |   [& body]
28 |   `(fn [s# ctx#]
29 |      (parser/go (do ~@body) s# ctx#)))
30 |
31 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
32 |
33 | ;; ## Parsers ##
34 |
35 | (defn result
36 |   "Returns a parser that succeeds with the value `x` and leaves the input
37 |   untouched.
38 |
39 |   - Fails: never.
40 |   - Consumes: never.
41 |   "
42 |   [x]
43 |   (fn [s ctx] (reply/e-ok ctx s x)))
44 |
45 | (defn fail
46 |   "Returns a parser that fails with the message `msg` (`nil` when called
47 |   without arguments) and leaves the input untouched.
48 |
49 |   - Fails: always.
50 |   - Consumes: never.
51 |   "
52 |   ([] (fail nil))
53 |   ([msg]
54 |    (fn [s ctx]
55 |      (reply/e-err ctx (error/message s msg)))))
56 |
57 | (defn fail-unexpected
58 | "This parser always fails with an unexpected error message `msg` without
59 | consuming any input. When `msg` is logical false, `(delay (render msg))` is used instead.
60 |
61 | - Fails: always.
62 | - Consumes: never.
63 | "
64 | [msg]
65 | (fn [state context]
66 | (reply/e-err context (error/unexpected state (or msg (delay (render msg)))))))
67 |
68 | (defn expecting
69 | "This parser behaves as parser `p`, but whenever the parser `p` fails _without
70 | consuming any input_, it replaces expect error messages with the expect error
71 | message `msg`.
72 |
73 | This is normally used at the end of a set of alternatives where we want to return
74 | an error message in terms of a higher level construct rather than returning
75 | all possible characters. For example, if the `expr` parser from the [[maybe]]
76 | example would fail, the error message is: '...: expecting expression'. Without
77 | the [[expecting]] combinator, the message would be like '...: expecting
78 | \"let\" or alphabetic character', which is less friendly.
79 |
80 | The parsers [[fail]], [[fail-unexpected]] and [[expecting]] are the three
81 | parsers used to generate error messages. Of these, only [[expecting]] is
82 | commonly used.
83 | "
84 | [p msg]
85 | (fn [state context]
86 | (letfn [(e-err [e] (reply/e-err context (error/expecting e msg)))]
87 | (parser/go p state (reply/assign context {reply/e-err e-err}))))) ; only the `e-err` reply is overridden
88 |
89 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
90 |
91 | (defn bind
92 | "This parser applies parser `p` and then parser `(f x)` where `x` is the return
93 | value of the parser `p`.
94 |
95 | - Fails: when any of parsers `p` or `(f x)` fails.
96 | - Consumes: when any of parsers `p` or `(f x)` consumes some input.
97 | "
98 | [p f]
99 | (fn [state context]
100 | (letfn [(c-ok-p [s x]
101 | ;; - if (f x) doesn't consume input, but is okay, we still return in the consumed
102 | ;; continuation
103 | ;; - if (f x) doesn't consume input, but errors, we return the error in the
104 | ;; 'consumed-err' continuation
105 | (parser/go (f x) s (reply/assign context {reply/e-ok (partial reply/c-ok context)
106 | reply/e-err (partial reply/c-err context)})))
107 | (e-ok-p [s x]
108 | ;; - `p` replied `e-ok`, so (f x) runs with the caller's context unchanged
109 | (parser/go (f x) s context))]
110 | (parser/go p state (reply/assign context {reply/c-ok c-ok-p
111 | reply/e-ok e-ok-p})))))
112 |
113 | (defmacro for
114 | "Expands into nested bind forms and a function body.
115 |
116 | The pattern:
117 |
118 | (p/bind p (fn [x]
119 | (p/bind q (fn [y]
120 | ...
121 | (p/result (f x y ...))))))
122 |
123 | can more conveniently be written as:
124 |
125 | (p/for [x p
126 | y q
127 | ...]
128 | (p/result (f x y ...)))
129 | "
130 | [[& bindings] & body]
131 | (let [[sym p :as pair] (take 2 bindings)]
132 | (assert (= 2 (count pair)) "Requires an even number of forms in bindings")
133 | (assert (some? body) "Requires some body")
134 | (if (= 2 (count bindings))
135 | `(bind ~p (fn [~sym] ~@body))
136 | `(bind ~p (fn [~sym] (for ~(drop 2 bindings) ~@body)))))) ; recurse on the remaining binding pairs
137 |
138 | (defn after
139 |   "This parser runs the given parsers in order and stops at the first one
140 |   that fails. Returns the value of the last parser; the values of all
141 |   preceding parsers are discarded.
142 |
143 |   - Fails: when any of tried parsers fails.
144 |   - Consumes: when any of tried parsers consumes some input.
145 |   "
146 |   ([q p]
147 |    (bind q (fn [_ignored] p)))
148 |   ([q qq p]
149 |    (after (after q qq) p))
150 |   ([q qq qqq & more]
151 |    (reduce after (after q qq) (cons qqq more))))
152 |
153 | (defn value
154 |   "This parser threads the result value of the parser `p` through the given functions, left to right.
155 |
156 |   - Fails: when `p` fails.
157 |   - Consumes: when `p` consumes some input.
158 |   "
159 |   ([p f]
160 |    (bind p (fn [v] (-> v f result))))
161 |   ([p f g]
162 |    (bind p (fn [v] (-> v f g result))))
163 |   ([p f g h]
164 |    (bind p (fn [v] (-> v f g h result))))
165 |   ([p f g h & more]
166 |    (bind p (fn [v] (result (reduce (fn [acc func] (func acc)) v (list* f g h more)))))))
167 |
168 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
169 |
170 | (defn maybe
171 | "This parser behaves like parser `p`, except that it pretends that it hasn't
172 | consumed any input when an error occurs.
173 |
174 | - Fails: when `p` fails.
175 | - Consumes: when `p` succeeds and consumes some input.
176 |
177 | This combinator is used whenever arbitrary look ahead is needed. Since it
178 | pretends that it hasn't consumed any input when `p` fails, the [[alt]]
179 | combinator will try its second alternative even when the first parser failed
180 | while consuming input.
181 |
182 | The [[maybe]] combinator can for example be used to distinguish identifiers
183 | and reserved words. Both reserved words and identifiers are a sequence of
184 | letters. Whenever we expect a certain reserved word where we can also expect
185 | an identifier we have to use the [[maybe]] combinator. Suppose we write:
186 |
187 | (def identifier
188 | (p/+many char/letter?))
189 |
190 | (def let-expr
191 | (p/after (p/word \"let\")
192 | ...))
193 |
194 | (def expr
195 | (-> (p/alt let-expr
196 | identifier)
197 | (p/expecting \"expression\")))
198 |
199 | If the user writes \"lexical\", the parser fails with: `unexpected \"x\",
200 | expecting \"t\" of (word \"let\")`. Indeed, since the [[alt]] combinator only
201 | tries alternatives when the first alternative hasn't consumed input, the
202 | `identifier` parser is never tried (because the prefix \"le\" of the `(p/word
203 | \"let\")` parser is already consumed). The right behaviour can be obtained by
204 | adding the [[maybe]] combinator:
205 |
206 | (def let-expr
207 | (p/after (p/maybe (p/word \"let\"))
208 | ...))
209 | "
210 | [p]
211 | (fn [state context]
212 | (parser/go p state (reply/assign context {reply/c-err (partial reply/e-err context)})))) ; a `c-err` reply from `p` is delivered as `e-err`
213 |
214 | (defn look-ahead
215 | "Parses `p` without consuming any input. If `p` fails and consumes some input,
216 | so does [[look-ahead]]. Combine with [[maybe]] if this is undesirable.
217 |
218 | - Fails: when `p` fails.
219 | - Consumes: when `p` fails and consumes some input.
220 | "
221 | [p]
222 | (fn [state context]
223 | (letfn [(e-ok [_ x] (reply/e-ok context state x))] ; drops the state reached by `p` and replies with the original `state`
224 | (parser/go p state (reply/assign context {reply/c-ok e-ok,
225 | reply/e-ok e-ok})))))
226 |
227 | (letfn
228 | [(not-followed-by* [q] ; returns a function of `x` suitable as the second argument of `bind`
229 | (fn [x]
230 | (fn [state context]
231 | (letfn [(e-ok [_ _] (reply/e-err context (if-let [input (seq (state/input state))] ; `q` succeeded: reply with an error
232 | (error/unexpected state (delay (render (first input))))
233 | (error/sys-unexpected-eof state))))
234 | (e-err [_] (reply/e-ok context state x))] ; `q` failed: succeed with `x` at the original `state`
235 | (parser/go q state (reply/assign context {reply/c-ok e-ok
236 | reply/e-ok e-ok
237 | reply/c-err e-err
238 | reply/e-err e-err}))))))]
239 | (defn not-followed-by
240 | "This parser behaves like parser `p`, except that it only succeeds when parser
241 | `q` fails. This parser can be used to implement the 'longest match' rule. For
242 | example, when recognizing keywords (for example `let`), we want to make sure
243 | that a keyword is not followed by a legal identifier character, in which case
244 | the keyword is actually an identifier (for example `lets`). We can write this
245 | behaviour as follows:
246 |
247 | (-> (p/word \"let\")
248 | (p/not-followed-by char/letter-or-number?))
249 |
250 | - Fails:
251 | - when `p` fails.
252 | - when `q` succeeds.
253 | - Consumes:
254 | - when `p` consumes some input.
255 | "
256 | ([p q]
257 | (bind p (not-followed-by* q)))
258 | ([q]
259 | ((not-followed-by* q) nil)))) ; without `p`: succeeds with nil when `q` fails
260 |
261 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
262 |
263 | (defn *many
264 | "This parser applies the parser `p` _zero_ or more times. Returns a sequence
265 | of the returned values of `p`, or `nil` when `p` did not succeed even once.
266 |
267 | - Fails: when `p` fails and consumes some input.
268 | - Consumes: when `p` consumes some input.
269 |
270 | Example:
271 |
272 | (def identifier
273 | (p/for [c char/letter?
274 | cs (p/*many (p/alt char/letter-or-number?
275 | (char/is \"_\")))]
276 | (p/result (cons c cs))))
277 | "
278 | [p]
279 | (fn [state context]
280 | (letfn [(walk [xs s x]
281 | (let [xs (conj! xs x) ; `xs` is a transient vector of the values collected so far
282 | e-err (fn [_] (reply/c-ok context s (seq (persistent! xs))))]
283 | (parser/go p s (reply/assign context {reply/c-ok (partial walk xs)
284 | reply/e-ok parser/e-ok-throw-empty-input
285 | reply/e-err e-err}))))]
286 | (parser/go p state (reply/assign context {reply/c-ok (partial walk (transient []))
287 | reply/e-ok parser/e-ok-throw-empty-input
288 | reply/e-err (fn [_] (reply/e-ok context state nil))})))))
289 |
290 | (defn +many
291 |   "This parser applies the parser `p` _one_ or more times and returns a
292 |   sequence of the values returned by `p`.
293 |
294 |   - Fails: when `p` does not succeed at least once.
295 |   - Consumes: when `p` consumes some input.
296 |
297 |   Example:
298 |
299 |       (def word
300 |         (p/+many char/letter?))
301 |   "
302 |   [p]
303 |   (bind p (fn [head]
304 |             (bind (*many p) (fn [tail] (result (cons head tail)))))))
305 |
306 | (defn *skip
307 | "This parser applies the parser `p` _zero_ or more times, skipping its result.
308 |
309 | - Fails: when `p` fails and consumes some input.
310 | - Consumes: when `p` consumes some input.
311 |
312 | Example:
313 |
314 | (def spaces
315 | (p/*skip char/white?))
316 | "
317 | [p]
318 | (fn [state context]
319 | (letfn [(c-ok [s _] ; the value of `p` is ignored; `p` is tried again from state `s`
320 | (parser/go p s (reply/assign context {reply/c-ok c-ok
321 | reply/e-ok parser/e-ok-throw-empty-input
322 | reply/e-err (fn [_] (reply/c-ok context s nil))})))] ; stop: reply `c-ok` with nil at the last state `s`
323 | (parser/go p state (reply/assign context {reply/c-ok c-ok
324 | reply/e-ok parser/e-ok-throw-empty-input
325 | reply/e-err (fn [_] (reply/e-ok context state nil))}))))) ; first try failed: reply `e-ok` with nil at the original `state`
326 |
327 | (defn +skip
328 |   "This parser runs the parser `p` _one_ or more times and discards its results.
329 |
330 |   - Fails: when `p` does not succeed at least once.
331 |   - Consumes: when `p` consumes some input.
332 |   "
333 |   [p]
334 |   (let [skip-rest (*skip p)] (after p skip-rest)))
335 |
336 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
337 |
338 | (defn token
339 | "This parser accepts a token when `(pred token)` returns logical true, and
340 | optional expecting `msg`. 1-arity behaves as `pred` and can be used in
341 | predicate composition.
342 |
343 | - Fails: when `(pred token)` returns logical false or when there is no input left.
344 | - Consumes: when succeeds.
345 | "
346 | {:inline (fn [pred] `(token ~pred nil)) :inline-arities #{1}}
347 | ([pred] (token pred nil))
348 | ([pred msg]
349 | (fn
350 | ;; Predicate behaviour.
351 | ([tok] (pred tok))
352 | ;; Parser behaviour.
353 | ([state context]
354 | (if-let [input (-> ^ISeq (state/input state) #?(:bb seq :clj .seq :cljs -seq :default seq))]
355 | (let [tok (#?(:bb first :clj .first :cljs -first :default first) input)]
356 | (if (pred tok)
357 | (reply/c-ok context (state/next-state state tok) tok)
358 | (reply/e-err context (cond-> (error/sys-unexpected state (delay (render tok)))
359 | msg (error/expecting msg)))))
360 | (reply/e-err context (cond-> (error/sys-unexpected-eof state)
361 | msg (error/expecting msg))))))))
362 |
363 | (defn token-not
364 | "This parser accepts a token when `(pred token)` returns logical false, and
365 | optional expecting `msg`. 1-arity behaves as `(complement pred)` and can be
366 | used in predicate composition.
367 |
368 | - Fails: when `(pred token)` returns logical true.
369 | - Consumes: when succeeds.
370 | "
371 | ([pred]
372 | (token (complement pred)))
373 | ([pred msg]
374 | (token (complement pred) msg)))
375 |
376 | (defn register-word-test
377 | "Registers function `f` under keyword `k`, so that `k` can be passed as the `test-fn` of the [[word]] parser."
378 | [k, f]
379 | (parser/register-word-test-fn k f))
380 |
381 | (register-word-test :default =) ;; plain equality
382 | (register-word-test :ic char/equals-ignorecase) ;; case insensitive char comparison
383 |
384 | (defn word
385 | "Parses a sequence of tokens given by `tokens` and returns `tokens`. The optional
386 | function `(test-fn word-token input-token)` is used to match tokens
387 | differently than simple equality. The `test-fn` can be referred by keyword
388 | registered using [[register-word-test]]. There are two predefined keywords
389 | registered: `:default` for `=` and `:ic` for case insensitive char comparison.
390 |
391 | - Fails: when any of tokens don't match the input.
392 | - Consumes: when at least first token matches the input.
393 |
394 | Example:
395 |
396 | (def let-keyword (p/word \"let\"))
397 |
398 | (def let-keyword-ignorecase (p/word \"let\" :ic))
399 | "
400 | {:inline (fn [tokens] `(word ~tokens =)) :inline-arities #{1}}
401 | ([tokens] (word tokens =))
402 | ([tokens, test-fn]
403 | (let [test-fn (cond-> test-fn (keyword? test-fn) (parser/word-test-fn))] ;; resolve a registered keyword to its function
404 | (fn [state context]
405 | (if-let [ws (seq tokens)]
406 | (loop [^ISeq ws ws
407 | ^ISeq input (seq (state/input state))
408 | reply-err reply/e-err]
409 | (cond
410 | (not ws) ;; every word token matched
411 | (let [new-pos (reduce pos/next-pos (state/pos state) tokens)
412 | new-state (state/set-input-pos state input new-pos)]
413 | (reply/c-ok context new-state tokens))
414 | (not input) ;; input ended before the word did
415 | (reply-err context (-> (error/sys-unexpected-eof state)
416 | (error/expecting (delay (render tokens)))))
417 | :else
418 | (let [w (#?(:bb first :clj .first :cljs -first :default first) ws)
419 | t (#?(:bb first :clj .first :cljs -first :default first) input)]
420 | (if (test-fn w t)
421 | (recur (#?(:bb next :clj .next :cljs -next :default next) ws)
422 | (#?(:bb next :clj .next :cljs -next :default next) input)
423 | reply/c-err) ;; after the first matched token any failure is replied via c-err
424 | (reply-err context (-> (error/sys-unexpected state (delay (render t)))
425 | (error/expecting (delay (render tokens)))))))))
426 | (reply/e-ok context state tokens)))))) ;; empty `tokens`: succeed via e-ok with the state unchanged
427 |
428 | (def any-token
429 | "This parser accepts any kind of token. Returns the accepted token.
430 |
431 | - Fails: at the end of input.
432 | - Consumes: when succeeds.
433 | "
434 | (token any?)) ;; `any?` is true for every token, so only empty input fails
435 |
436 | (def ^{:arglists '([] [x])}
437 | eof
438 | "This parser only succeeds at the end of the input, with value `x` (`nil` when used as `eof` or `(eof)`).
439 |
440 | - Fails: when input is not completely consumed.
441 | - Consumes: never.
442 | "
443 | (letfn [(eof* [x]
444 | (fn
445 | ([] eof) ;; `(eof)` returns the default parser bound to `eof`
446 | ([x] (eof* x)) ;; `(eof x)` builds a parser that returns `x`
447 | ([state context] ;; parser behaviour
448 | (if-let [input (seq (state/input state))]
449 | (reply/e-err context (-> (error/unexpected state (delay (render (first input))))
450 | (error/expecting "end of input")))
451 | (reply/e-ok context state x)))))]
452 | (eof* nil)))
453 |
454 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
455 |
456 | ;; ## Combinators ##
457 |
458 | (defn group*
459 | "This parser tries to apply parsers of `ps` in order until all of them
460 | succeed. Returns a sequence of values returned by every parser (`nil` for empty `ps`).
461 |
462 | - Fails: when any of tried parsers fails.
463 | - Consumes: when any of tried parsers consumes some input.
464 | "
465 | [ps]
466 | (if-let [p (first ps)]
467 | (for [x p, xs (group* (rest ps))]
468 | (result (cons x xs)))
469 | (result nil)))
470 |
471 | (defn group
472 | "This parser tries to apply parsers in order until all of them succeed.
473 | Returns a sequence of values returned by every parser.
474 |
475 | - Fails: when any of tried parsers fails.
476 | - Consumes: when any of tried parsers consumes some input.
477 | "
478 | [p q & ps]
479 | (group* (cons p (cons q ps)))) ;; at least two parsers are required; see `group*` for a collection
480 |
481 | (defn alt
482 | "This parser tries to apply the parsers in order, until one of them succeeds.
483 | Returns the value of the succeeding parser.
484 |
485 | - Fails:
486 | - when any of tried parsers fails consuming some input.
487 | - when all tried parsers fail without consuming any input.
488 | - Consumes:
489 | - when any of tried parsers consumes some input.
490 |
491 | The parser first applies `p`. If it succeeds, the value of `p` is returned. If
492 | `p` fails _without consuming any input_, parser `q` is tried and so on.
493 |
494 | The parser is called _predictive_ since `q` is only tried when parser `p`
495 | didn't consume any input (i.e. the look ahead is 1). This non-backtracking
496 | behaviour allows for both an efficient implementation of the parser
497 | combinators and the generation of good error messages.
498 | "
499 | ([p q]
500 | (fn [state context]
501 | (letfn [(e-err-p [e] ;; `p` replied e-err: apply `q` to the same `state`
502 | (letfn [(e-ok-q [s x] (reply/e-ok context s x))
503 | (e-err-q [ee] (reply/e-err context (error/merge-errors e ee)))] ;; `q` replied e-err too: merge both errors
504 | (parser/go q state (reply/assign context {reply/e-ok e-ok-q
505 | reply/e-err e-err-q}))))]
506 | (parser/go p state (reply/assign context {reply/e-err e-err-p})))))
507 | ([p q qq]
508 | (-> p (alt q) (alt qq)))
509 | ([p q qq & more]
510 | (reduce alt (list* p q qq more)))) ;; left fold over the two-parser arity
511 |
512 | (defn option
513 | "This parser tries to apply parser `p`. If `p` fails without consuming input,
514 | it returns the value `x` (or `nil`), otherwise the value returned by `p`.
515 |
516 | - Fails: when `p` fails and consumes some input.
517 | - Consumes: when `p` consumes some input.
518 | "
519 | ([p] (option p nil))
520 | ([p x]
521 | (alt p (result x))))
522 |
523 | (defn between
524 | "Parses `open`, followed by `p` and `close`. Returns the value returned by `p`.
525 |
526 | - Fails: when any of the parsers fails.
527 | - Consumes: in all cases except when `open` fails without consuming any input.
528 |
529 | Example:
530 |
531 | (defn braces [p]
532 | (-> p (p/between (char/is \"{\")
533 | (char/is \"}\"))))
534 | "
535 | ([p around] (between p around around)) ;; the same parser is used as `open` and `close`
536 | ([p open close]
537 | (for [_ open, x p, _ close]
538 | (result x))))
539 |
540 | (defn times
541 | "Parses `n` occurrences of `p`. If `n` is smaller than or equal to zero, the parser
542 | is equivalent to `(p/result nil)`. Returns a sequence of `n` values returned by `p`."
543 | [n p]
544 | (group* (repeat n p)))
545 |
546 | (defn *many-till
547 | "This parser applies parser `p` _zero_ or more times until parser `end`
548 | succeeds. Returns a sequence of values returned by `p`.
549 |
550 | - Fails:
551 | - when `p` fails.
552 | - when `end` does not succeed before end of input.
553 | - Consumes:
554 | - when `p` or `end` consumes some input.
555 |
556 | Example:
557 |
558 | (def simple-comment
559 | (p/after (p/word \"<!--\")
560 | (p/*many-till p/any-token (p/maybe (p/word \"-->\")))))
561 |
562 | Note the overlapping parsers [[any-token]] and `(p/word \"-->\")`, and
563 | therefore the use of the [[maybe]] combinator.
564 | "
565 | [p end]
566 | (letfn [(scan [] (alt (after end (result nil)) ;; `end` matched: stop with no further items
567 | (for [x p, xs (scan)]
568 | (result (cons x xs)))))]
569 | (scan)))
570 |
571 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
572 |
573 | (defn +sep-by
574 | "Parses _one_ or more occurrences of `p`, separated by `sep`. Returns a
575 | sequence of values returned by `p`."
576 | [p sep]
577 | (for [x p, xs (*many (after sep p))] ;; every further item must be preceded by `sep`
578 | (result (cons x xs))))
579 |
580 | (defn *sep-by
581 | "Parses _zero_ or more occurrences of `p`, separated by `sep`. Returns a
582 | sequence of values returned by `p`, or `nil` when there are none. Example:
583 |
584 | (defn comma-sep [p]
585 | (p/*sep-by p (p/after (char/is \",\")
586 | (p/*skip char/white?))))
587 | "
588 | [p sep]
589 | (option (+sep-by p sep)))
590 |
591 | (defn +sep-end-by
592 | "Parses _one_ or more occurrences of `p`, separated and ended by `sep`.
593 | Returns a sequence of values returned by `p`."
594 | [p sep]
595 | (+many (for [x p, _ sep] ;; each item is `p` followed by a mandatory `sep`
596 | (result x))))
597 |
598 | (defn *sep-end-by
599 | "Parses _zero_ or more occurrences of `p`, separated and ended by `sep`.
600 | Returns a sequence of values returned by `p`, or `nil` when there are none."
601 | [p sep]
602 | (option (+sep-end-by p sep)))
603 |
604 | (defn +sep-opt-by
605 | "Parses _one_ or more occurrences of `p`, separated and optionally ended by
606 | `sep`. Returns a sequence of values returned by `p`."
607 | [p sep]
608 | (for [x p]
609 | (alt (for [_ sep, xs (option (+sep-opt-by p sep))] ;; after `sep` further items are optional
610 | (result (cons x xs)))
611 | (result [x])))) ;; no `sep` follows: single-item result
612 |
613 | (defn *sep-opt-by
614 | "Parses _zero_ or more occurrences of `p`, separated and optionally ended by
615 | `sep`. Returns a sequence of values returned by `p`, or `nil` when there are none."
616 | [p sep]
617 | (option (+sep-opt-by p sep)))
618 |
619 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
620 |
621 | ;; ## Parser state combinators ##
622 |
623 | (defn get-state
624 | "This parser returns the parser state field `:input`, `:pos` or `:user`.
625 | Without `field` it returns the parser state record itself. Never consumes input."
626 | {:arglists '([] [:input] [:pos] [:user])}
627 | ([]
628 | (fn [state context]
629 | (reply/e-ok context state state)))
630 | ([field]
631 | (fn [state context]
632 | (reply/e-ok context state (field state))))) ;; `field` is invoked as a function of the state
633 |
634 | (defn update-state
635 | "This parser applies function `f` to the parser state field `:input`, `:pos`
636 | or `:user` and returns modified value. Without `field` it applies `f` to the
637 | parser state record itself. Suppose that we want to count identifiers in a
638 | source, we could use the user state as:
639 |
640 | (p/for [x identifier
641 | _ (p/update-state :user inc)]
642 | (p/result x))"
643 | {:arglists '([f] [:input, f] [:pos, f] [:user, f])}
644 | ([f]
645 | (fn [state context]
646 | (let [s (f state)]
647 | (reply/e-ok context s s)))) ;; the new state is both the state and the value
648 | ([field f]
649 | (fn [state context]
650 | (let [v (cond-> (f (field state))
651 | (= :input field) (state/conform-input))] ;; a new `:input` value goes through `state/conform-input`
652 | (reply/e-ok context (assoc state field v) v)))))
653 |
654 | (defn set-state
655 | "This parser sets the parser state field `:input`, `:pos` or `:user` to `x`.
656 | Without `field` it sets the parser state record itself to `state`."
657 | {:arglists '([state] [:input, new-input] [:pos, new-pos] [:user, new-user-state])}
658 | ([state]
659 | (update-state (constantly state))) ;; implemented as `update-state` with a constant function
660 | ([field x]
661 | (update-state field (constantly x))))
662 |
663 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
664 |
665 | (defn trace
666 | "This parser prints the parser state (position, up to 20 tokens of remaining
667 | input and user state) at the time it is invoked. When `p` is provided it then
668 | continues to apply parser `p`, and if `p` fails will indicate that the label has
669 | been backtracked. It is intended to be used for debugging parsers by inspecting
670 | their intermediate states.
671 |
672 | - Fails: when `p` fails.
673 | - Consumes: when `p` consumes some input.
674 |
675 | Examples:
676 |
677 | (p/parse (p/after (char/is \"aeiou\")
678 | (p/trace \"test-label\"))
679 | \"atest\")
680 |
681 | > test-label: at line 1, column 2
682 | > - input: (\\t \\e \\s \\t)
683 | > - user: nil
684 |
685 | (p/parse (p/after (char/is \"aeiou\")
686 | (p/trace \"test-label\" (char/is \"nope\")))
687 | \"atest\")
688 |
689 | > test-label: at line 1, column 2
690 | > - input: (\\t \\e \\s \\t)
691 | > - user: nil
692 | > test-label: backtracked
693 |
694 | > error at line 1, column 2:
695 | > unexpected \"t\"
696 | > expecting character of \"nope\"
697 | "
698 | ([label]
699 | (fn [state context]
700 | (println (str label ": at " (state/pos state)
701 | "\n - input: " (pr-str (take 20 (state/input state))) ;; only the first 20 tokens are printed
702 | "\n - user: " (pr-str (state/user state))))
703 | (reply/e-ok context state nil)))
704 | ([label p]
705 | (after (trace label)
706 | (alt p, (do-parser (println (str label ": backtracked"))
707 | (fail))))))
708 |
709 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
710 |
711 | (defn parse*
712 | "Executes parser `p` given `input` sequence of tokens, returns reply record.
713 | See [[parse]] for available `opts`."
714 | ([p input]
715 | (parser/run p (state/init-state input (pos/init-pos nil input) nil))) ;; no options: `nil` pos options and `nil` user state
716 | ([p input opts]
717 | (parser/run p (state/init-state input (pos/init-pos opts input) (:user-state opts)))))
718 |
719 | (defn parse
720 | "Executes parser `p` given `input` sequence of tokens, returns result value or
721 | throws exception on parsing error.
722 |
723 | Options:
724 |
725 | - `:pos` − The instance of InputPos or keyword for `pos/init-pos` to init
726 | parser pos. By default, pos is initialized to TextPos for string
727 | input or first token of char type, or IndexPos otherwise.
728 |
729 | - TextPos options:
730 | - `:tab` − tab size, default: 8.
731 | - `:line` − line number, default: 1.
732 | - `:col` − column number, default: 1.
733 |
734 | - `:user-state` − Initial value of user state.
735 | "
736 | {:arglists '([p input]
737 | [p input {:keys [pos user-state] :as options}]
738 | [p input {:keys [tab line col user-state] :as options}])}
739 | ([p input]
740 | (-> (parse* p input) (reply/value))) ;; `reply/value` is applied to the reply returned by `parse*`
741 | ([p input opts]
742 | (-> (parse* p input opts) (reply/value))))
743 |
744 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
745 |
--------------------------------------------------------------------------------
/test/strojure/parsesso/parser_test.cljc:
--------------------------------------------------------------------------------
1 | (ns strojure.parsesso.parser-test
2 | (:require [clojure.string :as string]
3 | [clojure.test :as test :refer [deftest testing]]
4 | [strojure.parsesso.parser :as p]))
5 |
6 | #_(test/run-tests)
7 |
8 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
9 |
10 | (defn- p
11 | "Parses test input using given parser. Returns a map with `:consumed` and either `:value` or `:error` (the error text split into lines)."
12 | [parser input]
13 | (let [result (p/parse* parser input)]
14 | (if-let [error (:error result)]
15 | (-> (select-keys result [:consumed])
16 | (assoc :error (-> (str error) (string/split-lines))))
17 | (select-keys result [:consumed :value]))))
18 |
19 | (defn- tok ;; Returns a token parser using the set of `cs` as its predicate.
20 | [& cs]
21 | (p/token (set cs)))
22 |
23 | (defn- fail-consumed
24 | "Returns parser which fails with a test message after `parser` succeeds, with plain `parser` as the `p/alt` fallback."
25 | [parser]
26 | (p/alt (p/for [x parser] (p/fail (str "Test failure after parsing " x)))
27 | parser))
28 |
29 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
30 |
31 | (deftest result-t ;; `p/result` yields its value without consuming, on empty and non-empty input
32 | (test/are [expr result] (= result expr)
33 |
34 | (p (p/result :A)
35 | [])
36 | {:consumed false, :value :A}
37 |
38 | (p (p/result :A)
39 | [:B])
40 | {:consumed false, :value :A}
41 |
42 | (p (fail-consumed (p/result :A))
43 | [])
44 | {:consumed false, :value :A}
45 |
46 | ))
47 |
48 | (deftest fail-t ;; `p/fail` reports its message (none for `nil` or no argument) without consuming
49 | (test/are [expr result] (= result expr)
50 |
51 | (p (p/fail "Test failure")
52 | [])
53 | {:consumed false, :error ["error at index 0:"
54 | "Test failure"]}
55 |
56 | (p (p/fail "Test failure")
57 | [:A])
58 | {:consumed false, :error ["error at index 0:"
59 | "Test failure"]}
60 |
61 | (p (p/fail nil)
62 | [])
63 | {:consumed false, :error ["error at index 0:"]}
64 |
65 | (p (p/fail)
66 | [])
67 | {:consumed false, :error ["error at index 0:"]}
68 |
69 | ))
70 |
71 | (deftest fail-unexpected-t ;; `p/fail-unexpected` renders an \"unexpected ...\" line, also for `nil`
72 | (test/are [expr result] (= result expr)
73 |
74 | (p (p/fail-unexpected "Boom")
75 | [])
76 | {:consumed false, :error ["error at index 0:"
77 | "unexpected Boom"]}
78 |
79 | (p (-> (p/fail-unexpected "Boom")
80 | (p/expecting "description"))
81 | [])
82 | {:consumed false, :error ["error at index 0:"
83 | "unexpected Boom"
84 | "expecting description"]}
85 |
86 | (p (p/fail-unexpected nil)
87 | [])
88 | {:consumed false, :error ["error at index 0:"
89 | "unexpected nil"]}
90 |
91 | ))
92 |
93 | (deftest expecting-t ;; an outer non-nil `p/expecting` replaces the inner one; `nil` leaves the message as is
94 | (test/are [expr result] (= result expr)
95 |
96 | (p (-> (p/fail "Test failure")
97 | (p/expecting "Expect"))
98 | [])
99 | {:consumed false, :error ["error at index 0:"
100 | "expecting Expect"
101 | "Test failure"]}
102 |
103 | (p (-> (p/fail "Test failure")
104 | (p/expecting (delay "Expect")))
105 | [])
106 | {:consumed false, :error ["error at index 0:"
107 | "expecting Expect"
108 | "Test failure"]}
109 |
110 | (p (-> (p/fail "Test failure")
111 | (p/expecting nil))
112 | [])
113 | {:consumed false, :error ["error at index 0:"
114 | "Test failure"]}
115 |
116 | (p (-> (p/fail "Test failure")
117 | (p/expecting "Inner")
118 | (p/expecting "Outer"))
119 | [])
120 | {:consumed false, :error ["error at index 0:"
121 | "expecting Outer"
122 | "Test failure"]}
123 |
124 | (p (-> (p/fail "Test failure")
125 | (p/expecting "Inner")
126 | (p/expecting nil))
127 | [])
128 | {:consumed false, :error ["error at index 0:"
129 | "expecting Inner"
130 | "Test failure"]}
131 |
132 | ))
133 |
134 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
135 |
136 | (deftest bind-t ;; `p/bind` feeds the first parser's value to a function returning the next parser
137 | (test/are [expr result] (= result expr)
138 |
139 | (p (p/bind (tok :A) p/result)
140 | [:A])
141 | {:consumed true, :value :A}
142 |
143 | (p (p/bind (tok :A) (fn [_] (p/fail "Oops")))
144 | [:A])
145 | {:consumed true, :error ["error at index 1:"
146 | "Oops"]}
147 |
148 | (p (p/bind (tok :A) p/result)
149 | [:B])
150 | {:consumed false, :error ["error at index 0:"
151 | "unexpected :B"]}
152 |
153 | (p (p/bind (tok :A) (fn [_] (p/fail "Oops")))
154 | [:B])
155 | {:consumed false, :error ["error at index 0:"
156 | "unexpected :B"]}
157 |
158 | (p (p/bind (tok :A) (fn [_] (tok :B)))
159 | [:A :B])
160 | {:consumed true, :value :B}
161 |
162 | (p (p/bind (tok :A) (fn [_] (tok :B)))
163 | [:B :A])
164 | {:consumed false, :error ["error at index 0:"
165 | "unexpected :B"]}
166 |
167 | (p (p/bind (tok :A) (fn [_] (tok :B)))
168 | [:A :A])
169 | {:consumed true, :error ["error at index 1:"
170 | "unexpected :A"]}
171 |
172 | ))
173 |
174 | (deftest after-t ;; `p/after` runs parsers in sequence and yields the value of the last one
175 | (test/are [expr result] (= result expr)
176 |
177 | (p (p/after (tok :A) (tok :B))
178 | [:A :B])
179 | {:consumed true, :value :B}
180 |
181 | (p (p/after (tok :A) (tok :B))
182 | [:A :A])
183 | {:consumed true, :error ["error at index 1:"
184 | "unexpected :A"]}
185 |
186 | (p (p/after (tok :A) (tok :B))
187 | [:A])
188 | {:consumed true, :error ["error at index 1:"
189 | "unexpected end of input"]}
190 |
191 | (p (p/after (fail-consumed (tok :A)) (tok :B))
192 | [:A :B])
193 | {:consumed true, :error ["error at index 1:"
194 | "Test failure after parsing :A"]}
195 |
196 | (p (p/after (tok :A) (fail-consumed (tok :B)))
197 | [:A :B])
198 | {:consumed true, :error ["error at index 2:"
199 | "Test failure after parsing :B"]}
200 |
201 | (p (p/after (tok :A) (tok :B) (tok :C))
202 | [:A :B :C])
203 | {:consumed true, :value :C}
204 |
205 | ))
206 |
207 | (deftest value-t ;; `p/value` applies the given functions, in order, to the parsed value
208 | (test/are [expr result] (= result expr)
209 |
210 | (p (p/value (tok :A) name)
211 | [:A])
212 | {:consumed true, :value "A"}
213 |
214 | (p (p/value (p/token number?) inc inc)
215 | [1])
216 | {:consumed true, :value 3}
217 |
218 | (p (p/value (p/token number?) inc inc inc str)
219 | [1])
220 | {:consumed true, :value "4"}
221 |
222 | (p (p/value (tok :A) name)
223 | [:B])
224 | {:consumed false, :error ["error at index 0:"
225 | "unexpected :B"]}
226 |
227 | (p (p/value (tok :A) name)
228 | [])
229 | {:consumed false, :error ["error at index 0:"
230 | "unexpected end of input"]}
231 |
232 | (p (p/value (fail-consumed (tok :A)) name)
233 | [:A])
234 | {:consumed true, :error ["error at index 1:"
235 | "Test failure after parsing :A"]}
236 |
237 | (p (p/value (fail-consumed (tok :A)) name)
238 | [:B])
239 | {:consumed false, :error ["error at index 0:"
240 | "unexpected :B"]}
241 |
242 | (p (p/value (fail-consumed (tok :A)) name)
243 | [])
244 | {:consumed false, :error ["error at index 0:"
245 | "unexpected end of input"]}
246 |
247 | ))
248 |
249 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
250 |
251 | (deftest maybe-t ;; with `p/maybe` a failure after consuming is reported as not consumed
252 | (test/are [expr result] (= result expr)
253 |
254 | (p (p/maybe (tok :A))
255 | [:A])
256 | {:consumed true, :value :A}
257 |
258 | (p (p/maybe (tok :A))
259 | [:B])
260 | {:consumed false, :error ["error at index 0:"
261 | "unexpected :B"]}
262 |
263 | (p (p/maybe (tok :A))
264 | [])
265 | {:consumed false, :error ["error at index 0:"
266 | "unexpected end of input"]}
267 |
268 | (p (p/maybe (fail-consumed (tok :A)))
269 | [:A])
270 | {:consumed false, :error ["error at index 1:"
271 | "Test failure after parsing :A"]}
272 |
273 | (p (p/maybe (fail-consumed (tok :A)))
274 | [:B])
275 | {:consumed false, :error ["error at index 0:"
276 | "unexpected :B"]}
277 |
278 | (p (p/maybe (fail-consumed (tok :A)))
279 | [])
280 | {:consumed false, :error ["error at index 0:"
281 | "unexpected end of input"]}
282 |
283 | ))
284 |
285 | (deftest look-ahead-t ;; `p/look-ahead` succeeds without consuming; a consuming failure stays consumed
286 | (test/are [expr result] (= result expr)
287 |
288 | (p (p/look-ahead (tok :A))
289 | [:A])
290 | {:consumed false, :value :A}
291 |
292 | (p (p/look-ahead (tok :A))
293 | [:B])
294 | {:consumed false, :error ["error at index 0:"
295 | "unexpected :B"]}
296 |
297 | (p (p/look-ahead (tok :A))
298 | [])
299 | {:consumed false, :error ["error at index 0:"
300 | "unexpected end of input"]}
301 |
302 | (p (p/look-ahead (fail-consumed (tok :A)))
303 | [:A])
304 | {:consumed true, :error ["error at index 1:"
305 | "Test failure after parsing :A"]}
306 |
307 | (p (p/look-ahead (fail-consumed (tok :A)))
308 | [:B])
309 | {:consumed false, :error ["error at index 0:"
310 | "unexpected :B"]}
311 |
312 | (p (p/look-ahead (fail-consumed (tok :A)))
313 | [])
314 | {:consumed false, :error ["error at index 0:"
315 | "unexpected end of input"]}
316 |
317 | ))
318 |
319 | (deftest not-followed-by-t ;; covers both arities: `[p q]` yields the value of `p`, `[q]` yields `nil`
320 | (testing "not-followed-by [p q]"
321 | (test/are [expr result] (= result expr)
322 |
323 | (p (p/not-followed-by (p/result :X)
324 | (tok :A))
325 | [:B])
326 | {:consumed false, :value :X}
327 |
328 | (p (p/not-followed-by (tok :X)
329 | (tok :A))
330 | [:X :B])
331 | {:consumed true, :value :X}
332 |
333 | (p (p/not-followed-by (p/result :X)
334 | (p/after (tok :A) (tok :B)))
335 | [:A :A])
336 | {:consumed false, :value :X}
337 |
338 | (p (p/not-followed-by (tok :X)
339 | (p/after (tok :A) (tok :B)))
340 | [:X :A :A])
341 | {:consumed true, :value :X}
342 |
343 | (p (p/not-followed-by (p/result :X)
344 | (tok :A))
345 | [])
346 | {:consumed false, :value :X}
347 |
348 | (p (p/not-followed-by (tok :X)
349 | (tok :A))
350 | [:X])
351 | {:consumed true, :value :X}
352 |
353 | (p (p/not-followed-by (p/result :X)
354 | p/any-token)
355 | [])
356 | {:consumed false, :value :X}
357 |
358 | (p (p/not-followed-by (tok :X)
359 | p/any-token)
360 | [:X])
361 | {:consumed true, :value :X}
362 |
363 | (p (p/not-followed-by (p/result :X)
364 | (tok :A))
365 | [:A])
366 | {:consumed false, :error ["error at index 0:"
367 | "unexpected :A"]}
368 |
369 | (p (p/not-followed-by (tok :X)
370 | (tok :A))
371 | [:X :A])
372 | {:consumed true, :error ["error at index 1:"
373 | "unexpected :A"]}
374 |
375 | (p (p/not-followed-by (p/result :X)
376 | (p/after (tok :A) (tok :B)))
377 | [:A :B])
378 | {:consumed false, :error ["error at index 0:"
379 | "unexpected :A"]}
380 |
381 | (p (p/not-followed-by (tok :X)
382 | (p/after (tok :A) (tok :B)))
383 | [:X :A :B])
384 | {:consumed true, :error ["error at index 1:"
385 | "unexpected :A"]}
386 |
387 | (p (p/not-followed-by (p/result :X)
388 | p/any-token)
389 | [:A])
390 | {:consumed false, :error ["error at index 0:"
391 | "unexpected :A"]}
392 |
393 | (p (p/not-followed-by (tok :X)
394 | p/any-token)
395 | [:X :A])
396 | {:consumed true, :error ["error at index 1:"
397 | "unexpected :A"]}
398 |
399 | (p (p/not-followed-by (p/result :X)
400 | (p/eof))
401 | [])
402 | {:consumed false, :error ["error at index 0:"
403 | "unexpected end of input"]}
404 |
405 | (p (p/not-followed-by (tok :X)
406 | (p/eof))
407 | [:X])
408 | {:consumed true, :error ["error at index 1:"
409 | "unexpected end of input"]}
410 |
411 | ))
412 |
413 | (testing "not-followed-by [q]"
414 | (test/are [expr result] (= result expr)
415 |
416 | (p (p/not-followed-by (tok :A))
417 | [:B])
418 | {:consumed false, :value nil}
419 |
420 | (p (p/not-followed-by (p/after (tok :A) (tok :B)))
421 | [:A :A])
422 | {:consumed false, :value nil}
423 |
424 | (p (p/not-followed-by (tok :A))
425 | [])
426 | {:consumed false, :value nil}
427 |
428 | (p (p/not-followed-by p/any-token)
429 | [])
430 | {:consumed false, :value nil}
431 |
432 | (p (p/not-followed-by (tok :A))
433 | [:A])
434 | {:consumed false, :error ["error at index 0:"
435 | "unexpected :A"]}
436 |
437 | (p (p/not-followed-by (p/after (tok :A) (tok :B)))
438 | [:A :B])
439 | {:consumed false, :error ["error at index 0:"
440 | "unexpected :A"]}
441 |
442 | (p (p/not-followed-by p/any-token)
443 | [:A])
444 | {:consumed false, :error ["error at index 0:"
445 | "unexpected :A"]}
446 |
447 | (p (p/not-followed-by p/eof)
448 | [])
449 | {:consumed false, :error ["error at index 0:"
450 | "unexpected end of input"]}
451 |
452 | )))
453 |
454 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
455 |
456 | (deftest *many-t ;; `p/*many` collects zero or more values; the 10000-token case exercises long input
457 | (test/are [expr result] (= result expr)
458 |
459 | (p (p/*many (tok :A :B :C))
460 | [:A :B :C :D :E :F])
461 | {:consumed true, :value [:A :B :C]}
462 |
463 | (p (p/*many (fail-consumed (tok :A :B :C)))
464 | [:A :B :C :D :E :F])
465 | {:consumed true, :error ["error at index 1:"
466 | "Test failure after parsing :A"]}
467 |
468 | (p (p/*many (tok :D :E :F))
469 | [:A :B :C :D :E :F])
470 | {:consumed false, :value nil}
471 |
472 | (p (p/*many (tok :A :B :C))
473 | [])
474 | {:consumed false, :value nil}
475 |
476 | (p (p/*many (tok :A))
477 | (repeat 10000 :A))
478 | {:consumed true, :value (repeat 10000 :A)}
479 |
480 | ))
481 |
482 | (deftest +many-t ;; `p/+many` needs at least one match and otherwise fails without consuming
483 | (test/are [expr result] (= result expr)
484 |
485 | (p (p/+many (tok :A :B :C))
486 | [:A :B :C :D :E :F])
487 | {:consumed true, :value [:A :B :C]}
488 |
489 | (p (p/+many (tok :D :E :F))
490 | [:A :B :C :D :E :F])
491 | {:consumed false, :error ["error at index 0:"
492 | "unexpected :A"]}
493 |
494 | (p (p/+many (tok :A :B :C))
495 | [])
496 | {:consumed false, :error ["error at index 0:"
497 | "unexpected end of input"]}
498 |
499 | (p (p/+many (tok :A))
500 | (repeat 10000 :A))
501 | {:consumed true, :value (repeat 10000 :A)}
502 |
503 | ))
504 |
505 | (deftest *skip-t ;; `p/*skip` always yields `nil`; consumed only when at least one token matched
506 | (test/are [expr result] (= result expr)
507 |
508 | (p (p/*skip (tok :A))
509 | [:A :A :A :B :B :B])
510 | {:consumed true, :value nil}
511 |
512 | (p (p/*skip (fail-consumed (tok :A)))
513 | [:A :A :A :B :B :B])
514 | {:consumed true, :error ["error at index 1:"
515 | "Test failure after parsing :A"]}
516 |
517 | (p (p/*skip (tok :A))
518 | [:B :B :B])
519 | {:consumed false, :value nil}
520 |
521 | (p (p/*skip (tok :A))
522 | [])
523 | {:consumed false, :value nil}
524 |
525 | )
526 | )
527 |
528 | (deftest +skip-t ;; `p/+skip` yields `nil` after one or more matches and fails when there is none
529 | (test/are [expr result] (= result expr)
530 |
531 | (p (p/+skip (tok :A))
532 | [:A :A :A :B :B :B])
533 | {:consumed true, :value nil}
534 |
535 | (p (p/+skip (tok :A))
536 | [:B :B :B])
537 | {:consumed false, :error ["error at index 0:"
538 | "unexpected :B"]}
539 |
540 | (p (p/+skip (tok :A))
541 | [])
542 | {:consumed false, :error ["error at index 0:"
543 | "unexpected end of input"]}
544 |
545 | ))
546 |
547 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
548 |
549 | (deftest token-t ;; `p/token` consumes one matching token; mismatch and empty input fail unconsumed
550 | (test/are [expr result] (= result expr)
551 |
552 | (p (p/token #{:A})
553 | [:A])
554 | {:consumed true, :value :A}
555 |
556 | (p (p/token #{:A})
557 | [:B])
558 | {:consumed false, :error ["error at index 0:"
559 | "unexpected :B"]}
560 |
561 | (p (p/token #{:A})
562 | [])
563 | {:consumed false, :error ["error at index 0:"
564 | "unexpected end of input"]}
565 |
566 | (p (fail-consumed (p/token #{:A}))
567 | [:A])
568 | {:consumed true, :error ["error at index 1:"
569 | "Test failure after parsing :A"]}
570 |
571 | (p (fail-consumed (p/token #{:A}))
572 | [:B])
573 | {:consumed false, :error ["error at index 0:"
574 | "unexpected :B"]}
575 |
576 | (p (fail-consumed (p/token #{:A}))
577 | [])
578 | {:consumed false, :error ["error at index 0:"
579 | "unexpected end of input"]}
580 |
581 | ))
582 |
583 | (deftest token-not-t ;; `p/token-not` mirrors `p/token` with the predicate complemented
584 | (test/are [expr result] (= result expr)
585 |
586 | (p (p/token-not #{:A})
587 | [:B])
588 | {:consumed true, :value :B}
589 |
590 | (p (p/token-not #{:A})
591 | [:A])
592 | {:consumed false, :error ["error at index 0:"
593 | "unexpected :A"]}
594 |
595 | (p (p/token-not #{:A})
596 | [])
597 | {:consumed false, :error ["error at index 0:"
598 | "unexpected end of input"]}
599 |
600 | (p (fail-consumed (p/token-not #{:A}))
601 | [:B])
602 | {:consumed true, :error ["error at index 1:"
603 | "Test failure after parsing :B"]}
604 |
605 | (p (fail-consumed (p/token-not #{:A}))
606 | [:A])
607 | {:consumed false, :error ["error at index 0:"
608 | "unexpected :A"]}
609 |
610 | (p (fail-consumed (p/token-not #{:A}))
611 | [])
612 | {:consumed false, :error ["error at index 0:"
613 | "unexpected end of input"]}
614 |
615 | ))
616 |
617 | (deftest word-t ;; note: a partial match is consumed but the error is reported at the word's start position
618 | (testing "default matching"
619 | (test/are [expr result] (= result expr)
620 |
621 | (p (p/word [:A :B :C])
622 | [:A :B :C])
623 | {:consumed true, :value [:A :B :C]}
624 |
625 | (p (p/word [:A :B :C])
626 | [:A :B])
627 | {:consumed true, :error ["error at index 0:"
628 | "unexpected end of input"
629 | "expecting [:A :B :C]"]}
630 |
631 | (p (p/word [:A :B :C])
632 | [])
633 | {:consumed false, :error ["error at index 0:"
634 | "unexpected end of input"
635 | "expecting [:A :B :C]"]}
636 |
637 | (p (p/word [:A :B :C])
638 | [:A :B :X])
639 | {:consumed true, :error ["error at index 0:"
640 | "unexpected :X"
641 | "expecting [:A :B :C]"]}
642 |
643 | (p (p/word [:A :B :C])
644 | [:X :Y :Z])
645 | {:consumed false, :error ["error at index 0:"
646 | "unexpected :X"
647 | "expecting [:A :B :C]"]}
648 |
649 | (p (p/word [:ns/A :ns/B :ns/C]
650 | (fn [w t] (= (name w) (name t))))
651 | [:A :B :C])
652 | {:consumed true, :value [:ns/A :ns/B :ns/C]}
653 |
654 | ))
655 |
656 | (testing "case insensitive matching"
657 | (test/are [expr result] (= result expr)
658 |
659 | (p (p/word "abc" :ic)
660 | "abc")
661 | {:consumed true, :value "abc"}
662 |
663 | (p (p/word "abc" :ic)
664 | "ABC")
665 | {:consumed true, :value "abc"}
666 |
667 | (p (p/word "ABC" :ic)
668 | "abc")
669 | {:consumed true, :value "ABC"}
670 |
671 | (p (p/word "abc" :ic)
672 | "abd")
673 | {:consumed true, :error ["error at line 1, column 1:"
674 | "unexpected \"d\""
675 | "expecting \"abc\""]}
676 |
677 | (p (p/word "abc" :ic)
678 | "ab")
679 | {:consumed true, :error ["error at line 1, column 1:"
680 | "unexpected end of input"
681 | "expecting \"abc\""]}
682 |
683 | )))
684 |
(deftest any-token-t
  ;; `p/any-token` is run through the `p` test helper against each input.
  (test/are [input expected] (= expected (p p/any-token input))

    ;; One token available: it is consumed and returned.
    [:A]
    {:consumed true, :value :A}

    ;; Nothing to read.
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}))
698 |
(deftest eof-t
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; Bare `p/eof` on empty input yields nil without consuming.
    p/eof
    []
    {:consumed false, :value nil}

    ;; `(p/eof x)` yields `x` on empty input.
    (p/eof :ok)
    []
    {:consumed false, :value :ok}

    ;; Remaining input is an error.
    p/eof
    [:A]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :A"
                              "expecting end of input"]}))
717 |
718 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
719 |
(deftest group*-t
  ;; `p/group*` takes a collection of parsers.
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; All parsers succeed in order.
    (p/group* [(tok :A) (tok :B) (tok :C)])
    [:A :B :C]
    {:consumed true, :value '(:A :B :C)}

    ;; First parser fails without consuming.
    (p/group* [(tok :A) (tok :B) (tok :C)])
    [:B :C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    ;; First parser fails after consuming its token.
    (p/group* [(fail-consumed (tok :A)) (tok :B) (tok :C)])
    [:A :B :C]
    {:consumed true, :error ["error at index 1:"
                             "Test failure after parsing :A"]}

    ;; Empty collection of parsers.
    (p/group* [])
    [:A :B :C]
    {:consumed false, :value nil}

    ;; nil instead of a collection.
    (p/group* nil)
    [:A :B :C]
    {:consumed false, :value nil}))
746 |
(deftest group-t
  ;; Variadic counterpart of the `p/group*` checks.
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; All parsers succeed in order.
    (p/group (tok :A) (tok :B) (tok :C))
    [:A :B :C]
    {:consumed true, :value '(:A :B :C)}

    ;; First parser fails without consuming.
    (p/group (tok :A) (tok :B) (tok :C))
    [:B :C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    ;; First parser fails after consuming its token.
    (p/group (fail-consumed (tok :A)) (tok :B) (tok :C))
    [:A :B :C]
    {:consumed true, :error ["error at index 1:"
                             "Test failure after parsing :A"]}))
765 |
(deftest alt-t
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; --- Two plain alternatives ---

    (p/alt (tok :A) (tok :B))
    [:A]
    {:consumed true, :value :A}

    (p/alt (tok :A) (tok :B))
    [:B]
    {:consumed true, :value :B}

    (p/alt (tok :A) (tok :B))
    [:C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :C"]}

    (p/alt (tok :A) (tok :B))
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    ;; --- First alternative fails after consuming ---

    ;; The second alternative is not tried once the first has consumed.
    (p/alt (fail-consumed (tok :A)) (tok :B))
    [:A]
    {:consumed true, :error ["error at index 1:"
                             "Test failure after parsing :A"]}

    (p/alt (fail-consumed (tok :A)) (tok :B))
    [:B]
    {:consumed true, :value :B}

    (p/alt (fail-consumed (tok :A)) (tok :B))
    [:C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :C"]}

    (p/alt (fail-consumed (tok :A)) (tok :B))
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    ;; --- Second alternative fails after consuming ---

    (p/alt (tok :A) (fail-consumed (tok :B)))
    [:A]
    {:consumed true, :value :A}

    (p/alt (tok :A) (fail-consumed (tok :B)))
    [:B]
    {:consumed true, :error ["error at index 1:"
                             "Test failure after parsing :B"]}

    (p/alt (tok :A) (fail-consumed (tok :B)))
    [:C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :C"]}

    (p/alt (tok :A) (fail-consumed (tok :B)))
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    ;; --- Expectations of both alternatives are merged in the message ---

    (p/alt (p/expecting (tok :A) :A)
           (p/expecting (tok :B) :B))
    [:C]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :C"
                              "expecting :A or :B"]}))
845 |
(deftest option-t
  ;; Rows in both tables: parser, input, expected reply of the `p` test helper.
  (testing "The `option` without default."
    (test/are [parser input expected] (= expected (p parser input))

      (p/option (tok :A))
      [:A]
      {:consumed true, :value :A}

      ;; Inner parser fails without consuming: nil value.
      (p/option (tok :A))
      [:B]
      {:consumed false, :value nil}

      (p/option (tok :A))
      []
      {:consumed false, :value nil}

      ;; Inner parser fails after consuming: the failure is kept.
      (p/option (fail-consumed (tok :A)))
      [:A]
      {:consumed true, :error ["error at index 1:"
                               "Test failure after parsing :A"]}

      (p/option (fail-consumed (tok :A)))
      [:B]
      {:consumed false, :value nil}

      (p/option (fail-consumed (tok :A)))
      []
      {:consumed false, :value nil}))

  (testing "The `option` with default value."
    (test/are [parser input expected] (= expected (p parser input))

      (p/option (tok :A) :X)
      [:A]
      {:consumed true, :value :A}

      ;; Inner parser fails without consuming: the default is returned.
      (p/option (tok :A) :X)
      [:B]
      {:consumed false, :value :X}

      (p/option (tok :A) :X)
      []
      {:consumed false, :value :X}

      ;; Inner parser fails after consuming: the failure is kept.
      (p/option (fail-consumed (tok :A)) :X)
      [:A]
      {:consumed true, :error ["error at index 1:"
                               "Test failure after parsing :A"]}

      (p/option (fail-consumed (tok :A)) :X)
      [:B]
      {:consumed false, :value :X}

      (p/option (fail-consumed (tok :A)) :X)
      []
      {:consumed false, :value :X})))
906 |
(deftest between-t
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; --- Separate open (:L) and close (:R) parsers ---

    (p/between (tok :A) (tok :L) (tok :R))
    [:L :A :R]
    {:consumed true, :value :A}

    (p/between (tok :A) (tok :L) (tok :R))
    [:R :A :L]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :R"]}

    ;; Closing token is missing.
    (p/between (tok :A) (tok :L) (tok :R))
    [:L :A]
    {:consumed true, :error ["error at index 2:"
                             "unexpected end of input"]}

    ;; Opening token is missing.
    (p/between (tok :A) (tok :L) (tok :R))
    [:A :R]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :A"]}

    (p/between (tok :A) (tok :L) (tok :R))
    [:A]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :A"]}

    (p/between (tok :A) (tok :L) (tok :R))
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    ;; --- Single delimiter parser (:I) on both sides ---

    (p/between (tok :A) (tok :I))
    [:I :A :I]
    {:consumed true, :value :A}

    (p/between (tok :A) (tok :I))
    [:I :A]
    {:consumed true, :error ["error at index 2:"
                             "unexpected end of input"]}

    (p/between (tok :A) (tok :I))
    [:A :I]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :A"]}

    (p/between (tok :A) (tok :I))
    [:A]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :A"]}

    (p/between (tok :A) (tok :I))
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}))
964 |
(deftest times-t
  ;; The repeated parser is always `(tok :A1 :A2 :A3)`; only the repeat count
  ;; and the input vary. Rows: count, input, expected reply of the `p` helper.
  (test/are [n input expected] (= expected (p (p/times n (tok :A1 :A2 :A3)) input))

    ;; Exactly three matching tokens.
    3
    [:A1 :A2 :A3]
    {:consumed true, :value '(:A1 :A2 :A3)}

    ;; Extra tokens after the third are not part of the value.
    3
    [:A1 :A2 :A3 :A4]
    {:consumed true, :value '(:A1 :A2 :A3)}

    3
    [:A1 :A2 :A3 :B]
    {:consumed true, :value '(:A1 :A2 :A3)}

    ;; Too few tokens.
    3
    [:A1 :A2]
    {:consumed true, :error ["error at index 2:"
                             "unexpected end of input"]}

    ;; Wrong token at the third position.
    3
    [:A1 :A2 :B]
    {:consumed true, :error ["error at index 2:"
                             "unexpected :B"]}

    ;; Wrong token at the first position: nothing consumed.
    3
    [:B :A1 :A2 :A3]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    3
    [:B :A1]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    3
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    ;; Zero and negative counts yield nil without consuming.
    0
    [:A1 :A2 :A3]
    {:consumed false, :value nil}

    -3
    [:A1 :A2 :A3]
    {:consumed false, :value nil}))
1014 |
(deftest *many-till-t
  ;; Rows: parser, input, expected reply of the `p` test helper.
  (test/are [parser input expected] (= expected (p parser input))

    ;; Items followed by the end marker; the marker is not part of the value.
    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    [:A1 :A2 :A3 :END]
    {:consumed true, :value '(:A1 :A2 :A3)}

    ;; A token that is neither an item nor the end marker.
    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    [:A1 :A2 :A3 :B :END]
    {:consumed true, :error ["error at index 3:"
                             "unexpected :B"]}

    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    [:B :END]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    ;; End marker is missing.
    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    [:A1 :A2 :A3]
    {:consumed true, :error ["error at index 3:"
                             "unexpected end of input"]}

    ;; Item parser fails after consuming.
    (p/*many-till (fail-consumed (tok :A1 :A2 :A3)) (tok :END))
    [:A1 :A2 :A3 :END]
    {:consumed true, :error ["error at index 1:"
                             "Test failure after parsing :A1"]}

    ;; Only the end marker: nil value, but input was consumed.
    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    [:END]
    {:consumed true, :value nil}

    ;; Nested `*many-till` as one of the item alternatives.
    (p/*many-till (p/alt (tok :A1 :A2 :A3)
                         (p/*many-till (tok :B1 :B2 :B3)
                                       (tok :END)))
                  (tok :END))
    [:A1 :A2 :A3 :B1 :B2 :B3 :END :A1 :A2 :A3 :END]
    {:consumed true, :value '(:A1 :A2 :A3 (:B1 :B2 :B3) :A1 :A2 :A3)}

    ;; Long input: 10000 items before the end marker.
    (p/*many-till (tok :A1 :A2 :A3) (tok :END))
    (concat (take 10000 (cycle [:A1 :A2 :A3])) [:END])
    {:consumed true, :value (take 10000 (cycle [:A1 :A2 :A3]))}))
1065 |
1066 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1067 |
(deftest +sep-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/+sep-by (tok :A) (tok :S)) input))

    [:A :S :A :S :A]
    {:consumed true, :value '(:A :A :A)}

    ;; Trailing separator: an item is required after it.
    [:A :S :A :S :A :S]
    {:consumed true, :error ["error at index 6:"
                             "unexpected end of input"]}

    ;; Parsing stops before a token that is not a separator.
    [:A :S :A :S :A :B]
    {:consumed true, :value '(:A :A :A)}

    ;; At least one item is required.
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    [:B]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    [:S]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :S"]}))
1100 |
(deftest *sep-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/*sep-by (tok :A) (tok :S)) input))

    [:A :S :A :S :A]
    {:consumed true, :value '(:A :A :A)}

    ;; Trailing separator: an item is required after it.
    [:A :S :A :S :A :S]
    {:consumed true, :error ["error at index 6:"
                             "unexpected end of input"]}

    ;; Parsing stops before a token that is not a separator.
    [:A :S :A :S :A :B]
    {:consumed true, :value '(:A :A :A)}

    ;; No items at all yields nil without consuming.
    []
    {:consumed false, :value nil}

    [:B]
    {:consumed false, :value nil}

    [:S]
    {:consumed false, :value nil}))
1130 |
(deftest +sep-end-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/+sep-end-by (tok :A) (tok :S)) input))

    ;; Every item is followed by a separator.
    [:A :S :A :S :A :S]
    {:consumed true, :value '(:A :A :A)}

    ;; Last item has no separator after it.
    [:A :S :A :S :A :S :A]
    {:consumed true, :error ["error at index 7:"
                             "unexpected end of input"]}

    ;; Parsing stops before a token that is not an item.
    [:A :S :A :S :A :S :B]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A]
    {:consumed true, :error ["error at index 5:"
                             "unexpected end of input"]}

    [:A :S :A :S :A :A]
    {:consumed true, :error ["error at index 5:"
                             "unexpected :A"]}

    [:A :S :A :S :A :B]
    {:consumed true, :error ["error at index 5:"
                             "unexpected :B"]}

    ;; At least one item is required.
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    [:B]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    [:S]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :S"]}))
1179 |
(deftest *sep-end-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/*sep-end-by (tok :A) (tok :S)) input))

    ;; Every item is followed by a separator.
    [:A :S :A :S :A :S]
    {:consumed true, :value '(:A :A :A)}

    ;; Last item has no separator after it.
    [:A :S :A :S :A :S :A]
    {:consumed true, :error ["error at index 7:"
                             "unexpected end of input"]}

    ;; Parsing stops before a token that is not an item.
    [:A :S :A :S :A :S :B]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A]
    {:consumed true, :error ["error at index 5:"
                             "unexpected end of input"]}

    [:A :S :A :S :A :A]
    {:consumed true, :error ["error at index 5:"
                             "unexpected :A"]}

    [:A :S :A :S :A :B]
    {:consumed true, :error ["error at index 5:"
                             "unexpected :B"]}

    ;; No items at all yields nil without consuming.
    []
    {:consumed false, :value nil}

    [:B]
    {:consumed false, :value nil}

    [:S]
    {:consumed false, :value nil}))
1224 |
(deftest +sep-opt-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/+sep-opt-by (tok :A) (tok :S)) input))

    ;; Trailing separator is accepted.
    [:A :S :A :S :A :S]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A :S :A]
    {:consumed true, :value '(:A :A :A :A)}

    [:A :S :A :S :A :S :B]
    {:consumed true, :value '(:A :A :A)}

    ;; Trailing separator may be absent.
    [:A :S :A :S :A]
    {:consumed true, :value '(:A :A :A)}

    ;; An item without a preceding separator is not collected.
    [:A :S :A :S :A :A]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A :B]
    {:consumed true, :value '(:A :A :A)}

    ;; At least one item is required.
    []
    {:consumed false, :error ["error at index 0:"
                              "unexpected end of input"]}

    [:B]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :B"]}

    [:S]
    {:consumed false, :error ["error at index 0:"
                              "unexpected :S"]}))
1268 |
(deftest *sep-opt-by-t
  ;; Items are `:A`, separators are `:S`.
  ;; Rows: input, expected reply of the `p` test helper.
  (test/are [input expected] (= expected (p (p/*sep-opt-by (tok :A) (tok :S)) input))

    ;; Trailing separator is accepted.
    [:A :S :A :S :A :S]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A :S :A]
    {:consumed true, :value '(:A :A :A :A)}

    [:A :S :A :S :A :S :B]
    {:consumed true, :value '(:A :A :A)}

    ;; Trailing separator may be absent.
    [:A :S :A :S :A]
    {:consumed true, :value '(:A :A :A)}

    ;; An item without a preceding separator is not collected.
    [:A :S :A :S :A :A]
    {:consumed true, :value '(:A :A :A)}

    [:A :S :A :S :A :B]
    {:consumed true, :value '(:A :A :A)}

    ;; No items at all yields nil without consuming.
    []
    {:consumed false, :value nil}

    [:B]
    {:consumed false, :value nil}

    [:S]
    {:consumed false, :value nil}))
1309 |
1310 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1311 |
(deftest get-state-t
  ;; These checks call `p/parse*` directly and inspect the reply's
  ;; `:value` / `:state` entries.
  (test/are [actual expected] (= expected actual)

    ;; Without arguments the value is the state itself: its `:input` equals
    ;; the `:input` of the reply state.
    ((juxt (comp :input :value) (comp :input :state))
     (p/parse* (p/get-state) [:A]))
    ['(:A) '(:A)]

    ;; With a field argument the value is that field of the state.
    (:value (p/parse* (p/get-state :input) [:A]))
    '(:A)

    ;; The `:user` field reads back what `set-state` stored.
    (:value (p/parse* (p/after (p/set-state :user ::state) (p/get-state :user)) [:A]))
    ::state))
1328 |
(deftest set-state-t
  ;; These checks call `p/parse*` directly and inspect the reply's `:state`.
  (test/are [actual expected] (= expected actual)

    ;; One argument replaces the whole state.
    (:state (p/parse* (p/set-state ::state) [:A]))
    ::state

    ;; Setting `:input` to a vector reads back as an equal sequence.
    (:input (:state (p/parse* (p/set-state :input [:B]) [:A])))
    '(:B)

    ;; Setting `:input` to nil reads back as an empty sequence.
    (:input (:state (p/parse* (p/set-state :input nil) [:A])))
    '()

    ;; Setting the `:user` field.
    (:user (:state (p/parse* (p/set-state :user ::state) [:A])))
    ::state))
1349 |
(deftest update-state-t
  ;; Mirrors the `set-state` checks, with `constantly` functions supplying
  ;; the new values. Inspects the reply's `:state` from `p/parse*`.
  (test/are [actual expected] (= expected actual)

    ;; One argument: the function's result becomes the whole state.
    (:state (p/parse* (p/update-state (constantly ::state)) [:A]))
    ::state

    ;; Updating `:input` to a vector reads back as an equal sequence.
    (:input (:state (p/parse* (p/update-state :input (constantly [:B])) [:A])))
    '(:B)

    ;; Updating `:input` to nil reads back as an empty sequence.
    (:input (:state (p/parse* (p/update-state :input (constantly nil)) [:A])))
    '()

    ;; Updating the `:user` field.
    (:user (:state (p/parse* (p/update-state :user (constantly ::state)) [:A])))
    ::state))
1370 |
1371 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1372 |
(deftest trace-t
  ;; The parse reply is discarded here: each check captures what was printed
  ;; while parsing (`with-out-str`) and compares it line by line.
  ;; Rows in both tables: parser, input, expected printed lines.
  (testing "trace state"
    (test/are [parser input expected]
              (= expected (string/split-lines (with-out-str (p parser input))))

      ;; Trace steps placed before each token parser.
      (p/for [_ (p/trace "a")
              a (tok :A)
              _ (p/trace "b")
              b (tok :B)]
        (p/result [a b]))
      [:A :B :C]
      ["a: at index 0"
       " - input: (:A :B :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B :C)"
       " - user: nil"]

      (p/for [_ (p/trace "a")
              a (tok :A)
              _ (p/trace "b")
              b (tok :B)]
        (p/result [a b]))
      [:A :B]
      ["a: at index 0"
       " - input: (:A :B)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B)"
       " - user: nil"]

      ;; Trace steps placed after each token parser.
      (p/for [a (tok :A)
              _ (p/trace "a")
              b (tok :B)
              _ (p/trace "b")]
        (p/result [a b]))
      [:A :B]
      ["a: at index 1"
       " - input: (:B)"
       " - user: nil"
       "b: at index 2"
       " - input: ()"
       " - user: nil"]))

  (testing "trace parser"
    (test/are [parser input expected]
              (= expected (string/split-lines (with-out-str (p parser input))))

      ;; Both wrapped parsers succeed.
      (p/for [a (p/trace "a" (tok :A))
              b (p/trace "b" (tok :B))]
        (p/result [a b]))
      [:A :B :C]
      ["a: at index 0"
       " - input: (:A :B :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B :C)"
       " - user: nil"]

      (p/for [a (p/trace "a" (tok :A))
              b (p/trace "b" (tok :B))]
        (p/result [a b]))
      [:A :B]
      ["a: at index 0"
       " - input: (:A :B)"
       " - user: nil"
       "b: at index 1"
       " - input: (:B)"
       " - user: nil"]

      ;; First wrapped parser fails: a "backtracked" line is printed for it.
      (p/for [a (p/trace "a" (tok :A))
              b (p/trace "b" (tok :B))]
        (p/result [a b]))
      [:B :C]
      ["a: at index 0"
       " - input: (:B :C)"
       " - user: nil"
       "a: backtracked"]

      ;; Second wrapped parser fails.
      (p/for [a (p/trace "a" (tok :A))
              b (p/trace "b" (tok :B))]
        (p/result [a b]))
      [:A :C]
      ["a: at index 0"
       " - input: (:A :C)"
       " - user: nil"
       "b: at index 1"
       " - input: (:C)"
       " - user: nil"
       "b: backtracked"])))
1479 |
1480 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1481 |
(deftest parse-t
  ;; `p/parse` is called directly here rather than through the `p` helper.
  (test/are [actual expected] (= expected actual)

    ;; On success the bare value is returned.
    (p/parse (p/result :ok) [])
    :ok

    ;; On failure an exception is thrown; its message is the error text.
    ;; The reader conditional picks the catch class per platform.
    (try
      (p/parse (p/fail "Error") [])
      (catch #?@(:clj [Exception e] :default [:default e])
        (ex-message e)))
    "error at index 0:\nError"))
1494 |
1495 | ;;,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,,
1496 |
--------------------------------------------------------------------------------