├── doc └── intro.md ├── .gitignore ├── deps.edn ├── CHANGELOG.md ├── src └── token_matcher │ ├── example-kb-api.clj │ └── core.clj ├── LICENSE ├── test └── token_matcher │ └── core_test.clj └── README.md /doc/intro.md: -------------------------------------------------------------------------------- 1 | # Introduction to token-matcher 2 | 3 | TODO: write [great documentation](http://jacobian.org/writing/what-to-write/) 4 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /target 2 | /classes 3 | /checkouts 4 | profiles.clj 5 | pom.xml 6 | pom.xml.asc 7 | *.jar 8 | *.class 9 | /.lein-* 10 | /.nrepl-port 11 | /.prepl-port 12 | .hgignore 13 | .hg/ 14 | -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:deps 2 | {io.github.bobschrag/clolog {:git/tag "v0.4.0" :git/sha "213b286"} 3 | riddley/riddley {:mvn/version "0.2.0"}} 4 | :aliases 5 | {:test {:extra-paths ["test"] 6 | :extra-deps {io.github.cognitect-labs/test-runner 7 | {:git/tag "v0.5.1" :git/sha "dfb30dd"}} 8 | :main-opts ["-m" "cognitect.test-runner"]}}} 9 | -------------------------------------------------------------------------------- /CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # Changelog 2 | 3 | ## [1.4.0] - 2023-09-18 4 | 5 | Add `match-uniquely`, which raises an error in case of multiple matches. 6 | 7 | ## [1.3.0]: 2023-08-14 8 | 9 | Explain application integration and illustrate knowledge base interaction. 10 | 11 | ## [1.2]: 2023-08-07 12 | 13 | Introduce `thing`, the universal kind. 14 | 15 | ## [1.1] - 2023-07-29 16 | 17 | Replace original kind registry with type representation and reasoning in clolog. 18 | 19 | 20 | -------------------------------------------------------------------------------- /src/token_matcher/example-kb-api.clj: -------------------------------------------------------------------------------- 1 | ;;; ---------------------------------------------------------------- 2 | ;;; Knowledge base: 3 | 4 | ;;; This section could reasonably be construed as generic for any user 5 | ;;; of token-matcher that also uses clolog. See also 6 | ;;; `defn-templating-core` and `defn-templating-strings` in section 7 | ;;; 'token-matcher interface'. 8 | 9 | ;;; `*model*` is our knowledge base of Prolog assertions. 10 | (def ^:dynamic *model* 11 | ;; An atom with a placeholder value. 12 | (atom {})) 13 | 14 | (defn initialize-model [] 15 | (tm/initialize-matcher) 16 | ;; We initialize `*model*` to grab "kind" (token-matcher type) 17 | ;; reasoning assertions in `tm/*kind-assertions*`. 18 | (reset! *model* @tm/*kind-assertions*)) 19 | 20 | (defmacro with-model [& body] 21 | ;; Using `cons`, rather than syntax quote (`), to avoid unnecessary 22 | ;; resolution (i.e., namespace qualification) of symbols in `body`. 23 | (cons 'binding 24 | ;; We bind `pl/*assertions*` so that clolog operations will 25 | ;; refer to `*model*`. We bind `tm/*kind-assertions*` so that 26 | ;; the matcher's KB operations refer to `*model*` also. So, 27 | ;; all our layers use the same knowledge base. 28 | (cons '[pl/*assertions* *model* 29 | tm/*kind-assertions* *model*] 30 | body))) 31 | 32 | ;;; Localize our commonly used KB operations to `*model*`. 33 | 34 | (defn inquire [answer-template & goals] ; `query` otherwise used below. 35 | (with-model 36 | (pl/query answer-template goals))) 37 | 38 | ;;; clolog v0.2 doesn't have a variadic `assert<-_` function like this 39 | ;;; one, which is handy here. 40 | (defn assert&<-_ [& statements] 41 | (with-model 42 | ;; Avoid duplicate assertions. 43 | (pl/assert<-_ statements))) 44 | 45 | (defn get-matching-head-assertions [statement-pattern] 46 | (with-model 47 | (pl/get-matching-head-assertions statement-pattern))) 48 | 49 | ;;; Knowledge base ^^ 50 | ;;; ---------------------------------------------------------------- 51 | ;;; token-matcher interface: 52 | 53 | ;;; The next two forms could reasonably be construed as generic, for 54 | ;;; any user of token-matcher that also uses clolog. 55 | 56 | ;;; Upon integrating clolog, we've needed to copy them from 57 | ;;; token-matcher's `core.clj` and modify `defn-templating-core` so 58 | ;;; that the call to `match` can access our knowledge base (via 59 | ;;; `with-model`). This simple modification aside, comments with 60 | ;;; example expansions in the token-matcher file and (more briefly) in 61 | ;;; the next section may be of interest. 62 | (defn defn-templating-core [fn-sym input-sym template let-binding-form body] 63 | ;; Not using syntax quote (`) here, because of symbol namespacing 64 | ;; issues. 65 | (cons 'defn 66 | (cons fn-sym 67 | (cons [input-sym] 68 | (list 69 | (list 'let 70 | ['bindings-hashmap (list 'with-model ; Specialization. 71 | (list 'tm/match 72 | (list 'quote template) 73 | input-sym))] 74 | (cons 'when 75 | (list 'bindings-hashmap 76 | (cons 'let 77 | (cons let-binding-form body)))))))))) 78 | 79 | 80 | (defmacro defn-templating-strings [fn-sym [template [input-sym]] & body] 81 | (let [let-binding-form (tm/get-let-binding-form-strings 82 | (seq (tm/template-vars (tm/parse-template template))))] 83 | (defn-templating-core fn-sym input-sym template let-binding-form body))) 84 | 85 | ;;; token-matcher interface ^^ 86 | ;;; ---------------------------------------------------------------- 87 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Eclipse Public License - v 2.0 2 | 3 | THE ACCOMPANYING PROGRAM IS PROVIDED UNDER THE TERMS OF THIS ECLIPSE 4 | PUBLIC LICENSE ("AGREEMENT"). ANY USE, REPRODUCTION OR DISTRIBUTION 5 | OF THE PROGRAM CONSTITUTES RECIPIENT'S ACCEPTANCE OF THIS AGREEMENT. 6 | 7 | 1. DEFINITIONS 8 | 9 | "Contribution" means: 10 | 11 | a) in the case of the initial Contributor, the initial content 12 | Distributed under this Agreement, and 13 | 14 | b) in the case of each subsequent Contributor: 15 | i) changes to the Program, and 16 | ii) additions to the Program; 17 | where such changes and/or additions to the Program originate from 18 | and are Distributed by that particular Contributor. A Contribution 19 | "originates" from a Contributor if it was added to the Program by 20 | such Contributor itself or anyone acting on such Contributor's behalf. 21 | Contributions do not include changes or additions to the Program that 22 | are not Modified Works. 23 | 24 | "Contributor" means any person or entity that Distributes the Program. 25 | 26 | "Licensed Patents" mean patent claims licensable by a Contributor which 27 | are necessarily infringed by the use or sale of its Contribution alone 28 | or when combined with the Program. 29 | 30 | "Program" means the Contributions Distributed in accordance with this 31 | Agreement. 32 | 33 | "Recipient" means anyone who receives the Program under this Agreement 34 | or any Secondary License (as applicable), including Contributors. 35 | 36 | "Derivative Works" shall mean any work, whether in Source Code or other 37 | form, that is based on (or derived from) the Program and for which the 38 | editorial revisions, annotations, elaborations, or other modifications 39 | represent, as a whole, an original work of authorship. 40 | 41 | "Modified Works" shall mean any work in Source Code or other form that 42 | results from an addition to, deletion from, or modification of the 43 | contents of the Program, including, for purposes of clarity any new file 44 | in Source Code form that contains any contents of the Program. Modified 45 | Works shall not include works that contain only declarations, 46 | interfaces, types, classes, structures, or files of the Program solely 47 | in each case in order to link to, bind by name, or subclass the Program 48 | or Modified Works thereof. 49 | 50 | "Distribute" means the acts of a) distributing or b) making available 51 | in any manner that enables the transfer of a copy. 52 | 53 | "Source Code" means the form of a Program preferred for making 54 | modifications, including but not limited to software source code, 55 | documentation source, and configuration files. 56 | 57 | "Secondary License" means either the GNU General Public License, 58 | Version 2.0, or any later versions of that license, including any 59 | exceptions or additional permissions as identified by the initial 60 | Contributor. 61 | 62 | 2. GRANT OF RIGHTS 63 | 64 | a) Subject to the terms of this Agreement, each Contributor hereby 65 | grants Recipient a non-exclusive, worldwide, royalty-free copyright 66 | license to reproduce, prepare Derivative Works of, publicly display, 67 | publicly perform, Distribute and sublicense the Contribution of such 68 | Contributor, if any, and such Derivative Works. 69 | 70 | b) Subject to the terms of this Agreement, each Contributor hereby 71 | grants Recipient a non-exclusive, worldwide, royalty-free patent 72 | license under Licensed Patents to make, use, sell, offer to sell, 73 | import and otherwise transfer the Contribution of such Contributor, 74 | if any, in Source Code or other form. This patent license shall 75 | apply to the combination of the Contribution and the Program if, at 76 | the time the Contribution is added by the Contributor, such addition 77 | of the Contribution causes such combination to be covered by the 78 | Licensed Patents. The patent license shall not apply to any other 79 | combinations which include the Contribution. No hardware per se is 80 | licensed hereunder. 81 | 82 | c) Recipient understands that although each Contributor grants the 83 | licenses to its Contributions set forth herein, no assurances are 84 | provided by any Contributor that the Program does not infringe the 85 | patent or other intellectual property rights of any other entity. 86 | Each Contributor disclaims any liability to Recipient for claims 87 | brought by any other entity based on infringement of intellectual 88 | property rights or otherwise. As a condition to exercising the 89 | rights and licenses granted hereunder, each Recipient hereby 90 | assumes sole responsibility to secure any other intellectual 91 | property rights needed, if any. For example, if a third party 92 | patent license is required to allow Recipient to Distribute the 93 | Program, it is Recipient's responsibility to acquire that license 94 | before distributing the Program. 95 | 96 | d) Each Contributor represents that to its knowledge it has 97 | sufficient copyright rights in its Contribution, if any, to grant 98 | the copyright license set forth in this Agreement. 99 | 100 | e) Notwithstanding the terms of any Secondary License, no 101 | Contributor makes additional grants to any Recipient (other than 102 | those set forth in this Agreement) as a result of such Recipient's 103 | receipt of the Program under the terms of a Secondary License 104 | (if permitted under the terms of Section 3). 105 | 106 | 3. REQUIREMENTS 107 | 108 | 3.1 If a Contributor Distributes the Program in any form, then: 109 | 110 | a) the Program must also be made available as Source Code, in 111 | accordance with section 3.2, and the Contributor must accompany 112 | the Program with a statement that the Source Code for the Program 113 | is available under this Agreement, and informs Recipients how to 114 | obtain it in a reasonable manner on or through a medium customarily 115 | used for software exchange; and 116 | 117 | b) the Contributor may Distribute the Program under a license 118 | different than this Agreement, provided that such license: 119 | i) effectively disclaims on behalf of all other Contributors all 120 | warranties and conditions, express and implied, including 121 | warranties or conditions of title and non-infringement, and 122 | implied warranties or conditions of merchantability and fitness 123 | for a particular purpose; 124 | 125 | ii) effectively excludes on behalf of all other Contributors all 126 | liability for damages, including direct, indirect, special, 127 | incidental and consequential damages, such as lost profits; 128 | 129 | iii) does not attempt to limit or alter the recipients' rights 130 | in the Source Code under section 3.2; and 131 | 132 | iv) requires any subsequent distribution of the Program by any 133 | party to be under a license that satisfies the requirements 134 | of this section 3. 135 | 136 | 3.2 When the Program is Distributed as Source Code: 137 | 138 | a) it must be made available under this Agreement, or if the 139 | Program (i) is combined with other material in a separate file or 140 | files made available under a Secondary License, and (ii) the initial 141 | Contributor attached to the Source Code the notice described in 142 | Exhibit A of this Agreement, then the Program may be made available 143 | under the terms of such Secondary Licenses, and 144 | 145 | b) a copy of this Agreement must be included with each copy of 146 | the Program. 147 | 148 | 3.3 Contributors may not remove or alter any copyright, patent, 149 | trademark, attribution notices, disclaimers of warranty, or limitations 150 | of liability ("notices") contained within the Program from any copy of 151 | the Program which they Distribute, provided that Contributors may add 152 | their own appropriate notices. 153 | 154 | 4. COMMERCIAL DISTRIBUTION 155 | 156 | Commercial distributors of software may accept certain responsibilities 157 | with respect to end users, business partners and the like. While this 158 | license is intended to facilitate the commercial use of the Program, 159 | the Contributor who includes the Program in a commercial product 160 | offering should do so in a manner which does not create potential 161 | liability for other Contributors. Therefore, if a Contributor includes 162 | the Program in a commercial product offering, such Contributor 163 | ("Commercial Contributor") hereby agrees to defend and indemnify every 164 | other Contributor ("Indemnified Contributor") against any losses, 165 | damages and costs (collectively "Losses") arising from claims, lawsuits 166 | and other legal actions brought by a third party against the Indemnified 167 | Contributor to the extent caused by the acts or omissions of such 168 | Commercial Contributor in connection with its distribution of the Program 169 | in a commercial product offering. The obligations in this section do not 170 | apply to any claims or Losses relating to any actual or alleged 171 | intellectual property infringement. In order to qualify, an Indemnified 172 | Contributor must: a) promptly notify the Commercial Contributor in 173 | writing of such claim, and b) allow the Commercial Contributor to control, 174 | and cooperate with the Commercial Contributor in, the defense and any 175 | related settlement negotiations. The Indemnified Contributor may 176 | participate in any such claim at its own expense. 177 | 178 | For example, a Contributor might include the Program in a commercial 179 | product offering, Product X. That Contributor is then a Commercial 180 | Contributor. If that Commercial Contributor then makes performance 181 | claims, or offers warranties related to Product X, those performance 182 | claims and warranties are such Commercial Contributor's responsibility 183 | alone. Under this section, the Commercial Contributor would have to 184 | defend claims against the other Contributors related to those performance 185 | claims and warranties, and if a court requires any other Contributor to 186 | pay any damages as a result, the Commercial Contributor must pay 187 | those damages. 188 | 189 | 5. NO WARRANTY 190 | 191 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 192 | PERMITTED BY APPLICABLE LAW, THE PROGRAM IS PROVIDED ON AN "AS IS" 193 | BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, EITHER EXPRESS OR 194 | IMPLIED INCLUDING, WITHOUT LIMITATION, ANY WARRANTIES OR CONDITIONS OF 195 | TITLE, NON-INFRINGEMENT, MERCHANTABILITY OR FITNESS FOR A PARTICULAR 196 | PURPOSE. Each Recipient is solely responsible for determining the 197 | appropriateness of using and distributing the Program and assumes all 198 | risks associated with its exercise of rights under this Agreement, 199 | including but not limited to the risks and costs of program errors, 200 | compliance with applicable laws, damage to or loss of data, programs 201 | or equipment, and unavailability or interruption of operations. 202 | 203 | 6. DISCLAIMER OF LIABILITY 204 | 205 | EXCEPT AS EXPRESSLY SET FORTH IN THIS AGREEMENT, AND TO THE EXTENT 206 | PERMITTED BY APPLICABLE LAW, NEITHER RECIPIENT NOR ANY CONTRIBUTORS 207 | SHALL HAVE ANY LIABILITY FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, 208 | EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING WITHOUT LIMITATION LOST 209 | PROFITS), HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 210 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 211 | ARISING IN ANY WAY OUT OF THE USE OR DISTRIBUTION OF THE PROGRAM OR THE 212 | EXERCISE OF ANY RIGHTS GRANTED HEREUNDER, EVEN IF ADVISED OF THE 213 | POSSIBILITY OF SUCH DAMAGES. 214 | 215 | 7. GENERAL 216 | 217 | If any provision of this Agreement is invalid or unenforceable under 218 | applicable law, it shall not affect the validity or enforceability of 219 | the remainder of the terms of this Agreement, and without further 220 | action by the parties hereto, such provision shall be reformed to the 221 | minimum extent necessary to make such provision valid and enforceable. 222 | 223 | If Recipient institutes patent litigation against any entity 224 | (including a cross-claim or counterclaim in a lawsuit) alleging that the 225 | Program itself (excluding combinations of the Program with other software 226 | or hardware) infringes such Recipient's patent(s), then such Recipient's 227 | rights granted under Section 2(b) shall terminate as of the date such 228 | litigation is filed. 229 | 230 | All Recipient's rights under this Agreement shall terminate if it 231 | fails to comply with any of the material terms or conditions of this 232 | Agreement and does not cure such failure in a reasonable period of 233 | time after becoming aware of such noncompliance. If all Recipient's 234 | rights under this Agreement terminate, Recipient agrees to cease use 235 | and distribution of the Program as soon as reasonably practicable. 236 | However, Recipient's obligations under this Agreement and any licenses 237 | granted by Recipient relating to the Program shall continue and survive. 238 | 239 | Everyone is permitted to copy and distribute copies of this Agreement, 240 | but in order to avoid inconsistency the Agreement is copyrighted and 241 | may only be modified in the following manner. The Agreement Steward 242 | reserves the right to publish new versions (including revisions) of 243 | this Agreement from time to time. No one other than the Agreement 244 | Steward has the right to modify this Agreement. The Eclipse Foundation 245 | is the initial Agreement Steward. The Eclipse Foundation may assign the 246 | responsibility to serve as the Agreement Steward to a suitable separate 247 | entity. Each new version of the Agreement will be given a distinguishing 248 | version number. The Program (including Contributions) may always be 249 | Distributed subject to the version of the Agreement under which it was 250 | received. In addition, after a new version of the Agreement is published, 251 | Contributor may elect to Distribute the Program (including its 252 | Contributions) under the new version. 253 | 254 | Except as expressly stated in Sections 2(a) and 2(b) above, Recipient 255 | receives no rights or licenses to the intellectual property of any 256 | Contributor under this Agreement, whether expressly, by implication, 257 | estoppel or otherwise. All rights in the Program not expressly granted 258 | under this Agreement are reserved. Nothing in this Agreement is intended 259 | to be enforceable by any entity that is not a Contributor or Recipient. 260 | No third-party beneficiary rights are created under this Agreement. 261 | 262 | Exhibit A - Form of Secondary Licenses Notice 263 | 264 | "This Source Code may also be made available under the following 265 | Secondary Licenses when the conditions for such availability set forth 266 | in the Eclipse Public License, v. 2.0 are satisfied: {name license(s), 267 | version(s), and exceptions or additional permissions here}." 268 | 269 | Simply including a copy of this Agreement, including this Exhibit A 270 | is not sufficient to license the Source Code under Secondary Licenses. 271 | 272 | If it is not possible or desirable to put the notice in a particular 273 | file, then You may include the notice in a location (such as a LICENSE 274 | file in a relevant directory) where a recipient would be likely to 275 | look for such a notice. 276 | 277 | You may add additional accurate notices of copyright ownership. 278 | -------------------------------------------------------------------------------- /test/token_matcher/core_test.clj: -------------------------------------------------------------------------------- 1 | (ns token-matcher.core-test 2 | (:require [clojure.test :refer :all] 3 | [token-matcher.core :refer :all])) 4 | 5 | (deftest parse-template-test 6 | ;; We need to refer our used public symbols---in contexts where they 7 | ;; are quoted---because most testers run in the `user` namespace. 8 | (testing "parse-template" 9 | (refer 'token-matcher.core :only '[digits-alone n-digits-alone]) 10 | (is (= '([*int {:max-tokens 1, 11 | :finally? (token-matcher.core/digit-string? *int)}]) 12 | (parse-template '([*int (digits-alone {} *int)])))) 13 | (is (= '([*int 14 | {:finally? 15 | (and 16 | (token-matcher.core/digit-string? *int) 17 | (> (read-string *int) 28)), 18 | :max-tokens 1}]) 19 | (parse-template '([*int (digits-alone {:finally? (> (read-string *int) 28)} *int)])))) 20 | (is (= '([*int 21 | {:finally? (and 22 | (token-matcher.core/digit-string? *int) 23 | (clojure.core/= (clojure.core/count *int) 3) 24 | (> (read-string *int) 99)), 25 | :max-tokens 1}]) 26 | (parse-template '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 27 | *int 28 | 3)])))) 29 | )) 30 | 31 | (deftest template-local-test 32 | (testing "Locals in template" 33 | ;; Local scope: 34 | (let [max 3 35 | max-limited-digits (fn [self] 36 | {:max-tokens 1 37 | :finally? `(and (digit-string? ~self) 38 | (<= (count ~self) ~max))})] 39 | (is (= '{*foo "23", *bar "321"} 40 | (match `([~'*foo ~(max-limited-digits '*foo)] 41 | [~'*bar ~(max-limited-digits '*bar)]) 42 | "23 321"))) 43 | (is (= nil 44 | (match `([~'*foo ~(max-limited-digits '*foo)] 45 | [~'*bar ~(max-limited-digits '*bar)]) 46 | "23 4321"))) 47 | (is (= '([*foo 48 | {:max-tokens 1, 49 | :finally? 50 | (clojure.core/and 51 | (token-matcher.core/digit-string? *foo) 52 | (clojure.core/<= (clojure.core/count *foo) 3))}] 53 | [*bar 54 | {:max-tokens 1, 55 | :finally? 56 | (clojure.core/and 57 | (token-matcher.core/digit-string? *bar) 58 | (clojure.core/<= (clojure.core/count *bar) 3))}]) 59 | (parse-template `([~'*foo ~(max-limited-digits '*foo)] 60 | [~'*bar ~(max-limited-digits '*bar)])))) 61 | ))) 62 | 63 | (def ^:dynamic *test-atom* (atom nil)) 64 | 65 | ;;; TODO: Explicit/alternative bindings for dynamic Clojure vars. 66 | (deftest match-test 67 | ;; We need to refer our used public symbols---in contexts where they 68 | ;; are quoted---because most testers run in the `user` namespace. 69 | (refer 'token-matcher.core :only '[count-tokens 70 | digits-alone n-digits-alone digits-along 71 | different-when-bound same-when-bound]) 72 | (testing "match" 73 | ;; No vars. 74 | ;; Not handled: (is (= {} (match "" ""))) 75 | (is (= {} (match "foo" "foo"))) 76 | (is (= {} (match "foo bar" "foo bar"))) 77 | (is (= nil (match "foo" "bar"))) 78 | ;; Only *vars. 79 | (is (= '{*foo "bar"} (match "*foo" "bar"))) 80 | (is (= '{*foo "bar bell"} (match "*foo" "bar bell"))) 81 | (is (= '{*foo "bar"} (match "*foo bell" "bar bell"))) 82 | (is (= {} (match '("#to") "#to"))) 83 | (binding [*chars-to-strip* #{}] 84 | (match '("'twas") "'twas")) 85 | (is (= '{*foo "true false"} (match "*foo" "true false"))) 86 | (is (= '{*foo "false"} (match "\"nil\" *foo" "nil false"))) 87 | (is (= nil (match "nil *foo" "nil false"))) 88 | (is (= nil (match "*foo bell" "bar"))) 89 | (is (= '{*foo "bar"} (match "bell *foo" "bell bar"))) 90 | (is (= '{*foo "bar none"} (match "bell *foo" "bell bar none"))) 91 | (is (= nil (match "*foo bar" "bar bell"))) 92 | (is (= '{*foo "bar" *ring "bell"} (match "*foo *ring" "bar bell"))) 93 | (is (= '{*first "first", *third "third"} 94 | (match "*first second *third" "first second third"))) 95 | (is (= '{*first "first first", *third "third"} 96 | (match "*first second *third" "first first second third"))) 97 | (is (= '{*first "first first", *fourth "fourth"} 98 | (match "*first second third *fourth" "first first second third fourth"))) 99 | (is (= '{*front "x y", *middle "m n", *back "r s"} 100 | (match "*front aaa *middle bbb *back" 101 | "x y aaa m n bbb r s"))) 102 | ;; Consecutive *vars: Prefer longer bindings for earlier vars. 103 | ;; Replace the with a. 104 | (is (= '{*foo "bar that" *ring "bell"} 105 | (match "*foo *ring" "bar that bell"))) 106 | ;; Multi-occurrence vars: 107 | (is (= '{*foo "bar"} (match "*foo *foo" "bar bar"))) 108 | (is (= nil (match "*foo *foo" "bar bell"))) 109 | ;; Addressing +vars: 110 | (install-kind-instance-map '{kind #{"really" "really big show" 111 | "John" "John Smith"} 112 | kinder #{"kinder" "kinder gentler" 113 | "something" "something really big"} 114 | kindle #{"Book 1" "Book 2"}}) 115 | ;; Only +vars: 116 | (is (= '{+kind "really"} 117 | (match "+kind" "really"))) 118 | (is (= '{+kind "really big show"} 119 | (match "+kind" "really big show"))) 120 | (is (= '{+kinder "something" +kind "really big show"} 121 | (match "+kinder +kind" "something really big show"))) 122 | (is (= '{+thing "something"} ; `thing` is the universal kind. 123 | (match "+thing" "something"))) 124 | (is (= nil ; `thing` will match only a registered kind instance. 125 | (match "+thing" "unregistered"))) 126 | ;; Mixed vars: 127 | (is (= '{*silly "any old" +kind "really big show"} 128 | (match "*silly +kind" "any old really big show"))) 129 | (is (= '{*silly "reading" +kindle "Book 2"} 130 | (match "*silly +kindle" "reading Book 2"))) 131 | ;; Multi-occurrence vars: 132 | (is (= '{+kindle "Book 2"} (match "+kindle foo +kindle" "Book 2 foo Book 2"))) 133 | (is (= nil (match "+kindle foo +kindle" "Book 2 foo Book 1"))) 134 | (is (= '{+kinder "something" +kind "really big show"} 135 | (match "+kinder +kind +kinder" "something really big show something"))) 136 | ;; Addressing subkinds: 137 | (add-subkind 'kind 'kinder) 138 | (is (= '{+kind "something"} 139 | (match "+kind" "something"))) 140 | ;;; Annotated +vars: 141 | (is (= '{+kind "really", +how "something really big"} 142 | (match "+kind (:optional [+how {:kind kinder}])" "really something really big"))) 143 | (is (= nil 144 | (match "+kind (:optional [+how {:kind kinder}])" "really not at all"))) 145 | (is (= '{+kind_0 "really"} 146 | (match "[+kind_0 {:kind kind}]" "really"))) 147 | (is (= '{+kind_0 "really big show"} 148 | (match "[+kind_0 {:kind kind}]" "really big show"))) 149 | (is (= '{+kinder_a "something" +kind_0 "really big show"} 150 | (match "[+kinder_a {:kind kinder}] [+kind_0 {:kind kind}]" 151 | "something really big show"))) 152 | ;; Mixed vars: 153 | (is (= '{*silly "any old" +kind_0 "really big show"} 154 | (match "*silly [+kind_0 {:kind kind}]" "any old really big show"))) 155 | (is (= '{*silly "reading" +kindle_bar "Book 2"} 156 | (match "*silly [+kindle_bar {:kind kindle}]" "reading Book 2"))) 157 | ;; Multi-occurrence vars: 158 | (is (= '{+kindle_bar "Book 2"} (match "[+kindle_bar {:kind kindle}] foo +kindle_bar" 159 | "Book 2 foo Book 2"))) 160 | (is (= nil (match "[+kindle_bar {:kind kindle}] foo +kindle_bar" 161 | "Book 2 foo Book 1"))) 162 | (is (= '{+kinder_a "something" +kind_0 "really big show"} 163 | (match "[+kinder_a {:kind kinder}] [+kind_0 {:kind kind}] +kinder_a" 164 | "something really big show something"))) 165 | ;; Multiple +vars of a kind. 166 | (is (= '{+kind_0 "really", +kind_1 "John"} 167 | (match "[+kind_0 {:kind kind}] [+kind_1 {:kind kind}] " "really John"))) 168 | ;; Daynamically created *vars: 169 | (is (= '{*happy "really"} (match "[*happy {:kind kind}]" "really"))) 170 | (do (install-kind-instance-map '{}) 171 | (is (= '{*found "Register", +finding "register"} 172 | (match "[*found {:kind finding}] this +finding" "Register this register.")))) 173 | ;; Token cardinality specs: 174 | (is (= '{*foo "this", *bar "little test"} 175 | (match "[*foo {:max-tokens 1}] *bar" "this little test"))) 176 | (is (= '{*foo "this little", *bar "test"} 177 | (match "[*foo {:max-tokens 2}] *bar" "this little test"))) 178 | (is (= '{*foo "this little", *bar "test"} 179 | (match "*foo [*bar {:min-tokens 1}]" "this little test"))) 180 | (is (= '{*foo "this", *bar "little test"} 181 | (match "*foo [*bar {:min-tokens 2}]" "this little test"))) 182 | (is (= nil (match "*foo [*bar {:min-tokens 3}]" "this little test"))) 183 | (do (install-kind-instance-map '{}) 184 | (is (= '{*found "Register", +finding "register"} 185 | (match "[*found {:kind finding :max-tokens 1}] this +finding" 186 | "Register this register.")))) 187 | ;; Optional vars: 188 | (is (= {} (match "(:optional *bar)" ""))) 189 | (is (= '{*foo "Hello Dolly"} 190 | (match "*foo (:optional *bar)" "Hello, Dolly!"))) 191 | (is (= '{*foo "Hello"} 192 | (match "*foo (:optional *bar) (:optional *baz)" 193 | "Hello!"))) 194 | (is (= '{*bar "Hello", *foo "Dolly"} 195 | (match "(:optional *bar) *foo" "Hello, Dolly!"))) 196 | (is (= '{*bar "Hello", *foo "Dolly"} 197 | (match "(:optional *bar) (:optional *baz) *foo" 198 | "Hello, Dolly!"))) 199 | (do (install-kind-instance-map '{kind #{"really" "really big show" 200 | "John" "John Smith"} 201 | kinder #{"kinder" "kinder gentler" 202 | "something" "something really big"} 203 | kindle #{"Book 1" "Book 2"}})) 204 | (is (= '{*rest "Register this item"} 205 | (match "(:optional +kind) *rest" 206 | "Register this item."))) 207 | ;; Inline kind: 208 | (is (= '{+fruits "apples"} 209 | (match "[+fruits {:kind #{\"apples\" \"pumpkins\" \"pears\"}}] to +fruits" 210 | "apples to apples"))) 211 | (is (= '{+fruits "apples", +nuts "filberts"} 212 | (match "[+fruits {:kind #{\"apples\" \"pumpkins\" \"pears\"}}] to 213 | [+nuts {:kind #{\"Brazils\" \"almonds\" \"filberts\"}}]" 214 | "apples to filberts"))) 215 | ;; List template: 216 | (is (= '{+fruits "apples", +nuts "filberts"} 217 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] to 218 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 219 | "apples to filberts"))) 220 | (is (= '{+fruits "apples", *to "to", +nuts "filberts"} 221 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] *to 222 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 223 | "apples to filberts"))) 224 | (is (= '{+fruits "apples", +nuts "filberts"} 225 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] to the 226 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 227 | "apples to the filberts"))) 228 | (is (= nil 229 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] "*to" the 230 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 231 | "apples to the filberts"))) 232 | (is (= '{+fruits "apples", +nuts "filberts"} 233 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] "*to" the 234 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 235 | "apples *to the filberts"))) 236 | (is (= '{+fruits "apples", +nuts "filberts"} 237 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] "#to" the 238 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 239 | "apples #to the filberts"))) 240 | (is (= '{+fruits "apples", +nuts "filberts"} 241 | (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] "nil" 242 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 243 | "apples nil filberts"))) 244 | (is (= nil (match '(nil *foo) "nil false"))) 245 | (is (= '{*foo "false"} (match '("nil" *foo) "nil false"))) 246 | ;; Restrictions: 247 | (is (= nil (match '(*foo [*bar {:finally? (not= *bar *foo)}]) 248 | "ho ho"))) 249 | (is (= '{*foo "ho", *bar "ho"} 250 | (match '(*foo [*bar {:finally? (= *bar *foo)}]) 251 | "ho ho"))) 252 | (is (= '{*foo "ho", *bar "ho"} 253 | ;; It's bad style (inefficient) for a restriction not to 254 | ;; refer to the current var, of course. 255 | (match '(*foo [*bar {:always? (< (count-tokens *foo) 2)}]) 256 | "ho ho"))) 257 | (is (= '{*foo "ho", *bar "ho ho"} 258 | ;; It's bad style (inefficient) for a restriction not to 259 | ;; refer to the current var. 260 | (match '(*foo [*bar {:always? (< (count-tokens *foo) 2)}]) 261 | "ho ho ho"))) 262 | ;; Shorthand restrictions: 263 | (is (= '{*int "29"} 264 | (match '([*int (digits-alone {} *int)]) "29"))) 265 | (is (= nil 266 | (match '([*int (digits-alone {} *int)]) "no"))) 267 | (is (= nil 268 | (match '(*foo [*bar (different-when-bound {} *foo *bar)]) 269 | "ho ho"))) 270 | (is (= '{*foo "ho", *bar "ho"} 271 | (match '(*foo [*bar (same-when-bound {} *foo *bar)]) 272 | "ho ho"))) 273 | (is (= '{*ints "29 30"} 274 | (match '([*ints (digits-along {} *ints)]) "29 30"))) 275 | (is (= nil 276 | (match '([*ints (digits-along {} *ints)]) "29 no"))) 277 | (is (= nil 278 | (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 279 | *int 280 | 3)]) 281 | "1001"))) 282 | (is (= nil 283 | (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 284 | *int 285 | 3)]) 286 | "001"))) 287 | (is (= '{*int "101"} 288 | (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 289 | *int 290 | 3)]) 291 | "101"))) 292 | ;; Actions: 293 | (binding [token-matcher.core-test/*test-atom* (atom nil)] 294 | (match '([*foo {:finally. (reset! token-matcher.core-test/*test-atom* 295 | *foo)}]) 296 | "bar") 297 | (is (= "bar" @token-matcher.core-test/*test-atom*))) 298 | (binding [token-matcher.core-test/*test-atom* (atom nil)] 299 | (match '(no [*foo {:finally. (reset! token-matcher.core-test/*test-atom* 300 | *foo)}]) 301 | "bar") 302 | (is (= nil @token-matcher.core-test/*test-atom*))) 303 | ;; Case sensitivity. 304 | (binding [*case-sensitive* true] 305 | (is (= nil (match "Foo" "foo"))) 306 | (is (= {} (match "(:-case Foo)" "foo"))) 307 | (is (= nil (match "(:+case Foo)" "foo"))) 308 | (is (= nil (match "(:-case Foo) Bar" "foo bar"))) 309 | (is (= {} (match "(:-case Foo) (:-case Bar)" "foo bar"))) 310 | (do (install-kind-instance-map '{kind #{"really" "really big show" 311 | "John" "John Smith"}}) 312 | (is (= nil (match "+kind" "john"))))) 313 | (binding [*case-sensitive* false] 314 | (is (= {} (match "Foo" "foo"))) 315 | (is (= nil (match "(:+case Foo)" "foo"))) 316 | (is (= {} (match "(:-case Foo)" "foo"))) 317 | (is (= nil (match "(:+case Foo) Bar" "foo bar"))) 318 | (is (= nil (match "(:+case Foo) (:+case Bar)" "foo bar")))) 319 | ;; Optional content: 320 | (is (= {} (match '((:optional foo)) ""))) 321 | (is (= {} (match '((:optional *foo)) ""))) 322 | (is (= {} (match '((:optional foo) bar) "bar"))) 323 | (is (= {} (match '(foo (:optional bar)) "foo"))) 324 | (is (= {} (match '(foo (:optional *bar)) "foo"))) 325 | (is (= '{*bar "bar"} (match '(foo (:optional *bar)) "foo bar"))) 326 | (is (= '{*bar "bar"} (match '(foo (:optional *bar stool)) "foo bar stool"))) 327 | (is (= nil (match '(foo (:optional *bar stool)) "foo bar"))) 328 | (is (= {} (match '(foo (:optional *bar stool)) "foo"))) 329 | (is (= '{*quack "foo"} 330 | (match '((:optional *bar stool) 331 | [*quack (different-when-bound {} *quack *bar)]) 332 | "foo"))) 333 | (is (= '{*bar "bar", *quack "quack"} 334 | (match '((:optional *bar stool) 335 | [*quack (different-when-bound {} *quack *bar)]) 336 | "bar stool quack"))) 337 | (is (= '{*quack "bar stool bar"} 338 | (match '((:optional *bar stool) 339 | [*quack (different-when-bound {} *quack *bar)]) 340 | "bar stool bar"))) 341 | ;; Choices: 342 | (is (= {} (match '((:choice foo bar)) "bar"))) 343 | (is (= '{*foo "bar"} (match '((:choice *foo bar)) "bar"))) 344 | (is (= '{*foo "bar"} (match '(some (:choice *foo bar)) "some bar"))) 345 | ;; Series: 346 | (is (= '{*foo "bar"} (match '(:series *foo) "bar"))) 347 | (is (= '{*foo "bar"} (match '((:series *foo)) "bar"))) 348 | ;; Control combos: 349 | (is (= '{*foo "bar"} (match '(:series (:optional *foo)) "bar"))) 350 | (is (= '{*foo "foo bar"} (match '(:series (:optional *foo)) "foo bar"))) 351 | (is (= '{*foo "foo bar"} 352 | (match '(:series (:optional (:choice *foo))) "foo bar"))) 353 | (is (= nil (match '(:series (:optional (:choice foo))) "Foo bar"))) 354 | (binding [*case-sensitive* false] 355 | (is (= nil (match '(:series (:optional (:choice (:+case Foo)))) "foo")))) 356 | (binding [*case-sensitive* true] 357 | (is (= {} (match '(:series (:optional (:choice (:-case foo)))) "Foo")))) 358 | (do (initialize-matcher) 359 | (add-kind-instance "person" "Alex") 360 | (add-kind-instance "person" "Bob") 361 | (add-kind-instance "restricted resource" "Repo 1")) 362 | (is (= #{"Repo 1" "Alex" "Bob"} 363 | (get-kind-instances 'thing))) 364 | (is (= '{*subject "Alex", *predicate "permissioned to", +thing "Repo 1"} 365 | (match "*subject is *predicate +thing" "\"Alex\" is permissioned to Repo 1"))) 366 | )) 367 | 368 | (deftest matches-test 369 | (testing "matches" 370 | (initialize-matcher) 371 | (is (= '#{[{*foo "one", *bar "two three"} #{}] 372 | [{*foo "one two", *bar "three"} #{}]} 373 | (matches "*foo *bar" "one two three"))) 374 | (is (= '#{[{*foo "one", *bar "two three"} #{}] 375 | [{*foo "one two", *bar "three"} #{}]} 376 | (matches "*foo *bar" "one two three" 2))) 377 | (is (= '#{[{*foo "one two", *bar "three"} #{}]} 378 | (matches "*foo *bar" "one two three" 1))) 379 | ;; Unique matches: 380 | (is (= [{} #{}] 381 | (match-uniquely [] []))) 382 | (is (= '[{*foo "one two"} #{}] 383 | (match-uniquely "*foo three" "one two three"))) 384 | (comment ; Errors out. 385 | ;; Execution error (AssertionError) at token-matcher.core/match-constructs (REPL:808). 386 | ;; Assert failed: Multiple matches found: #{[{*foo "one", *bar "two three"} #{}] [{*foo "one two", *bar "three"} #{}]} 387 | (match-uniquely "*foo *bar" "one two three")) 388 | )) 389 | 390 | (deftest defn-templaters-test 391 | ;; We need to refer our used public symbols---in contexts where they 392 | ;; are quoted---because most testers run in the `user` namespace. 393 | (refer 'token-matcher.core :only '[defn-templating-symbols defn-templating-strings]) 394 | (testing "defn-templating forms" 395 | (is (= '(defn list-outer-symbols [phrase] 396 | (let [bindings-hashmap (token-matcher.core/match '"*front stuff *back" phrase)] 397 | (when bindings-hashmap 398 | (let [*front (token-matcher.core/instance->symbol (get bindings-hashmap '*front)) 399 | *back (token-matcher.core/instance->symbol (get bindings-hashmap '*back))] 400 | (list *front *back))))) 401 | (macroexpand-1 402 | '(defn-templating-symbols list-outer-symbols ["*front stuff *back" 403 | [phrase]] 404 | (list *front *back))))) 405 | (comment ; Not suitable here. 406 | (defn-templating-symbols list-outer-symbols ["*front stuff *back" 407 | [phrase]] 408 | (list *front *back)) 409 | (is (= '(make up) 410 | (list-outer-symbols "make stuff up")))) 411 | (is (= '(defn list-outer-strings [phrase] 412 | (let [bindings-hashmap (token-matcher.core/match '"*front stuff *back" phrase)] 413 | (when bindings-hashmap 414 | (let [*front (get bindings-hashmap '*front) 415 | *back (get bindings-hashmap '*back)] 416 | (list *front *back))))) 417 | (macroexpand-1 '(defn-templating-strings list-outer-strings ["*front stuff *back" 418 | [phrase]] 419 | (list *front *back))))) 420 | (comment ; Not suitable. 421 | (defn-templating-symbols list-outer-symbols ["*front stuff *back" 422 | [phrase]] 423 | (list *front *back)) 424 | (is (= '(make up) 425 | (list-outer-symbols "make stuff up")))) 426 | ;; With optional var, list template: 427 | (is (= '(defn list-elided [phrase] 428 | (let [bindings-hashmap (token-matcher.core/match '((:optional *foo)) phrase)] 429 | (when bindings-hashmap 430 | (let [*foo (get bindings-hashmap '*foo)] 431 | (list *foo))))) 432 | (macroexpand-1 '(defn-templating-strings list-elided [((:optional *foo)) 433 | [phrase]] 434 | (list *foo))))) 435 | )) 436 | 437 | (deftest with-matching-macros-test 438 | ;; We need to refer our used public symbols---in contexts where they 439 | ;; are quoted---because most testers run in the `user` namespace. 440 | (refer 'token-matcher.core :only '[with-matching-template-symbols]) 441 | (testing "with-matching-macros" 442 | ;; With optional var, list template: 443 | (is (= '(clojure.core/let [*foo 'nil] true) 444 | (macroexpand-1 '(with-matching-template-symbols [((:optional *foo)) ""] true)))) 445 | (is (= true 446 | (with-matching-template-symbols [((:optional *foo)) ""] true))) 447 | )) 448 | 449 | (comment ; Make the tesetd functions public, to run these tests. 450 | (deftest isolate-declared-chars-test 451 | (testing "isolate-chars" 452 | (is '["(" "foo" "(" "bar" "(" "baz" "like" ")" ")" ")"] 453 | (isolate-declared-chars "(foo (bar (baz like)))")) 454 | (is ["(" "foo" "(" "ba" "[" "r" "(" "baz" "li" "]" "ke" ")" ")" ")"] 455 | (isolate-declared-chars "(foo (ba[r (baz li]ke)))")) 456 | )) 457 | 458 | (deftest strip-leading-chars-test 459 | (testing "strip-leading-chars" 460 | (is (= "" (strip-leading-chars ""))) 461 | (is (= "Foo" (strip-leading-chars "Foo"))) 462 | (is (= "Foo" (strip-leading-chars ",Foo"))) 463 | (is (= "" (strip-leading-chars "."))) 464 | (is (= "" (strip-leading-chars "?"))))) 465 | 466 | (deftest strip-trailing-chars-test 467 | (testing "strip-trailing-chars" 468 | (is (= "" (strip-trailing-chars ""))) 469 | (is (= "Foo" (strip-trailing-chars "Foo."))) 470 | (is (= "Foo" (strip-trailing-chars "Foo"))) 471 | (binding [*allow-trailing-apostrophe* true] 472 | (is (= "Foo'" (strip-trailing-chars "Foo'"))) 473 | (is (= "Foo'" (strip-trailing-chars "Foo'.")))) 474 | (binding [*allow-trailing-apostrophe* false] 475 | (is (= "Foo" (strip-trailing-chars "Foo'"))) 476 | (is (= "Foo" (strip-trailing-chars "Foo'.")))))) 477 | 478 | (deftest normalize-token-test 479 | (testing "normalize-token" 480 | (binding [*token-substitutions* {"happy" "glad"}] 481 | (is (= "foo" (normalize-token "foo"))) 482 | (is (= "Bob" (normalize-token "Bob"))) 483 | (is (= "glad" (normalize-token "happy"))) 484 | (is (= "glad" (normalize-token "Happy")))))) 485 | 486 | (deftest checking-vars 487 | (testing "var? functions" 488 | (is (= true (*var? '*foo))) 489 | (is (= nil (*var? 'foo))) 490 | (is (= true (+var? '+foo))) 491 | (is (= nil (+var? 'foo))))) 492 | 493 | (deftest updated-var-val-test 494 | (testing "updated-var-val" 495 | (is "bar bell" (updated-var-val "bar" 'bell)))) 496 | 497 | (deftest pruned-by-prefix-test 498 | (testing "pruned-by-prefix" 499 | (is (= #{} (pruned-by-prefix #{"f" "ff" "gf"} "x"))) 500 | (is (= #{} (pruned-by-prefix #{"22"} "1"))) ; Handle a too-long prefix. 501 | (is (= #{"f" "ff"} (pruned-by-prefix #{"f" "ff" "gf"} "f"))) 502 | (is (= #{"f" "ff" "gf"} (pruned-by-prefix #{"f" "ff" "gf"} ""))))) 503 | 504 | (deftest guard-template-vars-test 505 | (testing "guard-template-vars" 506 | (is (= '{*bar "bar", *a nil, *b nil} 507 | (guard-template-vars '{*bar "bar"} '(*a *b)))))) 508 | ) 509 | 510 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # token-matcher 2 | 3 | We provide token-based pattern matching for Clojure: match a template 4 | containing variables against an input string, creating appropriate 5 | bindings that a using application may process further. 6 | 7 | We offer the matcher as one capability towards Clojure-based natural 8 | language processing (NLP) in relatively narrow domains where 9 | hand-crafted solutions may be effective, including for tasks such 10 | as... 11 | 12 | - Acquiring knowledge via controlled natural language 13 | - Extracting knowledge from free text 14 | - Standardizing/summarizing conversational input. 15 | 16 | The matcher supports two kinds of template matching 17 | variables---collectively known as "tm-vars." In knowledge acquisition 18 | and extraction tasks, we have exploited a sort of bootstrapping 19 | process to populate and then refer to named sets (here called "kinds") 20 | of allowed instances (strings of one or more tokens) for set-linked 21 | tm-vars---"+vars". Other tm-vars---"*vars"---may be bound to 22 | instances apart from reference to any named set, may also be tasked 23 | (via annotation) to extend a named set with their binding---even 24 | dynamically, during the course of processing a single template. 25 | 26 | Tm-var annotations also admit raw Clojure expressions used as 27 | "restrictions" that qualify would-be-complete or partial tm-var 28 | instances or as "actions" evaluated for side effects. We wrap 29 | restriction or action expressions to make tm-vars' so-far-matched 30 | values (using `nil`, if unmatched) accessible. 31 | 32 | We call user- or application-defined "short-hand" functions with 33 | tm-var arguments to help define the attributes of annotated tm-vars. The 34 | short-hand functions act like macros whose returned content our parser 35 | splices into a match template before processing. 36 | 37 | Template-embedded lists headed by "control" keywords can be nested to 38 | turn on or off token matching case sensitivity and to specify 39 | optional, one-of-choice, or standard token-series content. 40 | 41 | Top-level interface macros allow application developers to define a 42 | function of a fixed template and an input string argument, in which 43 | the function body is wrapped similarly to restrictions and actions. 44 | 45 | These constructs contribute towards minimizing repetition of 46 | expressions among an application's authored templates, thus enhancing 47 | programmer productivity and product maintainability. 48 | 49 | ## Example operations 50 | 51 | In illustrative examples below, function `match` takes two 52 | arguments---a template and an input string---returning a hashmap of 53 | feasible bindings for tm-vars where such exist, else `nil`. Examples 54 | are taken from among tests in `test/token_matcher/core_test.clj`. 55 | 56 | Sections following these examples present detailed documentation. 57 | 58 | ### Plain tm-var examples 59 | 60 | ```clojure 61 | > (match "foo" "foo") 62 | {} ; Empty hashmap. 63 | 64 | > (match "foo" "bar") 65 | nil ; No match. 66 | 67 | > (match "*foo *foo" 68 | "bar bar") 69 | {*foo "bar"} ; Consistently bound. 70 | 71 | > (match "*foo *bar" 72 | "ho ho") 73 | {*foo "ho", *bar "ho"} 74 | 75 | > (match "do *something with *something-else" 76 | "do all the good you can with all you've got") 77 | {*something "all the good you can", 78 | *something-else "all you've got"} 79 | 80 | > ;;; +Var with (hypothetically) registered kinds: 81 | > (match '(+fruits to +nuts) 82 | "apples to filberts") 83 | {+fruits "apples", +nuts "filberts"} 84 | 85 | > (match '(+fruits to +nuts) 86 | "apples to apples") 87 | nil ; Apples are not nuts. 88 | 89 | > ;;; `thing` is the universal kind. 90 | > (match "+thing" "something") 91 | {+thing "something"} 92 | ``` 93 | 94 | ### Annotated tm-var examples 95 | ```clojure 96 | > ;;; +Var with inline kind: 97 | > (match '([+fruits {:kind #{"apples" "pumpkins" "pears"}}] 98 | to 99 | [+nuts {:kind #{"Brazils" "almonds" "filberts"}}]) 100 | "apples to filberts") 101 | {+fruits "apples", +nuts "filberts"} 102 | ``` 103 | 104 | #### Restriction examples 105 | ```clojure 106 | > (match '(*foo [*bar {:finally? (not (= *bar *foo))}]) 107 | "ho ho") 108 | nil ; They are equal. 109 | 110 | > (match '(*foo [*bar {:finally? (= *bar *foo)}]) 111 | "ho ho") 112 | {*foo "ho" *bar "ho"} 113 | ``` 114 | 115 | #### Short-hand attribute function call examples 116 | ```clojure 117 | ;;; Functions: 118 | 119 | (defn digit-string? [s] 120 | (re-matches #"\d*" s)) 121 | 122 | ;;; Restrict 'this' to a single, all-digit token. 123 | (defn digits-alone [attrs this] 124 | (assoc (assoc attrs :max-tokens 1) 125 | :finally? (conjoin-restrictions `(digit-string? ~this) 126 | (get attrs :finally?)))) 127 | 128 | ;;; Restrict 'this' to a single token of exactly n digits. 129 | (defn n-digits-alone [attrs this n] 130 | (assoc (assoc attrs :max-tokens 1) 131 | :finally? (conjoin-restrictions 132 | `(~'and ; Standardize 'and'. 133 | (digit-string? ~this) 134 | (= (count ~this) ~n)) 135 | (get attrs :finally?)))) 136 | 137 | ;;; To qualify a series of (comma-free, natural) integers. 138 | (defn digits-along [attrs this] 139 | (assoc attrs 140 | :each? (conjoin-restrictions `(digit-string? ~this) 141 | (get attrs :each?)))) 142 | 143 | 144 | > ;;; Expansions: 145 | 146 | > (parse-template '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 147 | *int 148 | 3)])) 149 | ([*int {:finally? (and (token-matcher.core/digit-string? *int) 150 | (clojure.core/= (clojure.core/count *int) 3) 151 | (> (read-string *int) 99)), 152 | :max-tokens 1}]) 153 | 154 | > (parse-template '([*ints (digits-along {} *ints)])) 155 | ([*ints {:each? (token-matcher.core/digit-string? *ints)}]) 156 | 157 | ;;; Tm-Vars with short-hand calls: 158 | 159 | > (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 160 | *int 161 | 3)]) 162 | "1001") 163 | nil ; Too many digits. 164 | > (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 165 | *int 166 | 3)]) 167 | "001") 168 | nil ; Too small a number. 169 | > (match '([*int (n-digits-alone {:finally? (> (read-string *int) 99)} 170 | *int 171 | 3)]) 172 | "101") 173 | {*int "101"} 174 | > (match '([*ints (digits-along {} *ints)]) "29 no") 175 | nil ; "no" not digits. 176 | > (match '([*ints (digits-along {} *ints)]) "29 010") 177 | {*ints "29 010"} 178 | ``` 179 | 180 | ### Template control examples: 181 | ```clojure 182 | > (binding [*case-sensitive* true] 183 | (match "Foo" "foo")) 184 | nil ; Cases don't match. 185 | 186 | > (binding [*case-sensitive* true] 187 | (match "(:-case Foo)" "foo")) 188 | {} 189 | 190 | > (match '((:optional *bar stool) 191 | [*quack (different-when-bound {} *quack *bar)]) 192 | "bar stool like a duck") 193 | {*bar "bar", *quack "like a duck"} 194 | 195 | > (match '((:choice foo bar)) "bar") 196 | {} 197 | ``` 198 | 199 | ### Defining form examples 200 | ```clojure 201 | > (defn-templating-strings list-outer-strings ["*front stuff *back" 202 | [phrase]] 203 | (list *front *back)) 204 | `#'``/list-outer-strings` 205 | > (list-outer-strings "make stuff up") 206 | ("make" "up") 207 | 208 | > (defn-templating-symbols list-outer-symbols ["*front stuff *back" 209 | [phrase]] 210 | (list *front *back)) 211 | `#'``/list-outer-symbols` 212 | > (list-outer-symbols "make stuff up") 213 | (make up) 214 | ``` 215 | ## Template grammar 216 | 217 | In production rules below, ... 218 | - Angle brackets surround a grammar \. 219 | - \+ denotes one or more of \. 220 | - \* denotes zero or more of \. 221 | - ":-" separates rules' left- and right-hand sides. 222 | - "|" separates right-hand sides alternatives. 223 | 224 | We present first standard, internal template form, then extend this to 225 | input template form accommodating short-hand function calls that 226 | should expand (during parsing) into internal form. 227 | 228 | ### Internal template form 229 | 230 | \ :- \ | \ 231 | 232 | \ :- `"`\*`"` 233 | 234 | \ :- `(`\*`)` 235 | 236 | \ :- \ | \ | `(`\ \*`)` 237 | 238 | \ :- \ | \ 239 | 240 | \: A non-empty string free of whitespace, free of 241 | leading and trailing chars in `*chars-to-strip*`, and free of chars in 242 | `*chars-to-isolate*`. For convenience, alternatively, when feasible, 243 | a symbol whose name is a token. 244 | 245 | \: A string of a single char included in 246 | `*chars-to-isolate*` (necessarily including `\(`, `\)`, `\[`, `\]`, 247 | `\{`, `\}`). 248 | 249 | \ :- \ | \ 250 | 251 | \: A symbol starting with `*` (*var) or `+` (+var). 252 | 253 | \ :- `[`\ \`]` 254 | 255 | \ :- `{`\<\ \\>*`}` 256 | 257 | \<\ \\> :- `:kind` \ | `:always?` 258 | \ | `:each?` \ | `:finally?` 259 | \ | `:finally.` \ 260 | 261 | \ :- \ | `#{`\+`}` 262 | 263 | \: A string of one or more tokens separated by spaces. 264 | 265 | \: Clojure expression evaluating truthy/falsey. 266 | 267 | \: Clojure expression to be evaluated for side effects. 268 | 269 | \ :- \ | \ 270 | 271 | \ :- `:+case` | `:-case` 272 | 273 | \ :- `:optional` | `:serial` | `:choice` 274 | 275 | ### Input template form: 276 | 277 | Short-hand function calls must expand to expected internal form. 278 | "+:-" below indicates an additional alternative for \, beyond 279 | those specified above using ":-". 280 | 281 | \ +:- `(`\ \*`)` 282 | 283 | \: Symbol naming a short-hand attribute function. 284 | 285 | \ :- \ | \ | \ 286 | 287 | \: Some Clojure datum. 288 | 289 | ## Token handling 290 | 291 | Templates can be either strings or lists. We use 292 | `clojure.edn/read-string` to parse string templates into list 293 | templates. 294 | 295 | We expect templates to be clean. Tokens should be free of chars in 296 | `*chars-to-strip*`, chars in `*chars-to-isolate*` should already be 297 | isolated, and all expressions should conform to the template language. 298 | 299 | To include in a template a token that would cause `read-string` 300 | to error out, stringify it---as in `"#this"` (in a list) or 301 | `\"'this\"` (in a string). Do the same for a token that the 302 | matcher would recognize as a tm-var but that you would like 303 | to match explicitly in input---as in `"*that"`. When `\'` is not 304 | in `*chars-to-strip*` and not in `*chars-to-isolate*`, do the same 305 | where `\'` begins a token---as in `"'twas"`. Otherwise, 306 | tokens (besides `nil`---see next) that `read-string` will 307 | recognize as symbols may be expressed as such in a list template. 308 | 309 | To include the token `"nil"` in a string template, explicitly quote 310 | it: `\"nil\"`. In a list template, use `"nil"`. 311 | 312 | List templates may not include multi-token strings, as in `("#this 313 | that 7.5")`. Stringify (double-quote) exceptional tokens 314 | individually, as in `("#this" that "7.5")`. 315 | 316 | To customize what (e.g., punctuation) characters get stripped 317 | automatically from adjacent tokens, edit or rebind dynamic (Clojure) 318 | var `*chars-to-strip*`. Strippable characters that are not adjacent 319 | to alphanumeric tokens are discarded. A single trailing quote mark 320 | (`\'`) is handled specially, per dynamic var 321 | `*allow-trailing-apostrophe*`. 322 | 323 | An application can also rebind dynamic vars... 324 | 325 | - `*token-substitutions*` (hashamp), to standardize selected 326 | tokens---e.g., to substitute `"a"` for `"an"` 327 | 328 | - `*chars-to-isolate*` (set), to isolate (e.g.) `\(` to `"("` 329 | 330 | - `*case-sensitive*` (Boolean), re template token to input token 331 | matching. 332 | 333 | We parse the second, string argument to `match` to create a list 334 | of normal tokens (stripping and isolating chars, per spec). 335 | 336 | ## Tm-vars 337 | 338 | Tm-vars either have matching mode input/output (*vars) or mode 339 | input-only (+vars). A *var may match any series of tokens. A +var's 340 | permitted matching token series are limited to a known set of 341 | instances. 342 | 343 | Only the template contains tm-vars (not the input string). 344 | 345 | A tm-var, unless in the scope an `:optional` or `:choice` control, must 346 | match at least one input string token. It may match a greater number 347 | of tokens. 348 | 349 | A tm-var occurring more than once in a template must match consistently. 350 | E.g., template `(*foo *foo)` will match input `"bar bar"` but not 351 | `"bar bell"`. We take a same-root-named pair of *var and +var (e.g., 352 | `*part` and `+part`) to be two different tm-vars not requiring such 353 | consistency. We recommend against later occurrences transcending 354 | scopes of earlier occurrences' containing`:optional` or `:choice` 355 | controls. Instead, use a restriction calling `same-when-bound`. 356 | 357 | ## +Vars 358 | 359 | We will know all of a +var's possible values before processing it in a 360 | template. As of v0.2, the matcher uses 361 | [clolog](https://github.com/bobschrag/clolog) to express and reason 362 | about kind instances and kind-subkind relationships. token-matcher 363 | provides a kind knowledge base API whose elements have the following 364 | docstrings. 365 | 366 | ```clojure 367 | (def ^:dynamic *kind-assertions* 368 | "The knowledge base of assertions for token-matcher kind reasoning. 369 | Initialized (in `initialize-kind-assertions`) to the (empty) 370 | initialized Prolog knowledge base." 371 | ;; ... 372 | ) 373 | 374 | (defn initialize-kind-assertions [] 375 | "Clears the kind knowledge base and adds assertions to support 376 | subkind reasoning." 377 | ;; ... 378 | ) 379 | 380 | (defn initialize-matcher [] 381 | "Currently, just initializes the kind knoweldge base." 382 | ;; ... 383 | ) 384 | 385 | (defn get-kind-instances [kind] 386 | "Returns the registered instances of `kind` (including instances of 387 | subkinds of `kind`)." 388 | ;; ... 389 | ) 390 | 391 | (defn add-kind-instance [kind instance] 392 | "Adds `instance` to `kind`." 393 | ;; ... 394 | ) 395 | 396 | (defn add-subkind [kind subkind] 397 | "Makes `subkind` a subkind of `kind`." 398 | ;; ... 399 | ) 400 | 401 | (defn install-kind-instance-map [kind-instance-map] 402 | "Clears the kind knowledge base and asserts instances from a map 403 | with kind keys and instance vector values. See 404 | `test/clolog/core_test.clj`." 405 | ;; ... 406 | ) 407 | ``` 408 | 409 | See `test/token-matcher/example-kb-api.clj` for our generic take on 410 | how a combined clolog/token-matcher application can interact with the 411 | Prolog knowledge base. 412 | 413 | ## Annotated tm-vars 414 | 415 | A tm-var can either be "plain"---e.g., a single Clojure symbol like 416 | `*foo` or `+bar`---or "annotated"---a symbol-and-attribute map Clojure 417 | vector like `[*foo {:kind "football"}]`. Supported attributes 418 | include... 419 | 420 | - Instance kind specification, as suggested above 421 | 422 | - We can handle different tm-vars of the same kind in a given 423 | template this way---as in `"[+subpart {:kind part}] is a part 424 | of [+superpart {:kind part}]"`. 425 | 426 | - Inline an instance set, for an ephemeral kind (not recorded 427 | on `*kind-instances*`)---as in `[+foo {:kind #{"any" "one 428 | of" "these"}}]`. This can be useful when the requirement for 429 | a given set is specific to a given template, and there is no 430 | apparent benefit to giving the set a name on 431 | `*kind-instances*`. We support this form only for a +var. 432 | 433 | - When attribute `:kind` is included in a *var's annotation, we add 434 | its binding to the instance registry for the specified kind. 435 | 436 | - To accommodate backtracking search, the matcher will extend the 437 | instance registry only locally, until the binding finally is 438 | included in a recognized match. 439 | 440 | - A template's earlier *var instances discovered thus can be 441 | available to its later +vars specifying the same kind. 442 | 443 | - An inline set is not an appropriate value for a *var's `:kind` 444 | attribute. 445 | 446 | - Token cardinality restrictions, as in `[*book_2 {:kind book 447 | :min-tokens` \ `:max-tokens` \`}]`, where both \ 448 | and \ are integers \>= 1 449 | 450 | - Instance restrictions---Clojure forms that must evaluate 451 | truthy (i.e., other than `false` or `nil`) to qualify a 452 | partial or full candidate binding. Three varieties: 453 | 454 | - `{:always?` \`}`, to check \ each time a 455 | tm-var's candidate binding is extended with a next input token. So, 456 | `[*foo {:max-tokens 2}]` could alternatively be specified as `[*foo 457 | {:always? (<= (count-tokens *foo) 2)}]`. 458 | 459 | - `{:each?` \`}`, to check \ applied just to 460 | the next token that would extend a tm-var's candidate binding. See 461 | our definition of `digits-along`, for some motivation. 462 | 463 | - `{:finally?` \`}`, to check \ only when a 464 | tm-var's candidate binding is complete. So, [*foo {:min-tokens 465 | 2}] could be specified as `[*foo {:finally? (>= (count-tokens 466 | *foo) 2)}]`. 467 | 468 | Such forms may refer to earlier tm-vars (in doing so, 469 | will reference their bindings) or to a thread's Clojure vars. 470 | Example, when `*part_1` occurs earlier in a template: 471 | `{:finally? (not+ *part_1 *part_2)}`. In this setting, we 472 | bind any vars as yet unbound by the matcher to `nil`, so using 473 | code can branch on that. See, e.g., our definition for 474 | `different-when-bound`. Call functions that are closures to 475 | access local content in their captured scope. See our test 476 | `template-local-test`. 477 | 478 | ```clojure 479 | (defn different-when-bound [attrs this other] 480 | (assoc attrs 481 | :finally? (conjoin-restrictions `(if (and ~other ~this) 482 | (not= ~this ~other) 483 | true) 484 | (get attrs :finally?)))) 485 | ``` 486 | 487 | We accommodate compound conditions via `and`, `or`, `not`, ... 488 | 489 | - Instance actions---Clojure forms to be evaluated for their side 490 | effects, once a binding value has been qualified finally. 491 | 492 | In this setting, also, we bind any elided optional-scope tm-vars to 493 | `nil`. (`different-when-bound` remains a good reference, form-wise. 494 | Replace the truthy condition with some side effect. Call 495 | `adjoin-actions` rather than `conjoin-restrictions`.) 496 | 497 | Example: `{:finally. (println (format-cl "Matched *part to 498 | \'~a\'..." *part))}`. 499 | 500 | Wrap multiple actions in `do`. 501 | 502 | Given that even "qualified finally" bindings may be backtracked out 503 | of without having been included in a complete match, we recommend 504 | deferring material actions to a template's final tm-var, if not to a 505 | calling application (considering that a given template and input 506 | string may not match uniquely). 507 | 508 | token-matcher binds `clolog.core/*assertions*` (clolog's Prolog 509 | knowledge base---KB) to `*kind-assertions*`. If, in your actions, 510 | you'd like any Prolog operations to pertain to a different KB, wrap 511 | your actions in an appropriate context binding (via `binding`) 512 | `clolog.core/*assertions*`. 513 | 514 | An action might connect multiple tm-var's bindings in a logical 515 | assertion, e.g. As we do with *var-based dynamic kind instance 516 | registration, you might consider treating any within-template, KB 517 | actions as provisional until a match is complete and you've had a 518 | chance to review their effects. Consider modifying our source code 519 | to... 520 | 521 | - Fork your KB for backtracking search, as we do in 522 | `match-var-finally` 523 | 524 | - Track cumulative KB modifications, like our tracking of 525 | matcher-added assertions, in `match-var-finally` and pervasively 526 | via `def-match-fn`-based argument `:assertions`, finally reported 527 | along with a matcher answer in `match-constructs`. 528 | 529 | When a tm-var occurs more than once in a template, a later occurrence's 530 | annotation matters only if all earlier occurrences have been elided, 531 | per their scope within `:optional` or `:choice` controls. (We 532 | recommend against multiple occurrences transcending scopes of earlier 533 | occurrences' containing `:optional` or `:choice` controls. Instead, 534 | use our short-hand function `same-when-bound`.) Otherwise, later 535 | occurrences must share an earliest occurrence's binding. 536 | 537 | ## Attribute short-hand functions 538 | 539 | Instead of an explicit attribute map, an annotated tm-var may have a call 540 | to an attribute "short-hand" function. The spec `[*int (digits-alone 541 | {} *int)]`, e.g., expands at parse time to the attribute map `[*int 542 | {:max-tokens 1, :finally? (digit-string? *int)}]`. This map's effect 543 | is to restrict `*int`'s binding to a single token, all of whose 544 | characters are (per the regex uesd in `digit-string?`) digits. 545 | 546 | ```clojure 547 | (defn digit-string? [s] 548 | (re-matches #"\d*" s)) 549 | 550 | (defn digits-alone [attrs this] 551 | (assoc (assoc attrs :max-tokens 1) 552 | :finally? (conjoin-restrictions `(digit-string? ~this) 553 | (get attrs :finally?)))) 554 | ``` 555 | 556 | To keep templates uncluttered, we walk them to quote short-hand 557 | function calls' arguments automatically. To test a function 558 | like `digits-alone` outside the matcher, quote the arguments 559 | manually as necessary, as in `(digits-alone {} '*int)`. 560 | 561 | As you can see in our definitions for `digits-alone` and related 562 | functions and tests, an attribute short-hand function... 563 | 564 | - Should include at least an argument (we use `this`) to 565 | accommodate the plain version of the current tm-var (e.g., `*int`). 566 | 567 | - May include arguments to accommodate any tm-vars in using 568 | templates. 569 | 570 | - May include (as all ours do) an argument for an 571 | existing (perhaps empty) attribute map, thus supporting 572 | composition of short-hand function calls. 573 | 574 | - May include other arguments as needed. See our definition for 575 | `n-digits-alone`. 576 | 577 | ```clojure 578 | (defn n-digits-alone [attrs this n] 579 | (assoc (assoc attrs :max-tokens 1) 580 | :finally? (conjoin-restrictions 581 | `(~'and 582 | (digit-string? ~this) 583 | (= (count ~this) ~n)) 584 | (get attrs :finally?)))) 585 | ``` 586 | 587 | ## Template controls 588 | 589 | Template controls can surround any terms and can be nested. 590 | 591 | The mode control `:+case` arranges for its contained terms to be 592 | processed with dynamic var `*case-sensitive*` bound to `true`, 593 | `:-case` to `false`. 594 | 595 | The sequence control `:optional` allows its content to be elided. 596 | 597 | `:choice` allows any one (and only one) of its top-level content 598 | items. Matching considers these items in the order listed. 599 | 600 | `:series` requires all of its top-level items, in order. Ths can be 601 | useful within the scope of a `:choice` control. A top-level templates 602 | implicitly is contained by a `:series` control. 603 | 604 | ## Template-matching interfaces 605 | 606 | We provide several interfaces for interacting with the matcher. 607 | 608 | - The function `match` returns either a single match (a hashmap 609 | associating tm-vars with token strings) or `nil` (for no match). 610 | `match` returns the first match discovered when binding as as many 611 | tokens as possible to each tm-var when processing templates from 612 | left to right. 613 | 614 | - `match-details` is like `match` but returns either `nil` or a 615 | two-element vector including... 616 | 617 | - The result as from `match` 618 | 619 | - A vector of assertions that were added, ephemerally, to 620 | `*kind-assertions*`, to realize the result. We leave it up to 621 | you/your application to add these assertions as warranted. 622 | 623 | - `match-uniquely` is like `match-details` but raises an error if 624 | there is more than one match. 625 | 626 | - Most general is the function `matches` that returns a set of either 627 | all results as from `match-details` or up to a specified number of 628 | such results. 629 | 630 | Our intuition is that applications will require either... 631 | 632 | - A single match if it exists (`match` or `match-details` should 633 | suffice.) 634 | 635 | - All matches. (Use `matches` with no limit.) 636 | 637 | - A single match and whether it is unique. Use `matches` with a 638 | value of `2` for argument `limit`, as in 639 | `match-uniquely`. Considering that if a returned match were not 640 | unique, there could be some undesirable ambiguity in a using 641 | application's templates and/or the presented input strings, 642 | `matches` affords the opportunity either to accept (say, `reset!` 643 | the global version) or discard the returned version of 644 | `*kind-instances*`. 645 | 646 | Multiple matches may be appropriate in some applications, not in 647 | others (e.g., knowledge acquisition via controlled natural 648 | language). Multiple matches consistent with a given template/input 649 | pair are more likely when either... 650 | 651 | - The template has optional tm-vars. 652 | 653 | - The template has consecutive tm-vars---especially consecutive 654 | *vars. 655 | 656 | - The input includes consecutive like tokens that may match a tm-var. 657 | 658 | - `match-pre-parsed` and `matches-pre-parsed`---versions of 659 | `match-details` and `matches` that skip the work of parsing, in case 660 | a template may be applied repeatedly. 661 | 662 | - Two macros for defining functions that match an input argument to a 663 | fixed template and expose the template's tm-vars and matched values 664 | as locals in the defined function's body. Here again, we bind any 665 | elided optional-scope tm-vars to `nil`. So far, these macros use only 666 | `match` (not `match-details` or `matches`). 667 | 668 | - `defn-templating-strings` 669 | 670 | - `defn-templating-symbols` 671 | 672 | See examples in the large comment in the source file's section 673 | "Template-matching defining forms," also the helper functions 674 | `tokens->symbol`, `symbol->tokens` that an application may benefit 675 | from. This is the sole method of calling the matcher that we have 676 | used in our own (proprietary) application. 677 | 678 | - REPL-serviceable macros `with-matching-template-strings`, 679 | `with-matching-template-symbols`. 680 | 681 | ## Future work ideas 682 | 683 | We might pursue some of the following ideas towards increasing matcher 684 | expressivity, efficiency, robustness, and scale, given motivating use 685 | cases. 686 | 687 | ### Ideas for expressivity 688 | 689 | We might... 690 | 691 | - Enable short-hand function calls at the sequence level, splicing in 692 | results to yield standard, internal form. This is an easy lift, 693 | awaiting a real-world use case where it clearly furthers code 694 | duplication avoidance. 695 | 696 | - Support explicit anonymous tm-vars (say, `*_`, `+_`) for which 697 | no bindings are recorded (so, for which consistency across 698 | occurrences is not required). 699 | 700 | - Handle anonymous tm-vars implicit in the use of sequence controls. 701 | Consider the following hypothetical call---free of explicit 702 | tm-vars---and its resulting hypothetical match. 703 | 704 | ```clojure 705 | > (match '(:series this 706 | (:optional (:choice always never)) 707 | (:choice is was) 708 | (:choice going (:series starting 709 | (:choice before after) 710 | (:choice daylight midnight))) 711 | to be confusing) 712 | "this is starting before midnight to be confusing") 713 | {*:s1 "this is starting before midnight to be confusing" 714 | +:s1.o1 nil, 715 | *:s1.o1.c1 nil, 716 | +:s1.c2 "is", 717 | +:s1.c3 "starting before midnight", 718 | *:s1.c3.s1 "starting before midnight", 719 | +:s1.c3.s1.c1 "before", 720 | +:s1.c3.s1.c2 "midnight"} 721 | ``` 722 | 723 | We suppose a using application might benefit by examining such 724 | bindings for sequence context tm-vars. 725 | 726 | Delivering tm-var-free instance bindings (as above) for series 727 | containing choices may involve initially (internally) including and 728 | then systematically de-referencing such tm-vars. E.g., an initial 729 | binding for `*:s1.c3.s1` might be `"starting +:s1.c3.s1.c1 730 | +:s1.c3.s1.c2"` (perhaps more efficiently left as the list 731 | `(starting +:s1.c3.s1.c1 +:s1.c3.s1.c2)`). 732 | 733 | - Return either longest or shortest tm-var bindings first. With 734 | supposed dynamic var `*shortest-first*` (and controls `:shortest`, 735 | `:longest`), prefer eliding optional content. Handle corresponding 736 | controls `:shortest`, `:longest` like `:+case`, `:-case`. Again, 737 | this is an easy lift for which we await a use case it would clearly 738 | matter in. 739 | 740 | ### Ideas for efficiency 741 | 742 | Some ideas here might be advantageous for some use cases or 743 | application mixes, depending on indexing and compilation overhead. 744 | 745 | - Represent instances for a large-cardinality kind, rather than in a 746 | flat set as here, in an efficiently traversable trie (i.e., a token 747 | prefix tree---ideally with a hashamap of allowed next tokens at each 748 | prefix node). Then replace our winnowing down of `current-+set` 749 | with successive trie queries. 750 | 751 | - Represent templates in a trie---towards identifying which templates 752 | can match a given input string. 753 | 754 | - Index templates' tokens to obviate unnecessary matching work. 755 | 756 | - At minimum, ensure each of a template's unelidable tokens occurs 757 | in input (non-empty set difference). 758 | 759 | - Note (compile templates') indices of occurrences within a template 760 | control scope. Ensure that (e.g.)... 761 | 762 | - Templates' and inputs' consecutive tokens are consistent, 763 | pairwise. 764 | 765 | - The remaining indices of a template or control branch thereof do 766 | not exceed corresponding tokens' indices in input. 767 | 768 | - Perhaps in addition to indexing, ... 769 | 770 | - Compute and incrementally track a template's token capacity---the 771 | maximum number of input tokens it can match. Because a single 772 | *var can match infinitely many tokens, this makes sense only when 773 | a template (or traversed branch thereof) necessarily ends in a 774 | token or a +var---or (if we'd like to consider processing 775 | templates and inputs in reverse) begins so. 776 | 777 | - Track an input's remaining token load, then fail early, should 778 | load exceed capacity. Manage also the symmetric case, to catch 779 | (e.g.) a template with more necessary (unelidable) tokens and tm-vars 780 | than its corresponding input has tokens. 781 | 782 | - Employ Clojure's transient collections where appropriate internally, 783 | returning persistent results. 784 | 785 | - Consider both laziness and parallelism in multiple match 786 | processing (using `matches`). 787 | 788 | ### Ideas for robustness 789 | 790 | To make the matcher accessible to a broader range of users and for a 791 | broader range of use cases, we might... 792 | 793 | - Perform systematic error checking and exception handling. 794 | 795 | - Employ `recur` or `trampoline` for tail recursion optimization, to 796 | obviate deep call stacks. 797 | 798 | - Enhance tests by arranging for them to bind explicit values for the 799 | dynamic Clojure vars we've offered for user/application 800 | customization---rather than just assuming our default values as in 801 | most tests now. Consider customizing `deftest`, perhaps differently 802 | for different groups of tests, towards economizing expression in 803 | individual tests. 804 | 805 | ### Ideas for scale 806 | 807 | Consider a trie database of kinds/types---ideally one capable of 808 | optionally case-sensitive queries. Address logical subtype and 809 | subpredicate reasoning using this database. 810 | 811 | ## Limitations 812 | 813 | Accommodating large kinds could commend adding clolog support for unit 814 | ground assertions in a database. 815 | 816 | The matcher is recursive and has not been architected to support 817 | `trampoline`---it backtracks via Clojure `or`, not by passing a 818 | continuation (the way clolog does). As such, feasible match length is 819 | bounded by the Clojure stack limit. One way to overcome this might be 820 | to rewrite the matcher's logic in clolog. 821 | 822 | ## License 823 | 824 | Copyright © 2023 Robert Carl Schrag 825 | 826 | This program and the accompanying materials are made available under 827 | the terms of the Eclipse Public License 2.0 which is available at 828 | http://www.eclipse.org/legal/epl-2.0. 829 | 830 | This Source Code may also be made available under the following 831 | Secondary Licenses when the conditions for such availability set forth 832 | in the Eclipse Public License, v. 2.0 are satisfied: GNU General 833 | Public License as published by the Free Software Foundation, either 834 | version 2 of the License, or (at your option) any later version, with 835 | the GNU Classpath Exception which is available at 836 | https://www.gnu.org/software/classpath/license.html. 837 | 838 | ## Acknowledgements 839 | 840 | Thanks to folks at Franz (Inc.) and at Elemental Cognition (Inc.) for 841 | posing inspiring problems and to folks on Clojurians Slack channels 842 | for gracious assistance. 843 | -------------------------------------------------------------------------------- /src/token_matcher/core.clj: -------------------------------------------------------------------------------- 1 | (ns token-matcher.core 2 | (:require [clolog.core :as pl :refer :all] 3 | [clojure.pprint :refer [pprint cl-format]] 4 | [clojure.string :as str] 5 | [clojure.set] 6 | [riddley.walk :refer [walk-exprs]] 7 | )) 8 | 9 | ;;;;; Preliminaries: 10 | ;;; 11 | ;;; Source code variables, functions, and macros are defined before 12 | ;;; they are mentioned. For top-down comprehension, perhaps read this 13 | ;;; file starting at the bottom? Section title comments run in both 14 | ;;; directions: ":" for reading down, " ^^" for reading up---around a 15 | ;;; long line of dashes. (See the first such a few lines below.) 16 | 17 | ;;;;; Preliminaries ^^ 18 | ;;;;;---------------------------------------------------------------- 19 | ;;;;; Token handling: 20 | 21 | (defn parse-string-template 22 | "Parse string `template`, returning a list template." 23 | [template] 24 | (let [r (java.io.PushbackReader. (java.io.StringReader. template))] 25 | (take-while (complement #{::eof}) 26 | (repeatedly #(read {:eof ::eof} r))))) 27 | 28 | (defn- standard-split 29 | "Split `s` at whitespace chars, returning a vec of (unnormalized) 30 | tokens." 31 | [s] 32 | (str/split (str/triml s) #"\s+")) 33 | 34 | (def ^:dynamic *chars-to-isolate* 35 | "The set of chars to be isolated as single-char tokens." 36 | ;; Don't remove any of these chars: 37 | #{\( \) \[ \] \{ \}}) 38 | 39 | (defn- isolate-declared-chars 40 | "Perform a standard split, after isolating chars in 41 | `*chars-to-isolate*` to single-char tokens." 42 | [s] 43 | (standard-split (apply str (mapcat (fn [char] 44 | (if (contains? *chars-to-isolate* char) 45 | (list \space char \space) 46 | (list char))) 47 | (seq s))))) 48 | 49 | ;;; Dynamic, so that a using application can override with 50 | ;;; (binding [*chars-to-isolate* ...] ...). 51 | (def ^:dynamic *chars-to-strip* 52 | "The set of chars normalization will strip when they lead or trail an 53 | input token." 54 | #{\, \. \; \: \? \! \" \'}) 55 | 56 | (defn- strippable? [char] 57 | (contains? *chars-to-strip* char)) 58 | 59 | (defn- strip-leading-chars 60 | "Strip strippable leading chars from token." 61 | [s] 62 | (if-not (seq s) 63 | s 64 | (if (strippable? (first s)) 65 | ;; Strip off leading character. 66 | (strip-leading-chars (subs s 1)) 67 | ;; Nothing to strip. 68 | s))) 69 | 70 | (def ^:dynamic *allow-trailing-apostrophe* false) 71 | 72 | (defn- strip-trailing-chars 73 | "Strip strippable trailing chars from token." 74 | [s] 75 | (if-not (seq s) 76 | s 77 | ;; Is the last character strippable? 78 | (let [trailing-char (last s)] 79 | (if (and (strippable? trailing-char) 80 | (if (and (contains? *chars-to-strip* \') 81 | ;; Not going so far for now as to allow only 82 | ;; one... 83 | *allow-trailing-apostrophe*) 84 | (not= trailing-char \') 85 | true)) 86 | ;; Strip off trailing character. 87 | (strip-trailing-chars (subs s 0 88 | (- (count s) 89 | 1))) 90 | ;; Nothing to strip. 91 | s)))) 92 | 93 | ;;; Use a string-key/value hashmap, e.g., 94 | ;;; {"can't" "cannot", "big" "large"}. Keys must be lower-case. 95 | ;;; TODO: Accommodate upper-case keys. 96 | (def ^:dynamic *token-substitutions* {}) 97 | 98 | (defn- normalize-token 99 | "Strip token, apply single-token substitutions." 100 | [token] 101 | (let [token (strip-leading-chars token) 102 | token (strip-trailing-chars token) 103 | token (or (get *token-substitutions* (str/lower-case token)) 104 | ;; Leave anything else alone. 105 | token)] 106 | token)) 107 | 108 | (defn- parse-input-string 109 | "Return a vector of the non-empty, normalized tokens of `s`." 110 | [s] 111 | (let [tokens (isolate-declared-chars s) 112 | tokens (map normalize-token tokens) 113 | tokens (filter #(not= % "") tokens)] 114 | (vec tokens))) 115 | 116 | ;;;;; Token handling ^^ 117 | ;;;;; ---------------------------------------------------------------- 118 | ;;;;; Kind instance registration: 119 | 120 | ;;; We use clolog to register a kind's instance strings that a 121 | ;;; template's +var will match against. 122 | 123 | (def ^:dynamic *kind-assertions* 124 | "The knowledge base of assertions for token-matcher kind reasoning. 125 | Initialized (in `initialize-kind-assertions`) to the (empty) 126 | initialized Prolog knowledge base." 127 | ;; An atom with a placeholder value. 128 | (atom {})) 129 | 130 | (defn initialize-kind-assertions [] 131 | "Clears the kind knowledge base and adds assertions to support 132 | subkind reasoning." 133 | (binding [*assertions* *kind-assertions*] 134 | (initialize-prolog) 135 | ;; Sample application knoweldge (facts): 136 | (comment 137 | (binding [*assertions* *kind-assertions*] 138 | (<- (has-subkind "vertebrate" "mammal")) 139 | (<- (has-subkind "mammal" "primate")) 140 | (<- (has-subkind "primate" "human")) 141 | ;; Idea is to handle instance mentions at any level. 142 | (<- (has-kind "Bob" "vertebrate")) 143 | (<- (has-kind "Sally" "mammal")) 144 | (<- (has-kind "Joe" "primate")) 145 | (<- (has-kind "Freida" "human")))) 146 | ;; Rules. 147 | (<- (has-kind* ?instance ?kind) 148 | (has-kind ?instance ?kind)) 149 | (<- (has-kind* ?instance ?kind) 150 | (has-subkind* ?kind ?subkind) 151 | (has-kind ?instance ?subkind)) 152 | ;; `has-subkind` is non-transitive (to avoid infinite recursion) . 153 | (<- (has-subkind* ?kind ?subkind) 154 | (has-subkind ?kind ?subkind)) 155 | ;; This assertion of `has-subkind*` is transitive. 156 | (<- (has-subkind* ?kind ?subsubkind) 157 | (has-subkind ?kind ?subkind) 158 | (has-subkind* ?subkind ?subsubkind)))) 159 | 160 | (defn initialize-matcher [] 161 | "Currently, just initializes the kind knoweldge base." 162 | (initialize-kind-assertions)) 163 | 164 | (declare symbol->instance) 165 | 166 | (defn get-kind-instances [kind] 167 | "Returns the registered instances of `kind` (including instances of 168 | subkinds of `kind`)." 169 | (binding [*assertions* *kind-assertions*] 170 | (set (query '?instance 171 | ;; Introduce `thing`, the universal kind. 172 | (if (= kind 'thing) 173 | '((has-kind ?instance ?_kind)) 174 | `((~'evals-from? ~'?kind (quote ~kind)) ; TODO: `->?` 175 | (~'has-kind* ~'?instance ~'?kind))))))) 176 | 177 | (defn add-kind-instance [kind instance] 178 | "Adds `instance` to `kind`." 179 | (binding [*assertions* *kind-assertions*] 180 | (let [assertion `((~'has-kind ~instance ~kind))] 181 | ;; We want to avoid duplicates. 182 | (assert<-_ assertion) 183 | assertion))) 184 | 185 | (defn add-subkind [kind subkind] 186 | "Makes `subkind` a subkind of `kind`." 187 | (binding [*assertions* *kind-assertions*] 188 | (let [assertion `((~'has-subkind ~kind ~subkind))] 189 | ;; Avoid duplicates. 190 | (assert<-_ assertion) 191 | assertion))) 192 | 193 | (defn install-kind-instance-map [kind-instance-map] 194 | "Clears the kind knowledge base and asserts instances from a map 195 | with kind keys and instance vector values. See 196 | `test/clolog/core_test.clj`." 197 | (initialize-kind-assertions) 198 | (doseq [kind-instances (vec kind-instance-map)] 199 | (let [[kind instances] kind-instances] 200 | (doseq [instance instances] 201 | (add-kind-instance kind instance))))) 202 | 203 | ;;;;; Kind instance registration ^^ 204 | ;;;;; ---------------------------------------------------------------- 205 | ;;;;; Variables: 206 | 207 | (defn- plain-*var? [construct] 208 | (if (and (symbol? construct) 209 | ;; 'name' here strips down to the simple symbol. 210 | (= (nth (name construct) 0) \*)) 211 | true 212 | ;; Harmonize our match functions' results to use nil, rather than 213 | ;; false. 214 | nil)) 215 | 216 | (defn- annotated-*var? [construct] 217 | (if (and (vector? construct) 218 | (plain-*var? (first construct))) 219 | true 220 | nil)) 221 | 222 | (defn- *var? [construct] 223 | (if (or (plain-*var? construct) 224 | (annotated-*var? construct)) 225 | true 226 | nil)) 227 | 228 | (defn- plain-+var? [construct] 229 | (if (and (symbol? construct) 230 | (= (nth (name construct) 0) \+)) 231 | true 232 | nil)) 233 | 234 | (defn- annotated-+var? [construct] 235 | (if (and (vector? construct) 236 | (plain-+var? (first construct))) 237 | true 238 | nil)) 239 | 240 | (defn- +var? [construct] 241 | (if (or (plain-+var? construct) 242 | (annotated-+var? construct)) 243 | true 244 | nil)) 245 | 246 | (defn- annotated-var? [construct] 247 | (or (annotated-*var? construct) 248 | (annotated-+var? construct))) 249 | 250 | (defn- plain-var? [construct] 251 | (or (plain-*var? construct) 252 | (plain-+var? construct))) 253 | 254 | ;;; Not "var?", so that we don't shadow clojure.core/var?. 255 | (defn- tm-var? [construct] 256 | (or (*var? construct) (+var? construct))) 257 | 258 | (defn- plain-var 259 | "Return the plain var symbol of a potentially annotated var." 260 | [var] 261 | (if (annotated-var? var) 262 | (first var) 263 | var)) 264 | 265 | ;;; For var, e.g., '+foo', return symbol 'foo'. 266 | (defn- var->root-symbol 267 | "Return the symbol resulting from stripping the leading `*` or `+` 268 | from a var symbol's name." 269 | [var] ; *var or +var 270 | (clojure.edn/read-string (subs (str var) 1))) 271 | 272 | (defn- +var-kind [var] 273 | (if (annotated-+var? var) 274 | (:kind (second var)) 275 | (var->root-symbol var))) 276 | 277 | (defn- +var-kind-instances [var] 278 | (let [kind (+var-kind var)] 279 | (if (set? kind) 280 | ;; Inline kind instances (no registered kind name). 281 | kind 282 | ;; Registered kind usage. 283 | (when kind (get-kind-instances kind))))) 284 | 285 | (defn- *var-kind [var] 286 | (if (annotated-*var? var) 287 | (:kind (second var)) 288 | nil)) 289 | 290 | (defn- var-kind [var] 291 | (if (+var? var) 292 | (+var-kind var) 293 | ;; Else. 294 | (*var-kind var))) 295 | 296 | (defn- var-attributes [var] 297 | (when (annotated-var? var) 298 | (or (second var) {}))) 299 | 300 | ;;;;; Variables ^^ 301 | ;;;;; ---------------------------------------------------------------- 302 | ;;;;; Attribute short-hand functions: 303 | 304 | ;;; [*foo (num-tokens-range {} 1 3)] 305 | (defn num-tokens-range [attrs min max] 306 | (assoc (assoc attrs :min-tokens min) :max-tokens max)) 307 | 308 | ;;; 'earlier' to be evaluated before 'later'. 309 | (defn conjoin-restrictions [earlier later] 310 | (if (nil? later) 311 | earlier 312 | (if (nil? earlier) 313 | later 314 | ;; Else neither is nil. 315 | (let [later-conjuncts (if (= (first later) 'and) 316 | (rest later) 317 | (list later)) 318 | earlier-conjuncts (if (= (first earlier) 'and) 319 | (rest earlier) 320 | (list earlier))] 321 | `(~'and ~@earlier-conjuncts ~@later-conjuncts))))) 322 | 323 | (defn adjoin-actions [later earlier] 324 | (if (nil? later) 325 | earlier 326 | (if (nil? earlier) 327 | later 328 | ;; Else neither is nil. 329 | (let [later-statements (if (= (first later) 'do) 330 | (rest later) 331 | (list later)) 332 | earlier-statements (if (= (first earlier) 'do) 333 | (rest earlier) 334 | (list earlier))] 335 | `(~'do ~@earlier-statements ~@later-statements))))) 336 | 337 | (defn digit-string? [s] 338 | ;; Encapsulate the regex inside this function, because expressions 339 | ;; involving regexes are hard to test (because regexes have only 340 | ;; identity equality). 341 | (re-matches #"\d*" s)) 342 | 343 | ;;; Restrict 'this' to a single, all-digit token. 344 | (defn digits-alone [attrs this] 345 | (assoc (assoc attrs :max-tokens 1) 346 | :finally? (conjoin-restrictions `(digit-string? ~this) 347 | (:finally? attrs)))) 348 | 349 | ;;; Restrict 'this' to a single token of exactly n digits. 350 | (defn n-digits-alone [attrs this n] 351 | (assoc (assoc attrs :max-tokens 1) 352 | :finally? (conjoin-restrictions 353 | `(~'and ; Standardize 'and'. 354 | (digit-string? ~this) 355 | (= (count ~this) ~n)) 356 | (:finally? attrs)))) 357 | 358 | (defn same-when-bound [attrs this other] 359 | (assoc attrs 360 | :finally? (conjoin-restrictions `(if (and ~other ~this) 361 | (= ~this ~other) 362 | true) 363 | (:finally? attrs)))) 364 | 365 | (defn different-when-bound [attrs this other] 366 | (assoc attrs 367 | :finally? (conjoin-restrictions `(if (and ~other ~this) 368 | (not= ~this ~other) 369 | true) 370 | (:finally? attrs)))) 371 | 372 | ;;; To qualify a series of (comma-free, natural) integers. 373 | (defn digits-along [attrs this] 374 | (assoc attrs 375 | :each? (conjoin-restrictions `(digit-string? ~this) 376 | (:each? attrs)))) 377 | ;;; The ':each?' definition above is more efficient than the 378 | ;;; ':finally?' version commented out below. 379 | (comment 380 | (defn digits-along [attrs this] 381 | (assoc attrs 382 | :finally? (conjoin-restrictions `(every? (digit-string? ~this) 383 | (instance->tokens ~this)) 384 | (:finally? attrs))))) 385 | 386 | ;;;;; Attribute short-hand functions ^^ 387 | ;;;;; ---------------------------------------------------------------- 388 | ;;;;; Template matcher (core): 389 | 390 | (defn- expand-short-hand-attributes [annotated-var] 391 | (let [var-sym (first annotated-var) 392 | attrs (var-attributes annotated-var)] 393 | (if (seq? attrs) 394 | (let [fn-sym (first attrs) 395 | args (rest attrs)] 396 | ;; Quote the shorthand function's var args. 397 | [var-sym (eval `(~fn-sym 398 | ~@(map #(list 'quote %) 399 | args)))]) 400 | annotated-var))) 401 | 402 | (defn parse-template [template] 403 | (let [template (if (string? template) 404 | (parse-string-template template) 405 | template) 406 | template (if (= (first template) :series) 407 | (list template) 408 | template)] 409 | (walk-exprs annotated-var? 410 | expand-short-hand-attributes 411 | template))) 412 | 413 | ;;; We have an already-in-process var. 414 | (defn- updated-var-val [current-val with-token] 415 | (if (= current-val "") 416 | with-token 417 | (str current-val " " with-token))) 418 | 419 | (def ^:dynamic *case-sensitive* false) 420 | 421 | (defn- -case-instance= [template-construct input-token] 422 | (= (str/lower-case 423 | ;; Can be called on empty lists. 424 | (or template-construct "")) 425 | (str/lower-case (or input-token "")))) 426 | 427 | (defn- +case-instance= [template-construct input-token] 428 | ;; No case deviation tolerance. 429 | (= template-construct input-token)) 430 | 431 | (defn- instance= [template-construct input-token] 432 | (if *case-sensitive* 433 | (+case-instance= template-construct input-token) 434 | (-case-instance= template-construct input-token))) 435 | 436 | (defn- hasPrefix [s prefix] 437 | (let [string-size (count s) 438 | prefix-size (count prefix)] 439 | (when (>= string-size prefix-size) 440 | (instance= (subs s 0 prefix-size) prefix)))) 441 | 442 | ;;; Later, search using a trie. For now, we'll brute-force 443 | ;;; search (what's left of) the (selected-down) set. 444 | ;;; 445 | ;;; Return the subset of strings (also a set) having the prefix. 446 | (defn- pruned-by-prefix [set-of-strings prefix] 447 | (clojure.set/select (fn [item] (hasPrefix item prefix)) 448 | set-of-strings)) 449 | 450 | ;;; Given a +var, we'd like to bind some match of a kind instances. 451 | ;;; 452 | ;;; We could prefer a longest or a shortest going-forward match. Here 453 | ;;; we've gone with longest. 454 | ;;; 455 | ;;; We must examine one input token at a time, traversing the input 456 | ;;; string up to next-template-construct, if that's not a var, or calling 457 | ;;; match-current-var, if it is a var. Either way, we must augment 458 | ;;; bindings as we go. 459 | ;;; 460 | ;;; Compared to handling a *var, handling a +var requires... 461 | ;;; 462 | ;;; - Initially calling with a freshly looked up current-+set 463 | ;;; 464 | ;;; - Failing when current-+set is empty 465 | ;;; 466 | ;;; - Failing rather than recursing when current-var's value on 467 | ;;; bindings is not in current-+set. This saves us from a partial 468 | ;;; match (e.g., "foo" when only "foo bar" is in the set. 469 | ;;; 470 | ;;; - Pruning current-+set upon consuming an input token. 471 | 472 | ;;; This clearly won't scale. We'd ultimately like an instance 473 | ;;; retrieval facility to handle case sensitivity. 474 | (defn- downcase-set [s] 475 | (into #{} (map str/lower-case s))) 476 | 477 | ;;; Returns nil, if current-+set should not be active. 478 | (defn- initialize-current-+set [current-var bindings] 479 | (if-let [current-binding (get bindings (plain-var current-var))] 480 | #{(str/lower-case current-binding)} ; Have to match later binding. 481 | (when (+var? current-var) 482 | (when-let [instances (+var-kind-instances current-var)] 483 | (if *case-sensitive* 484 | instances 485 | (downcase-set instances)))))) 486 | 487 | (defn- var-attribute [var attribute] 488 | (when (annotated-var? var) 489 | (get (second var) attribute))) 490 | 491 | (defn- instance->tokens [instance] 492 | (filter #(= % \space) instance)) 493 | 494 | (defn count-tokens [instance] ; current-val 495 | (+ 1 (count (instance->tokens instance)))) 496 | 497 | (def ^:private ^:dynamic *template-vars* #{}) 498 | 499 | (defn template-vars [template] 500 | "Public for application integration." 501 | (let [vars (atom #{})] 502 | (walk-exprs tm-var? 503 | #(swap! vars conj (plain-var %)) 504 | template) 505 | @vars)) 506 | 507 | ;;; Called with seq of unique elements. Bind any unbound template 508 | ;;; vars to nil---to support (e.g.) different-when-bound (because 509 | ;;; Clojure barfs on code with free names). 510 | ;;; 511 | ;;; Arguably more efficiently done at parse time, but (for now, 512 | ;;; anyway) we'd like parsed templates cleaner, for tracing/debugging. 513 | (defn- guard-template-vars [bindings template-vars] 514 | (let [var (first template-vars) 515 | template-vars (rest template-vars) 516 | bindings (if (get bindings var) 517 | bindings 518 | (assoc bindings var nil))] 519 | (if (empty? template-vars) 520 | bindings 521 | (guard-template-vars bindings template-vars)))) 522 | 523 | ;;; This should handle any expression using bound tm-vars and Clojure vars. 524 | (defn- pass-restriction? [bindings-hashmap restriction] 525 | (let [bindings-hashmap (guard-template-vars bindings-hashmap (seq *template-vars*)) 526 | bindings-list (vec (flatten (vec bindings-hashmap)))] 527 | ;; Evaluate the restriction. 528 | (eval `(let ~bindings-list 529 | ~restriction)))) 530 | 531 | (defn- do-action [bindings-hashmap action] 532 | (let [bindings-hashmap (guard-template-vars bindings-hashmap (seq *template-vars*)) 533 | bindings-list (vec (flatten (vec bindings-hashmap)))] 534 | ;; Evaluate the action. 535 | (eval `(let ~bindings-list 536 | ~action)))) 537 | 538 | (defn- qualify-attributes-always? [current-var current-val bindings] 539 | (and (let [max-tokens (var-attribute current-var :max-tokens)] 540 | (or (nil? max-tokens) 541 | (<= (count-tokens current-val) max-tokens))) 542 | (let [restriction (var-attribute current-var :always?) 543 | bindings (assoc bindings (plain-var current-var) current-val)] 544 | (or (nil? restriction) 545 | (pass-restriction? bindings restriction))) 546 | )) 547 | 548 | (defn- qualify-attributes-each? [current-var current-token bindings] 549 | (let [restriction (var-attribute current-var :each?) 550 | ;; Not really legit bindings (since not current-val), but it's 551 | ;; what we'll check. 552 | bindings (assoc bindings (plain-var current-var) 553 | current-token)] 554 | (or (nil? restriction) 555 | (pass-restriction? bindings restriction)))) 556 | 557 | (defn- qualify-always? [current-var current-val current-token bindings 558 | current-+set] 559 | (and (qualify-attributes-always? current-var current-val bindings) 560 | (qualify-attributes-each? current-var current-token bindings) 561 | (or (and (*var? current-var) 562 | ;; *var ok, unless already on bindings. 563 | (not (get bindings (plain-var current-var)))) 564 | ;; +var / any already-bound var needs surviving instances. 565 | (not (empty? current-+set))))) 566 | 567 | (defn- qualify-attributes-finally? [current-var current-val bindings] 568 | (and (qualify-attributes-always? current-var current-val bindings) 569 | (let [min-tokens (var-attribute current-var :min-tokens)] 570 | (or (nil? min-tokens) 571 | (>= (count-tokens current-val) min-tokens))) 572 | (let [restriction (var-attribute current-var :finally?) 573 | bindings (assoc bindings (plain-var current-var) current-val)] 574 | (or (nil? restriction) 575 | (pass-restriction? bindings restriction))) 576 | )) 577 | 578 | (defn- qualify-finally? [current-var current-val bindings current-+set] 579 | (and (qualify-attributes-finally? current-var current-val bindings) 580 | ;; Qualify +var instance. 581 | (or (and (*var? current-var) 582 | ;; *var ok, unless already on bindings. 583 | (not (get bindings current-var))) 584 | ;; +var / any already-bound var: fail when 585 | ;; current-val is not in current-+set. 586 | (contains? current-+set (str/lower-case current-val))))) 587 | 588 | (defn- assert-kind-var-val [current-var current-val] 589 | (let [assertion (and (annotated-*var? current-var) 590 | (let [kind (*var-kind current-var)] 591 | (when kind 592 | (add-kind-instance kind current-val))))] 593 | assertion)) 594 | 595 | (defn- updated-current-+set [current-+set current-var current-val bindings] 596 | (when (or (get bindings (plain-var current-var)) 597 | (+var? current-var)) 598 | ;; So, (set? current-+set). 599 | (pruned-by-prefix current-+set current-val))) 600 | 601 | ;;; We use this macro to modularize 'match-...' functions 602 | ;;; below, to provide a single point of maintenance and avoid typing 603 | ;;; this same preamble stuff more than once. 604 | (defmacro ^:private def-match-fn [fn-name & body] 605 | ;; Can't abide syntax quote's symbol namespacing here. So, consing. 606 | (cons 'defn- 607 | (cons fn-name 608 | (concat '([template-constructs 609 | input-tokens 610 | & {:keys [bindings 611 | assertions ; Made during this call to `match`. 612 | current-var 613 | current-val 614 | ;; For +vars, only. A bit cheesy, but okay. 615 | current-+prefix 616 | current-+set 617 | ;; For future sequence context vars. See README.md. 618 | ;; context-var 619 | ;; context-val 620 | ] 621 | ;; The keyword arguments' default values. 622 | :or {bindings {} ; Excepting current-val. 623 | assertions #{} ; What we'll say we've added. 624 | current-var nil 625 | current-val "" 626 | ;; The growing instance (maybe-multi-token string). 627 | current-+prefix "" 628 | current-+set nil 629 | ;; For future. 630 | ;; context-var 631 | ;; context-val 632 | }}]) 633 | body)))) 634 | 635 | (declare match-constructs) 636 | (declare match-var) 637 | 638 | (def-match-fn match-var-always 639 | (when-not (empty? input-tokens) 640 | (let [current-token (first input-tokens)] 641 | (when (qualify-always? current-var current-val current-token bindings current-+set) 642 | (let [current-val (updated-var-val current-val current-token) 643 | current-+set (updated-current-+set current-+set current-var current-val bindings)] 644 | (match-var template-constructs (rest input-tokens) 645 | :bindings bindings 646 | :assertions assertions 647 | :current-var current-var 648 | :current-val current-val 649 | :current-+set current-+set)))))) 650 | 651 | (def-match-fn match-var-finally 652 | (when (qualify-finally? current-var current-val bindings current-+set) 653 | ;; Say we're done (recording current var/val on 654 | ;; bindings) and try the next var. 655 | ;; 656 | ;; Arrange for our updates to *kind-assertions* to be unwound upon 657 | ;; backtracking. 658 | (binding [*kind-assertions* (atom @*kind-assertions*)] 659 | (let [bindings (assoc bindings (plain-var current-var) current-val) 660 | assertion (assert-kind-var-val current-var current-val) 661 | assertions (if assertion 662 | (conj assertions assertion) 663 | assertions) 664 | action (var-attribute current-var :finally.)] 665 | ;; Perform any specified action. 666 | (or (nil? action) 667 | (do-action bindings action)) 668 | ;; Keep matching. 669 | (match-constructs template-constructs input-tokens 670 | :bindings bindings 671 | :assertions assertions))))) 672 | 673 | ;;; We have an already-in-process var. 674 | (def-match-fn match-var 675 | (or 676 | ;; Prefer a longer binding for the current var---update the 677 | ;; current value and chug along. 678 | (match-var-always template-constructs input-tokens 679 | :bindings bindings 680 | :assertions assertions 681 | :current-var current-var 682 | :current-val current-val 683 | :current-+set current-+set) 684 | ;; Reverse the surrounding disjuncts (at both levels) to bind a 685 | ;; var to the shortest possible remaining token sequence. 686 | (match-var-finally template-constructs input-tokens 687 | :bindings bindings 688 | :assertions assertions 689 | :current-var current-var 690 | :current-val current-val 691 | :current-+set current-+set))) 692 | 693 | (def-match-fn match-+case 694 | (let [within-+case-scope (rest (first template-constructs)) 695 | beyond-+case-scope (rest template-constructs)] 696 | (if *case-sensitive* 697 | ;; :+case is a no-op. 698 | (match-constructs `(~@within-+case-scope ~@beyond-+case-scope) 699 | input-tokens 700 | :bindings bindings 701 | :assertions assertions) 702 | (binding [*case-sensitive* true] 703 | (let [-case-form (when-not (empty? beyond-+case-scope) 704 | ;; Elide if empty, else wrap. 705 | `((:-case ~@beyond-+case-scope)))] 706 | (match-constructs `(~@within-+case-scope ~@-case-form) 707 | input-tokens 708 | :bindings bindings 709 | :assertions assertions)))))) 710 | 711 | (def-match-fn match--case 712 | (let [within--case-scope (rest (first template-constructs)) 713 | beyond--case-scope (rest template-constructs)] 714 | (if-not *case-sensitive* 715 | ;; :-case is a no-op. 716 | (match-constructs `(~@within--case-scope ~@beyond--case-scope) 717 | input-tokens 718 | :bindings bindings 719 | :assertions assertions) 720 | (binding [*case-sensitive* false] 721 | (let [+case-form (when-not (empty? beyond--case-scope) 722 | ;; Elide if empty, else wrap. 723 | `((:+case ~@beyond--case-scope)))] 724 | (match-constructs `(~@within--case-scope ~@+case-form) 725 | input-tokens 726 | :bindings bindings 727 | :assertions assertions)))))) 728 | 729 | (def-match-fn match-optional 730 | (let [within-optional-scope (rest (first template-constructs)) 731 | beyond-optional-scope (rest template-constructs)] 732 | (or 733 | ;; First try for (prefer) non-empty content. 734 | (match-constructs `(~@within-optional-scope ~@beyond-optional-scope) 735 | input-tokens 736 | :bindings bindings 737 | :assertions assertions) 738 | ;; Next try empty content. 739 | (match-constructs beyond-optional-scope input-tokens 740 | :bindings bindings 741 | :assertions assertions)))) 742 | 743 | (def-match-fn match-series 744 | (let [within-series-scope (rest (first template-constructs)) 745 | beyond-series-scope (rest template-constructs)] 746 | (match-constructs `(~@within-series-scope ~@beyond-series-scope) 747 | input-tokens 748 | :bindings bindings 749 | :assertions assertions))) 750 | 751 | (def-match-fn match-choice 752 | (let [within-choice-scope (rest (first template-constructs)) 753 | beyond-choice-scope (rest template-constructs)] 754 | (when-not (empty? within-choice-scope) ; Fail if we've exhausted choices. 755 | (or (match-constructs `(~(first within-choice-scope) ~@beyond-choice-scope) 756 | input-tokens 757 | :bindings bindings 758 | :assertions assertions) 759 | ;; Skip the call to match-constructs, since we know what's coming. 760 | (match-choice `((:choice ~@(rest within-choice-scope)) 761 | ~@beyond-choice-scope) 762 | input-tokens 763 | :bindings bindings 764 | :assertions assertions))))) 765 | 766 | (def-match-fn match-control 767 | (let [controlled (first template-constructs) 768 | control (first controlled)] 769 | (apply (case control 770 | :+case match-+case 771 | :-case match--case 772 | :optional match-optional 773 | :series match-series 774 | :choice match-choice) 775 | [template-constructs input-tokens 776 | :bindings bindings 777 | :assertions assertions]))) 778 | 779 | (def ^:private ^:dynamic *matches* (atom #{})) 780 | (def ^:private ^:dynamic *matcher-assertions* (atom #{})) 781 | (def ^:private ^:dynamic *matches-countdown* nil) 782 | (def ^:private ^:dynamic *assert-unique-match* false) 783 | 784 | ;;; Match input-string against template, creating (generally, 785 | ;;; multi-token) "instance" bindings for template wildcard symbols. 786 | ;;; Returns nil if the does template not match, else a binding 787 | ;;; hashmap, perhaps empty. 788 | (def-match-fn match-constructs 789 | (let [template-item (first template-constructs)] 790 | (cond 791 | (seq? template-item) 792 | (match-control template-constructs input-tokens 793 | :bindings bindings 794 | :assertions assertions) 795 | 796 | (and (empty? template-constructs) 797 | (empty? input-tokens)) 798 | ;; Succeed (record match and backtrack). 799 | (let [answer [bindings assertions]] 800 | (if (contains? @*matches* answer) 801 | nil ; Fail 802 | (do (swap! *matches* conj [bindings assertions]) 803 | (when *matches-countdown* 804 | (swap! *matches-countdown* dec)) 805 | (if (and *matches-countdown* 806 | (= @*matches-countdown* 0)) 807 | (do (when *assert-unique-match* ; *matches-countdown* started at 2. 808 | (assert false (cl-format nil "Multiple matches found: ~s" @*matches*))) 809 | ;; Return something truthy, unwinding all matching calls. 810 | [bindings assertions]) 811 | ;; Else fail, for backtracking. 812 | nil)))) 813 | 814 | ;; Else at least one of template, input still has tokens. 815 | ;; Optional vars handled at bottom. 816 | (or (empty? template-constructs) 817 | (empty? input-tokens)) 818 | ;; Fail if they don't both still have tokens. 819 | nil 820 | 821 | ;; Else both have tokens. Make sure they match. 822 | (and (not (tm-var? template-item)) 823 | (instance= (str template-item) ; Accommodate an easy symbol. 824 | (first input-tokens))) 825 | ;; The front tokens match outright, so check the rest. 826 | (match-constructs (rest template-constructs) (rest input-tokens) 827 | :bindings bindings 828 | :assertions assertions) 829 | 830 | :else 831 | ;; They don't match outright. Can we find a matching variable binding? 832 | (when (tm-var? template-item) 833 | (let [current-var template-item 834 | current-val (first input-tokens) 835 | current-+set (initialize-current-+set current-var bindings)] 836 | (match-var (rest template-constructs) (rest input-tokens) 837 | :bindings bindings 838 | :assertions assertions 839 | :current-var current-var 840 | :current-val current-val 841 | :current-+set current-+set)))))) 842 | 843 | ;;; The application-level interface to match-constructs: 844 | 845 | ;;; Single-match interface. 846 | (defn match-details [template input-string] 847 | (let [parsed-template (parse-template template)] 848 | (binding [*matches* (atom #{}) 849 | *matches-countdown* (atom 1) 850 | *template-vars* (template-vars parsed-template)] 851 | ;; Returns the last match found. 852 | (match-constructs parsed-template 853 | (parse-input-string input-string)) 854 | ;; Not needed: (nth @*matches* 0) 855 | ))) 856 | 857 | ;;; Used in most tests. 858 | (defn match [template input-string] 859 | (let [[bindings _assertions] (match-details template input-string)] 860 | bindings)) 861 | 862 | (defn match-uniquely [template input-string] 863 | (let [parsed-template (parse-template template)] 864 | (binding [*matches* (atom #{}) 865 | *matches-countdown* (atom 2) 866 | *assert-unique-match* true 867 | *template-vars* (template-vars parsed-template)] 868 | (match-constructs parsed-template 869 | (parse-input-string input-string)) 870 | (first @*matches*)))) 871 | 872 | ;;; Multi-match interface. 873 | (defn matches 874 | ([template input-string] 875 | (matches template input-string nil)) 876 | ;; 'limit' should be a positive integer. 877 | ([template input-string limit] 878 | (let [parsed-template (parse-template template)] 879 | (binding [*matches* (atom #{}) 880 | *matches-countdown* (when limit (atom limit)) 881 | *template-vars* (template-vars parsed-template)] 882 | (match-constructs parsed-template 883 | (parse-input-string input-string)) 884 | ;; A using application should decide what to do with these, when 885 | ;; *matches* has more than one entry. 886 | @*matches*)))) 887 | 888 | ;;; Versions for pre-parsed templates (so that they may be applied 889 | ;;; many times, without re-parsing): 890 | 891 | (defn match-pre-parsed [parsed-template input-string] 892 | (binding [*matches* (atom #{}) 893 | *matches-countdown* (atom 1) 894 | *template-vars* (template-vars parsed-template)] 895 | ;; Returns the last match found. 896 | (match-constructs parsed-template 897 | (parse-input-string input-string)) 898 | ;; Not needed: (nth @*matches* 0) 899 | )) 900 | 901 | (defn matches-pre-parsed 902 | ([parsed-template input-string] 903 | (matches-pre-parsed parsed-template input-string nil)) 904 | ;; 'limit' should be a positive integer. 905 | ([parsed-template input-string limit] 906 | (binding [*matches* (atom #{}) 907 | *matches-countdown* (when limit (atom limit)) 908 | *template-vars* (template-vars parsed-template)] 909 | (match-constructs parsed-template 910 | (parse-input-string input-string)) 911 | @*matches*))) 912 | 913 | ;;;;; Template matcher (core) ^^ 914 | ;;;;; ---------------------------------------------------------------- 915 | ;;;;; Template-matching defining forms: 916 | 917 | ;;; A matcher-returned binding is a string of tokens, e.g., "foo 918 | ;;; bar" (or "foo"). When our application used standard 919 | ;;; Prolog---whose predicates must be symbols---we needed the 920 | ;;; following two functions. 921 | (defn instance->symbol [s] 922 | ;; Return a corresponding symbol, e.g., `foo_bar` (or `foo`). 923 | (when s ; Handle elided optional/chioce-scope vars. 924 | (clojure.edn/read-string (str/replace s " " "_")))) 925 | 926 | ;;; Invert the above. 927 | (defn symbol->instance [symbol] 928 | ;; This assumes \_ never occurs in an (e.g., predicate) token. 929 | (str/replace (name symbol) "_" " ")) 930 | 931 | ;;; For template matcher users/applications, we provide two macros to 932 | ;;; define functions automatially binding the variables of a fixed 933 | ;;; template and accepting a string input parameter, such that these 934 | ;;; variables' values---when matched against the template for a given 935 | ;;; input---can be referenced in the function body. One macro 936 | ;;; provides these values as plain strings, the other as symbols---as 937 | ;;; illustrated in the long comment below. 938 | (comment 939 | 940 | ;; Define your function. 941 | (defn-templating-symbols list-outer-symbols ["*front stuff *back" 942 | [phrase]] 943 | (list *front *back)) 944 | 945 | ;; Ask to see an equivalent plan--defn form. 946 | (macroexpand-1 '(defn-templating-symbols list-outer-symbols ["*front stuff *back" 947 | [phrase]] 948 | (list *front *back))) 949 | 950 | ;; Aren't we getting away with a lot less typing (and code 951 | ;; duplication presenting maintenance issues)? 952 | (defn list-outer-symbols [phrase] 953 | (let [bindings-hashmap (match "*front stuff *back" phrase)] 954 | (when bindings-hashmap 955 | (let [*front (instance->symbol (get bindings-hashmap '*front)) 956 | *back (instance->symbol (get bindings-hashmap '*back))] 957 | (list *front *back))))) 958 | 959 | ;; Call it on some input. 960 | (list-outer-symbols "We've built some stuff here...") 961 | 962 | ;; The result---a list of symbols. 963 | 964 | (We've_built_some here) 965 | 966 | ;; Or, with the -strings versions: 967 | 968 | ;; Defining form: 969 | (defn-templating-strings list-outer-strings ["*front stuff *back" 970 | [phrase]] 971 | (list *front *back)) 972 | 973 | ;; Invocation: 974 | (list-outer-strings "We've built some stuff here...") 975 | 976 | ;; Result: 977 | ("We've built some" "here") 978 | 979 | ) 980 | ;;; Since integrating clolog in v1.1---whose predicates can be 981 | ;;; strings---we use only the `defn-templating-strings` (and not 982 | ;;; `defn-templating-symbols`) in our application. 983 | 984 | ;;; The macros (defined below) have some helper functions. We've 985 | ;;; fully qualified some symbols to make these forms exportable. (It 986 | ;;; turns out that our clolog integration has required copying this 987 | ;;; section of code into our application and modifying 988 | ;;; `defn-templating-core` there so that the call to `match` can 989 | ;;; access the application's knowledge base. Our copy wraps that call 990 | ;;; with a macro, `with-model`.) 991 | 992 | (defn get-let-binding-form-strings [vars] 993 | "Public for application integration." 994 | (into [] cat (map (fn [var] 995 | [var (list 'get 'bindings-hashmap 996 | (list 'quote var))]) 997 | vars))) 998 | 999 | (defn defn-templating-core [fn-sym input-sym template let-binding-form body] 1000 | ;; Not using syntax quote (`) here, because of symbol namespacing 1001 | ;; issues. 1002 | (cons 'defn 1003 | (cons fn-sym 1004 | (cons [input-sym] 1005 | (list 1006 | (list 'let 1007 | ['bindings-hashmap (list 'token-matcher.core/match 1008 | (list 'quote template) 1009 | input-sym)] 1010 | (cons 'when 1011 | (list 'bindings-hashmap 1012 | (cons 'let 1013 | (cons let-binding-form body)))))))))) 1014 | 1015 | 1016 | (defmacro defn-templating-strings [fn-sym [template [input-sym]] & body] 1017 | (let [let-binding-form (get-let-binding-form-strings (seq (template-vars (parse-template template))))] 1018 | (defn-templating-core fn-sym input-sym template let-binding-form body))) 1019 | 1020 | ;;; Another helper. 1021 | (defn- get-let-binding-form-symbols [vars] 1022 | (into [] cat 1023 | (map (fn [var] 1024 | [var (list 'token-matcher.core/instance->symbol ; Fully qualify. 1025 | (list 'get 'bindings-hashmap 1026 | (list 'quote var)))]) 1027 | vars))) 1028 | 1029 | (defmacro defn-templating-symbols [fn-sym [template [input-sym]] & body] 1030 | (let [let-binding-form (get-let-binding-form-symbols (seq (template-vars (parse-template template))))] 1031 | (defn-templating-core fn-sym input-sym template let-binding-form body))) 1032 | 1033 | ;;;;; Template-matching defining forms ^^ 1034 | ;;;;; ---------------------------------------------------------------- 1035 | ;;;;; REPL-only forms: 1036 | 1037 | ;;; The next two forms may be useful at the REPL. They can't 1038 | ;;; productively be embedded in a function or a macro. 1039 | 1040 | ;;; Unless matching fails, execute body under raw template 1041 | ;;; bindings (strings). 1042 | (defmacro with-matching-template-strings [[template input-form] 1043 | & body] 1044 | (let [bindings-hashmap (match template input-form)] 1045 | (when bindings-hashmap ; (not= bindings-hashmap nil) 1046 | (let [bindings-hashmap (guard-template-vars bindings-hashmap 1047 | (seq (template-vars (parse-template template)))) 1048 | bindings-list (into [] cat bindings-hashmap)] 1049 | `(let ~bindings-list 1050 | ~@body) 1051 | ;; Syntax quote won't penetrate bindings-list to qualify the 1052 | ;; symbols there. So we don't need this approach, here. 1053 | ;; (cons 'let 1054 | ;; (cons bindings-list 1055 | ;; body)) 1056 | )))) 1057 | 1058 | ;;; Helper function. 1059 | (defn- instance->quoted-symbol [string] 1060 | (list 'quote (instance->symbol string))) 1061 | 1062 | ;;; Unless matching fails, execute body with template-bound strings 1063 | ;;; converted to symbols. 1064 | (defmacro with-matching-template-symbols [[template input-form] 1065 | & body] 1066 | (let [bindings-hashmap (match template input-form)] 1067 | (when bindings-hashmap ; (not= bindings-hashmap nil) 1068 | (let [bindings-hashmap (guard-template-vars bindings-hashmap 1069 | (seq (template-vars (parse-template template)))) 1070 | symbols-hashmap (update-vals bindings-hashmap instance->quoted-symbol) 1071 | bindings-list (into [] cat symbols-hashmap)] 1072 | `(let ~bindings-list 1073 | ~@body))))) 1074 | 1075 | ;;;;; REPL-only forms ^^ 1076 | ;;;;; ---------------------------------------------------------------- 1077 | --------------------------------------------------------------------------------