├── doc ├── .gitignore ├── style.css ├── Makefile └── esrap.texinfo ├── web ├── Makefile └── style.css ├── Makefile ├── coverage.sh ├── .travis.yml ├── coverage.lisp ├── test ├── package.lisp ├── stress.lisp ├── macros.lisp ├── readme.lisp ├── util.lisp ├── examples.lisp └── expressions.lisp ├── src ├── variables.lisp ├── protocol.lisp ├── package.lisp ├── editor-support.lisp ├── cache │ ├── chunk.lisp │ └── packrat.lisp ├── types.lisp ├── conditions.lisp ├── context.lisp ├── rule.lisp ├── macros.lisp ├── expressions.lisp ├── interface.lisp └── results.lisp ├── examples ├── sexp.lisp ├── symbol-table.lisp ├── left-recursion.lisp └── function-terminals.lisp ├── esrap.asd ├── TODO.org └── README.org /doc/.gitignore: -------------------------------------------------------------------------------- 1 | *.html 2 | include 3 | -------------------------------------------------------------------------------- /web/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean 2 | 3 | all: index.html 4 | 5 | index.html: ../doc/esrap.html 6 | cp ../doc/esrap.html index.html 7 | 8 | clean: 9 | rm -f *~ *.lisp *.lisp.html \#* 10 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: doc web wc clean all test 2 | 3 | all: 4 | echo "Targets: clean, wc, doc, test, web" 5 | 6 | clean: 7 | rm -f *.fasl *~ 8 | make -C doc clean 9 | make -C web clean 10 | 11 | wc: 12 | wc -l *.lisp 13 | 14 | doc: 15 | make -C doc 16 | 17 | web: doc 18 | make -C web 19 | 20 | gh-pages: web 21 | rm -rf web-tmp 22 | mv web web-tmp 23 | git checkout gh-pages 24 | cp web-tmp/index.html . 25 | git commit -a -c master 26 | mv web-tmp web 27 | git checkout -f master 28 | -------------------------------------------------------------------------------- /coverage.sh: -------------------------------------------------------------------------------- 1 | #!/bin/sh 2 | 3 | # Helper script for coverage report generation. 4 | # 5 | # Copyright (C) 2014 Jan Moringen 6 | # 7 | # Author: Jan Moringen 8 | 9 | SBCL="${HOME}/opt/sbcl/bin/sbcl" 10 | QUICKLISP="${HOME}/.local/share/common-lisp/quicklisp" 11 | 12 | "${SBCL}" --noinform --disable-ldb --lose-on-corruption \ 13 | --no-userinit --disable-debugger \ 14 | --load "${QUICKLISP}/setup.lisp" \ 15 | --load "coverage.lisp" \ 16 | --quit 17 | -------------------------------------------------------------------------------- /doc/style.css: -------------------------------------------------------------------------------- 1 | .node { visibility:hidden; height: 0px; } 2 | .menu { visibility:hidden; height: 0px; } 3 | .chapter { background-color:#e47700; padding: 0.2em; } 4 | .section { background-color:#e47700; padding: 0.2em; } 5 | .settitle { background-color:#e47700; } 6 | .contents { border: 2px solid black; 7 | margin: 1cm 1cm 1cm 1cm; 8 | padding-left: 3mm; } 9 | .lisp { padding: 0; margin: 0em; } 10 | body { padding: 2em 8em; font-family: sans-serif; } 11 | h1 { padding: 1em; text-align: center; } 12 | li { margin: 1em; } 13 | 14 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: lisp 2 | 3 | env: PREFIX="$(pwd)/sbcl" 4 | SBCL_HOME="$(pwd)/sbcl/lib/sbcl" 5 | SBCL="$(pwd)/sbcl/bin/sbcl" 6 | SBCL_OPTIONS="--noinform --no-userinit" 7 | 8 | install: 9 | - curl -L "${SBCL_DOWNLOAD_URL}" | tar -xj 10 | - ( cd sbcl-* && INSTALL_ROOT="${PREFIX}" sh install.sh ) 11 | 12 | - curl -o cl "${CL_LAUNCH_DOWNLOAD_URL}" 13 | - chmod +x cl 14 | 15 | - curl -o quicklisp.lisp "${QUICKLISP_DOWNLOAD_URL}" 16 | - ./cl -L quicklisp.lisp '(quicklisp-quickstart:install)' 17 | 18 | script: 19 | - ./cl 20 | -S '(:source-registry (:directory "'$(pwd)'") :ignore-inherited-configuration)' 21 | -Q 22 | -s esrap/tests 23 | '(or (esrap-tests:run-tests) (uiop:quit -1))' 24 | -------------------------------------------------------------------------------- /doc/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: clean html include doc 2 | 3 | doc: html 4 | 5 | clean: 6 | rm -rf include 7 | rm -f *.pdf *.html *.info 8 | rm -f *.aux *.cp *.fn *.fns *.ky *.log *.pg *.toc *.tp *.tps *.vr 9 | 10 | include: 11 | sbcl \ 12 | --noinform --disable-ldb --lose-on-corruption \ 13 | --no-userinit --disable-debugger \ 14 | --eval '(require :asdf)' \ 15 | --eval '(let ((asdf:*central-registry* (cons #p"../" asdf:*central-registry*))) (require :esrap))' \ 16 | --load docstrings.lisp \ 17 | --eval '(sb-texinfo:generate-includes "include/" (list :esrap) :base-package :esrap)' \ 18 | --quit 19 | 20 | esrap.html: esrap.texinfo style.css docstrings.lisp ../*.lisp ../*.asd 21 | make include 22 | makeinfo --html --no-split --css-include=style.css esrap.texinfo 23 | 24 | html: esrap.html 25 | -------------------------------------------------------------------------------- /web/style.css: -------------------------------------------------------------------------------- 1 | body { 2 | color: #000000; 3 | background-color: #ffffff; 4 | } 5 | .builtin { 6 | /* font-lock-builtin-face */ 7 | color: #7a378b; 8 | } 9 | .comment { 10 | /* font-lock-comment-face */ 11 | color: #b22222; 12 | } 13 | .comment-delimiter { 14 | /* font-lock-comment-delimiter-face */ 15 | color: #b22222; 16 | } 17 | .constant { 18 | /* font-lock-constant-face */ 19 | color: #008b8b; 20 | } 21 | .function-name { 22 | /* font-lock-function-name-face */ 23 | color: #0000ff; 24 | } 25 | .keyword { 26 | /* font-lock-keyword-face */ 27 | color: #7f007f; 28 | } 29 | .slime-reader-conditional { 30 | /* slime-reader-conditional-face */ 31 | color: #b22222; 32 | } 33 | .string { 34 | /* font-lock-string-face */ 35 | color: #996633; 36 | } 37 | .type { 38 | /* font-lock-type-face */ 39 | color: #228b22; 40 | } 41 | .warning { 42 | /* font-lock-warning-face */ 43 | color: #ff0000; 44 | font-weight: bold; 45 | } 46 | 47 | a { 48 | color: inherit; 49 | background-color: inherit; 50 | font: inherit; 51 | text-decoration: inherit; 52 | } 53 | a:hover { 54 | text-decoration: underline; 55 | } 56 | -------------------------------------------------------------------------------- /coverage.lisp: -------------------------------------------------------------------------------- 1 | ;;;; coverage.lisp --- Helper script for coverage report generation. 2 | ;;;; 3 | ;;;; Copyright (C) 2014 Jan Moringen 4 | ;;;; 5 | ;;;; Author: Jan Moringen 6 | 7 | (require :sb-cover) 8 | 9 | (defun compute-coverage-for-system (system 10 | &key 11 | (output-directory 12 | (merge-pathnames 13 | (concatenate 'string (string system) "/") 14 | "coverage-report/"))) 15 | (flet ((set-store-coverage (storep) 16 | (eval `(declaim (optimize (sb-cover:store-coverage-data ,(if storep 3 0)))))) 17 | (load-system-silently (system &rest args) 18 | (let* ((*standard-output* (make-broadcast-stream)) 19 | (*trace-output* *standard-output*)) 20 | (handler-bind ((style-warning #'muffle-warning)) 21 | (apply #'asdf:load-system system args))))) 22 | (load-system-silently system) ; load dependencies 23 | (unwind-protect 24 | (progn 25 | (set-store-coverage t) 26 | (load-system-silently system :force t) 27 | (set-store-coverage nil) 28 | (let ((*compile-print* nil) 29 | (*compile-progress* nil) 30 | (*compile-verbose* nil)) 31 | (asdf:test-system system)) 32 | (sb-cover:report output-directory)) 33 | (set-store-coverage nil) 34 | (load-system-silently system :force t) 35 | (sb-cover:clear-coverage)))) 36 | 37 | (mapcar #'compute-coverage-for-system '(:esrap)) 38 | -------------------------------------------------------------------------------- /test/package.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:defpackage #:esrap-tests 21 | (:use 22 | #:alexandria 23 | #:cl 24 | #:esrap 25 | #:fiveam) 26 | 27 | (:shadowing-import-from #:esrap #:!) 28 | 29 | (:import-from #:esrap 30 | #:%expression-direct-dependencies 31 | #:%expression-dependencies 32 | #:expression-simple-p 33 | 34 | #:expand-transforms) 35 | 36 | (:export #:run-tests)) 37 | 38 | (cl:in-package #:esrap-tests) 39 | 40 | (def-suite esrap) 41 | 42 | (defun run-tests () 43 | (run! 'esrap)) 44 | -------------------------------------------------------------------------------- /src/variables.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2016 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | (declaim (type left-recursion-policy *on-left-recursion*)) 23 | 24 | (defvar *on-left-recursion* nil 25 | "This special variable controls Esrap's behavior with respect to 26 | allowing left recursion. 27 | 28 | When :ERROR, PARSE signals a LEFT-RECURSION error when it encounters a 29 | left recursive rule. Otherwise the rule is processed. 30 | 31 | Note: when processing left recursive rules, linear-time guarantees 32 | generally no longer hold.") 33 | 34 | (defparameter *eval-nonterminals* nil) 35 | -------------------------------------------------------------------------------- /src/protocol.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2016 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; Error protocolx 23 | 24 | (defgeneric esrap-error-position (condition) 25 | (:documentation 26 | "Return the input position at which the parse failure represented 27 | by CONDITION occurred.")) 28 | 29 | (defgeneric esrap-parse-error-result (condition) 30 | (:documentation 31 | "Return the result associated to the parse error represented by 32 | CONDITION.")) 33 | 34 | (defgeneric esrap-parse-error-context (condition) 35 | (:documentation 36 | "Return the context result associated to the parse error 37 | represented by CONDITION.")) 38 | -------------------------------------------------------------------------------- /test/stress.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2019 Jan Moringen 2 | ;;;; 3 | ;;;; Permission is hereby granted, free of charge, to any person 4 | ;;;; obtaining a copy of this software and associated documentation files 5 | ;;;; (the "Software"), to deal in the Software without restriction, 6 | ;;;; including without limitation the rights to use, copy, modify, merge, 7 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ;;;; and to permit persons to whom the Software is furnished to do so, 9 | ;;;; subject to the following conditions: 10 | ;;;; 11 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 14 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 15 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | (cl:in-package #:esrap-tests) 20 | 21 | (in-suite esrap) 22 | 23 | (test long-input 24 | "Inputs of various lengths that stress the chunk cache." 25 | (loop :for length :from 0 :to 5000 26 | :do (let* ((input (make-string length :initial-element #\a)) 27 | (result (esrap:parse '(* #\a) input))) 28 | (is (eql length (length result)))))) 29 | 30 | (macrolet ((define-rules () 31 | (let ((names '())) 32 | `(progn 33 | ,@(loop :for i :from 0 :to (1+ esrap::+packrat-hash-table-switch-point+) 34 | :for string = (format nil "~R" i) 35 | :for name = (intern string) 36 | :do (push name names) 37 | :collect `(defrule ,name ,string)) 38 | (defrule number 39 | (or ,@names)))))) 40 | (define-rules)) 41 | 42 | (test many-rules 43 | "Many alternative rules that stress the packrat cache." 44 | (is (equal "zero" (parse 'number "zero")))) 45 | -------------------------------------------------------------------------------- /test/macros.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2019 Jan Moringen 2 | ;;;; 3 | ;;;; Permission is hereby granted, free of charge, to any person 4 | ;;;; obtaining a copy of this software and associated documentation files 5 | ;;;; (the "Software"), to deal in the Software without restriction, 6 | ;;;; including without limitation the rights to use, copy, modify, merge, 7 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ;;;; and to permit persons to whom the Software is furnished to do so, 9 | ;;;; subject to the following conditions: 10 | ;;;; 11 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 14 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 15 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | (cl:in-package #:esrap-tests) 20 | 21 | (in-suite esrap) 22 | 23 | (test expand-transforms.smoke 24 | "Smoke test for the EXPAND-TRANSFORMS function." 25 | 26 | (flet ((test-case (transforms expected-identityp expected-constantp expected-textp) 27 | (multiple-value-bind (code identityp constantp textp) 28 | (expand-transforms transforms) 29 | (declare (ignore code)) 30 | (is (eq expected-identityp identityp)) 31 | (is (eq expected-constantp constantp)) 32 | (is (eq expected-textp textp))))) 33 | (test-case '() t nil nil) 34 | (test-case '((:identity t)) t nil nil) 35 | ;; (test-case '((:text t) (:constant 5)) nil t nil) TODO should signal an error 36 | (test-case '((:constant 5) (:text t)) nil t t) 37 | (test-case '((:function string-upcase) (:text t) (:constant 5)) nil nil nil) 38 | (test-case '((:text t)) nil nil t))) 39 | -------------------------------------------------------------------------------- /test/readme.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2016 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap-tests) 21 | 22 | (in-suite esrap) 23 | 24 | (test-both-modes examples-from-readme.foo 25 | "README examples related to \"foo+\" rule." 26 | (is (equal '("foo" nil t) 27 | (multiple-value-list (parse '(or "foo" "bar") "foo")))) 28 | (is (eq 'foo+ (add-rule 'foo+ 29 | (make-instance 'rule :expression '(+ "foo"))))) 30 | (is (equal '(("foo" "foo" "foo") nil t) 31 | (multiple-value-list (parse 'foo+ "foofoofoo"))))) 32 | 33 | (test-both-modes examples-from-readme.decimal 34 | "README examples related to \"decimal\" rule." 35 | (is (eq 'decimal 36 | (add-rule 37 | 'decimal 38 | (make-instance 39 | 'rule 40 | :expression `(+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) 41 | :transform (lambda (list start end) 42 | (declare (ignore start end)) 43 | (parse-integer (format nil "~{~A~}" list))))))) 44 | (is (eql 123 (parse '(oddp decimal) "123"))) 45 | (is (equal '(nil 0) (multiple-value-list 46 | (parse '(evenp decimal) "123" :junk-allowed t))))) 47 | -------------------------------------------------------------------------------- /src/package.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:defpackage #:esrap 21 | (:use 22 | #:cl 23 | #:alexandria) 24 | 25 | #+sbcl (:lock t) 26 | 27 | ;; Conditions 28 | (:export 29 | #:invalid-expression-error 30 | #:invalid-expression-error-expression 31 | 32 | #:undefined-rule-symbol 33 | 34 | #:undefined-rule-error 35 | 36 | #:esrap-error 37 | #:esrap-error-position 38 | #:esrap-error-text 39 | 40 | #:esrap-parse-error 41 | #:esrap-parse-error-result 42 | #:esrap-parse-error-context 43 | 44 | #:left-recursion 45 | #:left-recursion-nonterminal 46 | #:left-recursion-path) 47 | 48 | ;; Parsing 49 | (:export 50 | #:*on-left-recursion* 51 | 52 | #:parse) 53 | 54 | ;; Expressions 55 | (:export 56 | #:! #:? #:+ #:* #:& #:~ 57 | #:character-ranges) 58 | 59 | ;; Introspection 60 | (:export 61 | 62 | #:expression-start-terminals 63 | 64 | #:rule 65 | #:rule-dependencies 66 | #:rule-expression 67 | #:rule-symbol 68 | 69 | #:find-rule 70 | #:add-rule #:remove-rule 71 | #:change-rule 72 | 73 | #:trace-rule #:untrace-rule 74 | #:untrace-all-rules 75 | 76 | #:describe-grammar 77 | #:describe-terminal) 78 | 79 | ;; Macros 80 | (:export 81 | #:defrule 82 | 83 | #:&bounds 84 | 85 | #:call-transform 86 | #:text)) 87 | -------------------------------------------------------------------------------- /src/editor-support.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2016 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | (defvar *indentation-hint-table* nil) 23 | 24 | (defun hint-slime-indentation () 25 | ;; See https://github.com/nikodemus/esrap/issues/24. 26 | (unless (member "SWANK-INDENTATION" *modules* :test #'string=) 27 | (return-from hint-slime-indentation)) 28 | (when-let* ((swank (find-package :swank)) 29 | (tables (find-symbol (string '#:*application-hints-tables*) swank)) 30 | (table (make-hash-table :test #'eq))) 31 | (setf (gethash 'defrule table) 32 | '(4 4 &rest (&whole 2 &lambda &body))) 33 | (set tables (cons table (remove *indentation-hint-table* (symbol-value tables)))) 34 | (setf *indentation-hint-table* table) 35 | t)) 36 | 37 | (hint-slime-indentation) 38 | 39 | (defun hint-sly-indentation () 40 | (unless (member "SLYNK/INDENTATION" *modules* :test #'string=) 41 | (return-from hint-sly-indentation)) 42 | (when-let* ((slynk (find-package :slynk)) 43 | (tables (find-symbol (string '#:*application-hints-tables*) slynk)) 44 | (table (make-hash-table :test #'eq))) 45 | (setf (gethash 'defrule table) 46 | '(4 4 &rest (&whole 2 &lambda &body))) 47 | (set tables (cons table (remove *indentation-hint-table* (symbol-value tables)))) 48 | (setf *indentation-hint-table* table) 49 | t)) 50 | 51 | (hint-sly-indentation) 52 | -------------------------------------------------------------------------------- /examples/sexp.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Esrap example: a simple S-expression grammar 2 | 3 | (cl:require :esrap) 4 | 5 | (cl:defpackage #:sexp-grammar 6 | (:use #:cl #:esrap)) 7 | 8 | (cl:in-package #:sexp-grammar) 9 | 10 | ;;; A semantic predicate for filtering out double quotes. 11 | 12 | (defun not-doublequote (char) 13 | (not (eql #\" char))) 14 | 15 | (defun not-integer (string) 16 | (when (find-if-not #'digit-char-p string) 17 | t)) 18 | 19 | ;;; Utility rules. 20 | 21 | (defrule whitespace (+ (or #\space #\tab #\newline)) 22 | (:constant nil)) 23 | 24 | (defrule alphanumeric (alphanumericp character)) 25 | 26 | (defrule string-char (or (not-doublequote character) (and #\\ #\"))) 27 | 28 | ;;; Here we go: an S-expression is either a list or an atom, with possibly leading whitespace. 29 | 30 | (defrule sexp (and (? whitespace) (or magic list atom)) 31 | (:function second) 32 | (:lambda (s &bounds start end) 33 | (list s (cons start end)))) 34 | 35 | (defrule magic "foobar" 36 | (:constant :magic) 37 | (:when (eq * :use-magic))) 38 | 39 | (defrule list (and #\( sexp (* sexp) (? whitespace) #\)) 40 | (:destructure (p1 car cdr w p2) 41 | (declare (ignore p1 p2 w)) 42 | (cons car cdr))) 43 | 44 | (defrule atom (or string integer symbol)) 45 | 46 | (defrule string (and #\" (* string-char) #\") 47 | (:destructure (q1 string q2) 48 | (declare (ignore q1 q2)) 49 | (text string))) 50 | 51 | (defrule integer (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) 52 | (:lambda (list) 53 | (parse-integer (text list) :radix 10))) 54 | 55 | (defrule symbol (not-integer (+ alphanumeric)) 56 | ;; NOT-INTEGER is not strictly needed because ATOM considers INTEGER before 57 | ;; a STRING, we know can accept all sequences of alphanumerics -- we already 58 | ;; know it isn't an integer. 59 | (:lambda (list) 60 | (intern (text list)))) 61 | 62 | ;;;; Try these 63 | 64 | (parse 'sexp "FOO123") 65 | 66 | (parse 'sexp "123") 67 | 68 | (parse 'sexp "\"foo\"") 69 | 70 | (parse 'sexp " ( 1 2 3 (FOO\"foo\"123 ) )") 71 | 72 | (parse 'sexp "foobar") 73 | 74 | (let ((* :use-magic)) 75 | (parse 'sexp "foobar")) 76 | 77 | (describe-grammar 'sexp) 78 | 79 | (trace-rule 'sexp :recursive t) 80 | 81 | (parse 'sexp "(foo bar 1 quux)") 82 | 83 | (untrace-rule 'sexp :recursive t) 84 | 85 | (defparameter *orig* (rule-expression (find-rule 'sexp))) 86 | 87 | (change-rule 'sexp '(and (? whitespace) (or list symbol))) 88 | 89 | (parse 'sexp "(foo bar quux)") 90 | 91 | (parse 'sexp "(foo bar 1 quux)" :junk-allowed t) 92 | 93 | (change-rule 'sexp *orig*) 94 | 95 | (parse 'sexp "(foo bar 1 quux)" :junk-allowed t) 96 | -------------------------------------------------------------------------------- /examples/symbol-table.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Esrap example: a simple grammar with scopes and symbol tables. 2 | 3 | (cl:require :esrap) 4 | 5 | (cl:defpackage #:symbol-table 6 | (:use #:cl #:esrap)) 7 | 8 | (cl:in-package #:symbol-table) 9 | 10 | ;;; Use the :AROUND construction to maintain a stack of symbol tables 11 | ;;; during parsing. 12 | ;;; 13 | ;;; It is important to note that the bodies of :AROUND options are 14 | ;;; executed during result construction, not parsing. Therefore, 15 | ;;; :AROUND cannot be used to introduce context sensitivity into 16 | ;;; parsing. However, this can be done when using functions as 17 | ;;; terminals, see example-function-terminals.lisp. 18 | 19 | (declaim (special *symbol-table*)) 20 | (defvar *symbol-table* nil) 21 | 22 | (defstruct (symbol-table 23 | (:constructor make-symbol-table (&optional %parent))) 24 | (%table (make-hash-table :test #'equal)) 25 | %parent) 26 | 27 | (defun lookup/direct (name &optional (table *symbol-table*)) 28 | (values (gethash name (symbol-table-%table table)))) 29 | 30 | (defun lookup (name &optional (table *symbol-table*)) 31 | (or (lookup/direct name table) 32 | (alexandria:when-let ((parent (symbol-table-%parent table))) 33 | (lookup name parent)))) 34 | 35 | (defun (setf lookup) (new-value name &optional (table *symbol-table*)) 36 | (when (lookup/direct name table) 37 | (error "~@" 38 | name)) 39 | (setf (gethash name (symbol-table-%table table)) new-value)) 40 | 41 | 42 | 43 | (defrule whitespace 44 | (+ (or #\Space #\Tab #\Newline)) 45 | (:constant nil)) 46 | 47 | (defrule name 48 | (+ (alphanumericp character)) 49 | (:text t)) 50 | 51 | (defrule type 52 | (+ (alphanumericp character)) 53 | (:text t)) 54 | 55 | (defrule declaration 56 | (and name #\: type) 57 | (:destructure (name colon type) 58 | (declare (ignore colon)) 59 | (setf (lookup name) (list name :type type)) 60 | (values))) 61 | 62 | (defrule use 63 | name 64 | (:lambda (name) 65 | (list :use (or (lookup name) 66 | (error "~@" 67 | name))))) 68 | 69 | (defrule statement 70 | (+ (or scope declaration use)) 71 | (:lambda (items) 72 | (remove nil items))) 73 | 74 | (defrule statement/ws 75 | (and statement (? whitespace)) 76 | (:function first)) 77 | 78 | (defrule scope 79 | (and (and #\{ (? whitespace)) 80 | (* statement/ws) 81 | (and #\} (? whitespace))) 82 | (:function second) 83 | (:around () 84 | (let ((*symbol-table* (make-symbol-table *symbol-table*))) 85 | (list* :scope (apply #'append (call-transform)))))) 86 | 87 | (parse 'scope "{ 88 | a:int 89 | a 90 | { 91 | a 92 | b:double 93 | a 94 | b 95 | { 96 | a:string 97 | a 98 | b 99 | } 100 | a 101 | b 102 | } 103 | a 104 | }") 105 | -------------------------------------------------------------------------------- /src/cache/chunk.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2017-2019 Jan Moringen 2 | ;;;; 3 | ;;;; Permission is hereby granted, free of charge, to any person 4 | ;;;; obtaining a copy of this software and associated documentation files 5 | ;;;; (the "Software"), to deal in the Software without restriction, 6 | ;;;; including without limitation the rights to use, copy, modify, merge, 7 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ;;;; and to permit persons to whom the Software is furnished to do so, 9 | ;;;; subject to the following conditions: 10 | ;;;; 11 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 14 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 15 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | (cl:in-package #:esrap) 20 | 21 | (defconstant +chunk-divisor+ 8) 22 | 23 | ;;; CHUNK 24 | ;;; 25 | ;;; A vector of fixed size (ash 1 +chunk-divisor+) the indices of 26 | ;;; which correspond to a range of input positions. 27 | 28 | (deftype chunk () 29 | '(simple-array t 1)) 30 | 31 | (declaim (inline make-chunk)) 32 | (defun make-chunk () 33 | (make-array (ash 1 +chunk-divisor+) :initial-element nil)) 34 | 35 | ;;; CHUNK-ARRAY 36 | ;;; 37 | ;;; An array of CHUNKs. 38 | 39 | (deftype chunk-array () 40 | '(simple-array (or (eql 0) chunk) 1)) 41 | 42 | (declaim (inline %make-chunk-array)) 43 | (defun %make-chunk-array (length) 44 | (declare (type array-index length) (optimize speed)) 45 | (make-array length :initial-element 0)) 46 | 47 | (declaim (ftype (function (array-index) (values chunk-array &optional)) 48 | make-chunk-array)) 49 | (defun make-chunk-array (length) 50 | (declare (optimize speed)) 51 | (%make-chunk-array (1+ (ash length (- +chunk-divisor+))))) 52 | 53 | ;;; CHUNK-CACHE 54 | ;;; 55 | ;;; Maps input positions to CHUNKs, potentially allocating new CHUNKs. 56 | 57 | (declaim (inline chunk-cache-chunks)) 58 | (defstruct (chunk-cache 59 | (:constructor 60 | make-chunk-cache 61 | (length &aux (chunks (make-chunk-array length)))) 62 | (:predicate nil) 63 | (:copier nil)) 64 | (chunks nil :type chunk-array)) 65 | #+sbcl (declaim (sb-ext:freeze-type chunk-cache)) 66 | 67 | (declaim (ftype (function (input-position chunk-cache) (values (or null chunk) &optional)) 68 | find-chunk) 69 | (inline find-chunk)) 70 | (defun find-chunk (position chunk-cache) 71 | (declare (optimize speed)) 72 | (let* ((chunks (chunk-cache-chunks chunk-cache)) 73 | (position-1 (ash position (- +chunk-divisor+)))) 74 | (let ((current (aref chunks position-1))) 75 | (unless (eql current 0) 76 | current)))) 77 | (declaim (notinline find-chunk)) 78 | 79 | (declaim (ftype (function (input-position chunk-cache) 80 | (values chunk &optional)) 81 | ensure-chunk)) 82 | (defun ensure-chunk (position chunk-cache) 83 | (declare (optimize speed)) 84 | (or (locally (declare (inline find-chunk)) 85 | (find-chunk position chunk-cache)) 86 | (let ((chunks (chunk-cache-chunks chunk-cache)) 87 | (position-1 (ash position (- +chunk-divisor+)))) 88 | (setf (aref chunks position-1) (make-chunk))))) 89 | -------------------------------------------------------------------------------- /src/types.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; Input types 23 | 24 | (deftype input-position () 25 | 'array-index) 26 | 27 | (deftype input-length () 28 | 'array-length) 29 | 30 | ;;; Parser behavior types 31 | 32 | (deftype left-recursion-policy () 33 | '(or null (eql :error))) 34 | 35 | ;;; Expression types 36 | 37 | (deftype nonterminal () 38 | "Any symbol except CHARACTER and NIL can be used as a nonterminal symbol." 39 | '(and symbol (not (member character nil)))) 40 | 41 | (deftype terminal () 42 | "Literal strings and characters are used as case-sensitive terminal symbols, 43 | and expressions of the form \(~ ) denote case-insensitive terminals." 44 | '(or string character 45 | (cons (eql ~) (cons (or string character) null)))) 46 | 47 | (deftype character-range () 48 | "A character range is either a single character or a list of two 49 | characters." 50 | '(or character 51 | (cons character (cons character null)))) 52 | 53 | (deftype predicate-name () 54 | '(and symbol 55 | (not (member character-ranges string 56 | and or not 57 | * + ? & ! ~ < > 58 | function)))) 59 | 60 | (deftype predicate () 61 | '(cons predicate-name (cons (not null) null))) 62 | 63 | ;;; Rule-related types 64 | 65 | (deftype cache-policy () 66 | '(member nil t :unless-trivial)) 67 | 68 | (deftype error-report-part () 69 | "Named part of a parse error report." 70 | `(member :context :detail)) 71 | 72 | (deftype rule-error-report () 73 | "Suitability of a rule for error report parts. 74 | 75 | In addition to the ERROR-REPORT-PART values, NIL indicates 76 | unsuitability for all error report parts, while T indicates 77 | suitability for all parts." 78 | '(or (member t nil) error-report-part)) 79 | 80 | (deftype rule-error-report-pattern () 81 | "ERROR-REPORT-PART or a list thereof." 82 | '(or (member t nil) error-report-part (cons error-report-part))) 83 | 84 | (declaim (ftype (function (rule-error-report rule-error-report-pattern) 85 | (values boolean &optional)) 86 | error-report-behavior-suitable-for-report-part-p)) 87 | (defun error-report-behavior-suitable-for-report-part-p 88 | (query part-or-parts) 89 | "Return true if QUERY is suitable for PART-OR-PARTS." 90 | (when (or (eq query part-or-parts) 91 | (eq query t) 92 | (and (consp part-or-parts) 93 | (member query part-or-parts :test #'eq))) 94 | t)) 95 | -------------------------------------------------------------------------------- /test/util.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2017 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap-tests) 21 | 22 | (defmacro destructuring-lambda (lambda-list &body body) 23 | (with-gensyms (args) 24 | `(lambda (&rest ,args) 25 | (destructuring-bind ,lambda-list ,args 26 | ,@body)))) 27 | 28 | (defmacro with-silent-compilation-unit (() &body body) 29 | `(let ((*error-output* (make-broadcast-stream))) 30 | (with-compilation-unit (:override t) 31 | ,@body))) 32 | 33 | (defun call-expecting-signals-esrap-error (thunk input condition position 34 | &optional messages) 35 | (ecase condition 36 | (esrap-parse-error 37 | (signals (esrap-parse-error) (funcall thunk)))) 38 | (handler-case (funcall thunk) 39 | (esrap-error (condition) 40 | (is (string= (esrap-error-text condition) input)) 41 | (when position 42 | (is (= (esrap-error-position condition) position))) 43 | (let ((report (with-standard-io-syntax 44 | (let ((*print-pretty* t)) 45 | (with-output-to-string (stream) 46 | (pprint-logical-block (stream nil) 47 | (princ condition stream)))))) 48 | (start 0)) 49 | (mapc (lambda (message) 50 | (let ((position (search message report :start2 start))) 51 | (is (integerp position) 52 | "~@" 54 | message report start) 55 | (when position 56 | (setf start position)))) 57 | messages))))) 58 | 59 | (defmacro signals-esrap-error ((input condition position &optional messages) 60 | &body body) 61 | `(call-expecting-signals-esrap-error 62 | (lambda () ,@body) ,input 63 | ',condition ,position (list ,@(ensure-list messages)))) 64 | 65 | (defmacro test-both-modes (name &body body) 66 | (multiple-value-bind (body declarations documentation) 67 | (parse-body body :documentation t) 68 | (declare (ignore declarations)) 69 | (let ((name/interpreted (symbolicate name '#:.interpreted)) 70 | (name/compiled (symbolicate name '#:.compiled))) 71 | `(progn 72 | (test ,name/interpreted 73 | ,@(when documentation `(,documentation)) 74 | (let ((esrap::*eval-nonterminals* t)) 75 | (#-sbcl progn #+sbcl locally 76 | #+sbcl (declare (sb-ext:disable-package-locks esrap:parse)) 77 | (flet ((parse (&rest args) 78 | (apply #'parse args))) 79 | ,@body)))) 80 | (test ,name/compiled 81 | ,@(when documentation `(,documentation)) 82 | ,@body))))) 83 | -------------------------------------------------------------------------------- /examples/left-recursion.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Esrap example: some grammars with left-recursive rules. 2 | 3 | (cl:require :esrap) 4 | 5 | (cl:defpackage #:left-recursive-grammars 6 | (:use #:cl #:alexandria #:esrap) 7 | (:export #:la-expr #:ra-expr #:primary)) 8 | 9 | (cl:in-package :left-recursive-grammars) 10 | 11 | ;;; Left associative expressions 12 | 13 | (defrule la-expr 14 | la-term) 15 | 16 | (defrule la-literal 17 | (digit-char-p character) 18 | (:lambda (x) (parse-integer (text x)))) 19 | 20 | (defrule la-term 21 | (and la-factor (? (and (or #\+ #\-) la-term))) 22 | (:destructure (left (&optional op right)) 23 | (if op 24 | (list (find-symbol op :cl) left right) 25 | left))) 26 | 27 | (defrule la-factor 28 | (and (or la-literal la-expr) (? (and (or #\* #\/) la-factor))) 29 | (:destructure (left (&optional op right)) 30 | (if op 31 | (list (find-symbol op :cl) left right) 32 | left))) 33 | 34 | (defun test-la () 35 | (let ((*on-left-recursion* :error)) 36 | (assert (equal (parse 'la-expr "1*2+3*4+5") 37 | '(+ (* 1 2) 38 | (+ (* 3 4) 39 | 5)))))) 40 | 41 | ;;; Right associative expressions 42 | 43 | (defrule ra-expr 44 | ra-term) 45 | 46 | (defrule ra-literal 47 | (digit-char-p character) 48 | (:lambda (x) (parse-integer (text x)))) 49 | 50 | (defrule ra-term 51 | (and (? (and ra-term (or #\+ #\-))) ra-factor) 52 | (:destructure ((&optional left op) right) 53 | (if op 54 | (list (find-symbol op :cl) left right) 55 | right))) 56 | 57 | (defrule ra-factor 58 | (and (? (and ra-factor (or #\* #\/))) (or ra-literal ra-expr)) 59 | (:destructure ((&optional left op) right) 60 | (if op 61 | (list (find-symbol op :cl) left right) 62 | right))) 63 | 64 | (defun test-ra () 65 | (let ((*on-left-recursion* :error)) 66 | (parse 'ra-expr "1*2+3*4+5")) ; |- Error 67 | 68 | (assert (equal (parse 'ra-expr "1*2+3*4+5") 69 | '(+ (+ (* 1 2) 70 | (* 3 4)) 71 | 5)))) 72 | 73 | ;;; The following example is given in 74 | ;;; 75 | ;;; Alessandro Warth, James R. Douglass, Todd Millstein, 2008, 76 | ;;; "Packrat Parsers Can Support Left Recursion". 77 | ;;; http://www.vpri.org/pdf/tr2007002_packrat.pdf 78 | 79 | (defrule primary 80 | primary-no-new-array) 81 | 82 | (defrule primary-no-new-array 83 | (or class-instance-creation-expression 84 | method-invocation 85 | field-access 86 | array-access 87 | "this")) 88 | 89 | (defrule class-instance-creation-expression 90 | (or (and "new" class-or-interface-type "()") 91 | (and primary ".new" identifier "()"))) 92 | 93 | ;; Note: in the paper, the first case is 94 | ;; 95 | ;; (and primary "." identifier "()") 96 | ;; 97 | ;; but that seems to be an error. 98 | (defrule method-invocation 99 | (or (and primary "." method-name "()") 100 | (and (and) (and) method-name "()")) 101 | (:destructure (structure dot name parens) 102 | (declare (ignore dot parens)) 103 | (list :method-invocation structure name))) 104 | 105 | (defrule field-access 106 | (or (and primary "." identifier) 107 | (and "super." identifier)) 108 | (:destructure (structure dot field) 109 | (declare (ignore dot)) 110 | (list :field-access structure field))) 111 | 112 | (defrule array-access 113 | (or (and primary "[" expression "]") 114 | (and expression-name "[" expression "]")) 115 | (:destructure (structure open index close) 116 | (declare (ignore open close)) 117 | (list :array-access structure index))) 118 | 119 | (defrule class-or-interface-type 120 | (or class-name interface-type-name)) 121 | 122 | (defrule class-name 123 | (or "C" "D")) 124 | 125 | (defrule interface-type-name 126 | (or "I" "J")) 127 | 128 | (defrule identifier 129 | (or "x" "y" class-or-interface-type)) 130 | 131 | (defrule method-name 132 | (or "m" "n")) 133 | 134 | (defrule expression-name 135 | identifier) 136 | 137 | (defrule expression 138 | (or "i" "j")) 139 | 140 | (defun test-warth () 141 | (mapc 142 | (curry #'apply 143 | (lambda (input expected) 144 | (assert (equal (parse 'primary input) expected)))) 145 | '(("this" "this") 146 | ("this.x" (:field-access "this" "x")) 147 | ("this.x.y" (:field-access (:field-access "this" "x") "y")) 148 | ("this.x.m()" (:method-invocation (:field-access "this" "x") "m")) 149 | ("x[i][j].y" (:field-access (:array-access (:array-access "x" "i") "j") "y"))))) 150 | -------------------------------------------------------------------------------- /examples/function-terminals.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Esrap example: some grammars with function-based terminals. 2 | 3 | (cl:require :esrap) 4 | 5 | (cl:defpackage #:esrap-example.function-terminals 6 | (:use #:cl #:esrap) 7 | (:export #:indented-block #:common-lisp)) 8 | 9 | (cl:in-package #:esrap-example.function-terminals) 10 | 11 | ;;; Ex. 1. Using a custom terminal for context sensitive parsing. 12 | ;;; 13 | ;;; Among many other things, this can be used to implement 14 | ;;; indentation-based grammars such as Python's. 15 | 16 | (defrule whitespace (+ #\space) 17 | (:constant nil)) 18 | 19 | ;; *CURRENT-INDENT* tracks the current indentation and CURRENT-INDENT 20 | ;; *succeeds when it can consume exactly CURRENT-INDENT* units of 21 | ;; *indentation. 22 | (defvar *current-indent* 0) 23 | 24 | (defun current-indent-p (indent) 25 | (= indent *current-indent*)) 26 | 27 | (defrule indent (* #\space) 28 | (:function length)) 29 | 30 | (defrule current-indent 31 | (current-indent-p indent)) 32 | 33 | ;; Just a dummy rule for the statement-like elements of the 34 | ;; grammar. This is not the focus of this example. For simplicity, 35 | ;; each statement is on one line. 36 | (defrule statement (+ (character-ranges (#\a #\z))) 37 | (:text t)) 38 | 39 | (defrule line (and statement #\newline) 40 | (:function first)) 41 | 42 | (defrule block-content 43 | (or if line)) 44 | 45 | (defrule indented-block-content 46 | (and current-indent block-content) 47 | (:function second)) 48 | 49 | ;; PARSE-INDENTED-BLOCK is the real meat. It determines the new 50 | ;; indentation depth via a nested (PARSE INDENT ...) call which does 51 | ;; not consume input. The block's content can then be parsed with a 52 | ;; suitably increased current indent. 53 | ;; 54 | ;; The result of the second PARSE call is returned "raw" in case of 55 | ;; success. This allows the associated result tree to be attached to 56 | ;; the global result tree and permits lazy computation of rule 57 | ;; productions within the sub-tree (beneficial if e.g. the result of 58 | ;; the parse, despite successful, is not used in the global result). 59 | (defun parse-indented-block (text position end) 60 | (multiple-value-bind (new-indent new-position) 61 | (parse 'indent text :start position :end end 62 | :junk-allowed t) 63 | (if (> new-indent *current-indent*) 64 | (let ((*current-indent* new-indent)) 65 | (parse '(+ indented-block-content) text 66 | :start position :end end :raw t)) 67 | (values nil new-position "Expected indent")))) 68 | 69 | (defrule indented-block #'parse-indented-block) 70 | 71 | (defrule if 72 | (and (and "if" whitespace) statement (and #\: #\Newline) 73 | indented-block 74 | (? (and (and current-indent "else" #\: #\Newline) 75 | indented-block))) 76 | (:destructure (if-keyword condition colon then 77 | (&optional else-keyword else)) 78 | (declare (ignore if-keyword colon else-keyword)) 79 | (list* 'if condition then (when else (list else))))) 80 | 81 | (defun test-indentation () 82 | (parse 'indented-block 83 | " foo 84 | bar 85 | quux 86 | if foo: 87 | bla 88 | if baz: 89 | bli 90 | blo 91 | else: 92 | whoop 93 | blu 94 | ")) 95 | 96 | ;;; Ex. 2. Using CL:READ to parse lisp. 97 | 98 | (defun parse-using-read (text position end) 99 | (handler-case 100 | ;; When successful, READ-FROM-STRING returns the read object and 101 | ;; the position up to which TEXT has been consumed. 102 | (read-from-string text t nil :start position :end end) 103 | ;; When READ-FROM-STRING fails, indicate the parse failure, 104 | ;; including CONDITION as explanation. 105 | (stream-error (condition) 106 | ;; For STREAM-ERRORs, we can try to determine and return the 107 | ;; exact position of the failure. 108 | (let ((position (ignore-errors 109 | (file-position (stream-error-stream condition))))) 110 | (values nil position condition))) 111 | (error (condition) 112 | ;; For general ERRORs, we cannot determine the exact position of 113 | ;; the failure. 114 | (values nil nil condition)))) 115 | 116 | (defrule common-lisp #'parse-using-read) 117 | 118 | ;; When parsing anything by using CL:READ, it is probably a good idea 119 | ;; to disable *READ-EVAL*. The package in which symbols will be 120 | ;; interned has to be kept in mind as well. 121 | (defun test-read () 122 | (with-standard-io-syntax 123 | (let (; (*package* (find-package :my-package-for-symbols)) 124 | (*read-eval* nil)) 125 | ;; This contains deliberate syntax errors to highlight the error 126 | ;; position and error message reporting implemented in 127 | ;; PARSE-USING-READ. 128 | (parse 'common-lisp "(list 'i :::love 'lisp")))) 129 | -------------------------------------------------------------------------------- /test/examples.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2017 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap-tests) 21 | 22 | (in-suite esrap) 23 | 24 | (test-both-modes example-left-recursion.left-associative 25 | "Left associate grammar from example-left-recursion.lisp." 26 | ;; This grammar should work without left recursion. 27 | (is (equal '(+ (* 1 2) 28 | (+ (* 3 4) 29 | 5)) 30 | (let ((*on-left-recursion* :error)) 31 | (parse 'left-recursive-grammars:la-expr "1*2+3*4+5"))))) 32 | 33 | (test-both-modes example-left-recursion.right-associative 34 | "Right associate grammar from example-left-recursion.lisp." 35 | ;; This combination of grammar and input would require left 36 | ;; recursion. 37 | (signals left-recursion 38 | (let ((*on-left-recursion* :error)) 39 | (parse 'left-recursive-grammars:ra-expr "1*2+3*4+5"))) 40 | 41 | (is (equal `(+ (+ (* 1 2) 42 | (* 3 4)) 43 | 5) 44 | (parse 'left-recursive-grammars:ra-expr "1*2+3*4+5")))) 45 | 46 | (test-both-modes example-left-recursion.warth.smoke 47 | "Warth's Java expression example from example-left-recursion.lisp." 48 | (mapc 49 | (curry #'apply 50 | (lambda (input expected) 51 | (is (equal expected 52 | (parse 'left-recursive-grammars:primary input))))) 53 | '(("this" "this") 54 | ("this.x" (:field-access "this" "x")) 55 | ("this.x.y" (:field-access (:field-access "this" "x") "y")) 56 | ("this.x.m()" (:method-invocation (:field-access "this" "x") "m")) 57 | ("x[i][j].y" (:field-access (:array-access (:array-access "x" "i") "j") "y"))))) 58 | 59 | (test-both-modes example-function-terminals.indented-block.smoke 60 | "Context-sensitive parsing via function terminals." 61 | (is (equal '("foo" "bar" "quux" 62 | (if "foo" 63 | ("bla" 64 | (if "baz" 65 | ("bli" "blo") 66 | ("whoop")))) 67 | "blu") 68 | (parse 'esrap-example.function-terminals:indented-block 69 | " foo 70 | bar 71 | quux 72 | if foo: 73 | bla 74 | if baz: 75 | bli 76 | blo 77 | else: 78 | whoop 79 | blu 80 | ")))) 81 | 82 | (test-both-modes example-function-terminals.indented-block.condition 83 | "Context-sensitive parsing via function terminals." 84 | (let ((input "if foo: 85 | bla 86 | ")) 87 | (signals-esrap-error (input esrap-parse-error 0 88 | ("In context INDENTED-BLOCK:" 89 | "While parsing INDENTED-BLOCK." 90 | "Problem:" "Expected indent" 91 | "Expected:" 92 | "a string that can be parsed by the function")) 93 | (parse 'esrap-example.function-terminals:indented-block input)))) 94 | 95 | (test-both-modes example-function-terminals.read.smoke 96 | "Using CL:READ as a terminal." 97 | (macrolet ((test-case (input expected) 98 | `(is (equal ,expected 99 | (with-standard-io-syntax 100 | (parse 'esrap-example.function-terminals:common-lisp 101 | ,input)))))) 102 | (test-case "(1 2 3)" '(1 2 3)) 103 | (test-case "foo" 'cl-user::foo) 104 | (test-case "#C(1 3/4)" #C(1 3/4)))) 105 | 106 | (test-both-modes example-function-terminals.read.condition 107 | "Test error reporting in the CL:READ-based rule" 108 | (handler-case 109 | (with-standard-io-syntax 110 | (parse 'esrap-example.function-terminals:common-lisp 111 | "(list 'i :::love 'lisp")) 112 | (esrap-parse-error (condition) 113 | #-sbcl (declare (ignore condition)) 114 | ;; Different readers may report this differently. 115 | #+sbcl (is (<= 9 (esrap-error-position condition) 16)) 116 | ;; Not sure how other lisps report this. 117 | #+sbcl (is (search "too many colons" 118 | (princ-to-string condition)))))) 119 | -------------------------------------------------------------------------------- /test/expressions.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2017, 2019 Jan Moringen 2 | ;;;; 3 | ;;;; Permission is hereby granted, free of charge, to any person 4 | ;;;; obtaining a copy of this software and associated documentation files 5 | ;;;; (the "Software"), to deal in the Software without restriction, 6 | ;;;; including without limitation the rights to use, copy, modify, merge, 7 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ;;;; and to permit persons to whom the Software is furnished to do so, 9 | ;;;; subject to the following conditions: 10 | ;;;; 11 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 14 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 15 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | (cl:in-package #:esrap-tests) 20 | 21 | (in-suite esrap) 22 | 23 | (test %direct-expression-dependencies.smoke 24 | "Smoke test for the %EXPRESSION-DEPENDENCIES function." 25 | 26 | (mapc (destructuring-lambda ((expression expected-dependencies)) 27 | (let ((result (%expression-direct-dependencies expression))) 28 | (is (equal result (remove-duplicates result))) 29 | (is (set-equal expected-dependencies result)))) 30 | '((character ()) 31 | ((character-ranges #\a #\z) ()) 32 | ((string 5) ()) 33 | ((and #\a #\b) ()) 34 | ((or #\a #\b) ()) 35 | ((not #\a) ()) 36 | ((* #\a) ()) 37 | ((+ #\a) ()) 38 | ((? #\a) ()) 39 | ((& #\a) ()) 40 | ((! #\a) ()) 41 | ((< 1 #\a) ()) 42 | ((> 1 #\a) ()) 43 | (#\c ()) 44 | ("foo" ()) 45 | ((~ "foo") ()) 46 | 47 | (foo (foo)) 48 | ((and foo foo) (foo)) 49 | ((and foo bar) (foo bar)) 50 | ((consp foo) (foo)) 51 | 52 | ((foo #\a) ()) 53 | (#'foo ())))) 54 | 55 | (defrule expression-dependencies.1 56 | (and #\c (? expression-dependencies.undefined) 57 | expression-dependencies.1 58 | expression-dependencies.2)) 59 | 60 | (defrule expression-dependencies.2 61 | expression-dependencies.3) 62 | 63 | (defrule expression-dependencies.3 64 | #\d) 65 | 66 | (test %expression-dependencies.smoke 67 | "Smoke test for the %EXPRESSION-DEPENDENCIES function." 68 | 69 | (mapc (destructuring-lambda ((expression expected-dependencies)) 70 | (let ((result (%expression-dependencies expression))) 71 | (is (equal result (remove-duplicates result))) 72 | (is (set-equal expected-dependencies result)))) 73 | '((foo (foo)) 74 | ((and foo foo) (foo)) 75 | ((and foo bar) (foo bar)) 76 | (expression-dependencies.1 (expression-dependencies.undefined 77 | expression-dependencies.3 78 | expression-dependencies.2 79 | expression-dependencies.1)) 80 | (expression-dependencies.2 (expression-dependencies.3 81 | expression-dependencies.2)) 82 | (expression-dependencies.3 (expression-dependencies.3))))) 83 | 84 | (test expression-simple-p 85 | "Smoke test for the EXPRESSION-SIMPLE-P function." 86 | 87 | (mapc (destructuring-lambda ((expression expected)) 88 | (is (equal expected (expression-simple-p 89 | expression 90 | :depth-limit 2 91 | :string-length-limit 3 92 | :character-ranges-size-limit 3)))) 93 | '((character t) 94 | ((character-ranges #\a #\b) t) 95 | ((character-ranges #\a #\b #\c) nil) 96 | ((string 2) t) 97 | ((string 3) nil) 98 | ((and #\c) t) 99 | ((and (and #\c)) nil) 100 | ((or #\c) t) 101 | ((or (and #\c)) nil) 102 | ((not #\c) t) 103 | ((not (and #\c)) nil) 104 | ((< 1 #\c) t) 105 | ((< 1 (and #\c)) nil) 106 | ((> 1 #\c) t) 107 | ((> 1 (and #\c)) nil) 108 | (#\c t) 109 | ("ab" t) 110 | ("abc" nil) 111 | ((~ "ab") t) 112 | ((and (~ "ab")) t) 113 | ((and (and (~ "ab"))) nil) 114 | ;; 115 | (foo nil) 116 | ((foo-p "bar") nil) 117 | (#'digit-char-p nil)))) 118 | -------------------------------------------------------------------------------- /src/cache/packrat.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2017-2019 Jan Moringen 2 | ;;;; 3 | ;;;; Permission is hereby granted, free of charge, to any person 4 | ;;;; obtaining a copy of this software and associated documentation files 5 | ;;;; (the "Software"), to deal in the Software without restriction, 6 | ;;;; including without limitation the rights to use, copy, modify, merge, 7 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 8 | ;;;; and to permit persons to whom the Software is furnished to do so, 9 | ;;;; subject to the following conditions: 10 | ;;;; 11 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 12 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 13 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 14 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 15 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 16 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 17 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 18 | 19 | (cl:in-package #:esrap) 20 | 21 | ;;; Packrat cache 22 | ;;; 23 | ;;; A cache mapping pairs (input position, rule name) to parse 24 | ;;; results. Its purpose is avoiding multiple identical applications 25 | ;;; of rules. This can improve performance and act as the foundation 26 | ;;; of a framework for handling left recursion. 27 | ;;; 28 | ;;; Since reads from and writes to this cache can be a performance 29 | ;;; bottleneck, the implementation tries to be as runtime and memory 30 | ;;; efficient as possible. A two-level scheme maps the pairs 31 | ;;; mentioned above to parse results: 32 | ;;; 1. an array maps the input position to a secondary structure 33 | ;;; 2. this structure maps the rule name to the cached parse results 34 | ;;; 35 | ;;; The interesting part about 1. is not allocating an array of the 36 | ;;; same size as the input upfront while keeping lookup performance 37 | ;;; reasonable. This trade-off is achieved using a "chunk cache". 38 | ;;; 39 | ;;; The difficulty with 2. is the variety of scenarios that have to be 40 | ;;; supported efficiently w.r.t. memory and runtime. To address this 41 | ;;; issue, the secondary structure uses one of multiple 42 | ;;; representations depending on the situation: 43 | ;;; 44 | ;;; + If only a mapping from a single rule to the associated parse 45 | ;;; result has to be represented, a single cons cell is used. 46 | ;;; 47 | ;;; + For a small number of mapping entries, the number of entries and 48 | ;;; an alist are stored to represent the mapping. 49 | ;;; 50 | ;;; + In the (uncommon) case that more than a few entries have to be 51 | ;;; stored, a hash-table is used. 52 | ;;; 53 | ;;; Switches between representations happen when entries are added. 54 | 55 | (eval-when (:compile-toplevel :load-toplevel :execute) 56 | (defconstant +packrat-hash-table-switch-point+ 16)) 57 | 58 | (declaim (ftype (function (symbol input-position chunk-cache) (values t &optional)) 59 | cached)) 60 | (defun cached (symbol position cache) 61 | (declare (optimize speed)) 62 | (let* ((chunk (find-chunk position cache)) 63 | (position-2 (ldb (byte +chunk-divisor+ 0) position)) 64 | (cell (when chunk 65 | (aref chunk position-2)))) 66 | (cond ((null cell) 67 | nil) 68 | ((not (consp cell)) 69 | (gethash symbol cell)) 70 | ((not (consp (cdr cell))) 71 | (when (eq (car cell) symbol) 72 | (cdr cell))) 73 | (t 74 | (assoc-value (cdr cell) symbol :test #'eq))))) 75 | 76 | (declaim (ftype (function (t symbol input-position chunk-cache) (values t &optional)) 77 | (setf cached))) 78 | (defun (setf cached) (result symbol position cache) 79 | (declare (optimize speed)) 80 | (let* ((chunk (ensure-chunk position cache)) 81 | (position-2 (ldb (byte +chunk-divisor+ 0) position)) 82 | (cell (aref chunk position-2))) 83 | (cond 84 | 85 | ;; No entry => Create a singleton entry using one CONS. 86 | ((null cell) 87 | (setf (aref chunk position-2) (cons symbol result))) 88 | 89 | ;; Not a CONS => Has to be a hash-table. Store the result. 90 | ((not (consp cell)) 91 | (setf (gethash symbol cell) result)) 92 | 93 | ;; A singleton CONS => Maybe extend to a list of the form 94 | ;; 95 | ;; (LENGTH . (KEY1 . RESULT1) (KEY2 . RESULT2) ...) 96 | ;; 97 | ;; where LENGTH is initially 2 after upgrading from a singleton 98 | ;; CONS. 99 | ((not (consp (cdr cell))) 100 | (if (eq (car cell) symbol) 101 | (setf (cdr cell) result) 102 | (setf (aref chunk position-2) 103 | (cons 2 (acons symbol result (list cell)))))) 104 | 105 | ;; A list with leading length as described above. 106 | (t 107 | (let ((count (car cell)) ; note: faster than DESTRUCTURING-BIND 108 | (entries (cdr cell))) 109 | (declare (type (integer 0 #.+packrat-hash-table-switch-point+) count)) 110 | (cond ;; When there is an entry for RESULT, update it. 111 | ((when-let ((entry (assoc symbol entries :test #'eq))) 112 | (setf (cdr entry) result) 113 | t)) 114 | ;; When there are +PACKRAT-HASH-TABLE-SWITCH-POINT+ 115 | ;; entries and we need another one, upgrade to 116 | ;; HASH-TABLE, then store the new entry. 117 | ((= count +packrat-hash-table-switch-point+) 118 | (let ((table (setf (aref chunk position-2) 119 | (alist-hash-table entries :test #'eq)))) 120 | (setf (gethash symbol table) result))) 121 | ;; When there are less than 122 | ;; +PACKRAT-HASH-TABLE-SWITCH-POINT+ entries and we 123 | ;; need another one, increase the counter and add an 124 | ;; entry. 125 | (t 126 | (setf (car cell) (1+ count)) 127 | (setf (cdr cell) (acons symbol result entries)))))))) 128 | result) 129 | -------------------------------------------------------------------------------- /esrap.asd: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (defsystem "esrap" 21 | :version "0.18" 22 | :description "A Packrat / Parsing Grammar / TDPL parser for Common Lisp." 23 | :long-description "A Packrat / Parsing Grammar / TDPL parser for Common Lisp. 24 | 25 | Notable features include 26 | 27 | * dynamic redefinition of nonterminals 28 | * inline grammars 29 | * semantic predicates 30 | * introspective facilities (describing grammars, 31 | tracing, setting breaks) 32 | * left-recursive grammars 33 | * functions as terminals 34 | * accurate, customizable parse error reports 35 | 36 | See README.org and :homepage for more 37 | information." 38 | :author ("Nikodemus Siivola " 39 | "Jan Moringen ") 40 | :maintainer "Jan Moringen " 41 | :homepage "https://scymtym.github.io/esrap" 42 | :bug-tracker "https://github.com/scymtym/esrap/issues" 43 | :source-control (:git "https://github.com/scymtym/esrap.git") 44 | :licence "MIT" 45 | :depends-on ("alexandria") 46 | :components ((:module "early" 47 | :pathname "src" 48 | :serial t 49 | :components ((:file "package") 50 | (:file "types") 51 | (:file "protocol") 52 | (:file "variables") 53 | (:file "conditions") 54 | (:file "expressions") 55 | (:file "rule") 56 | (:file "results"))) 57 | 58 | (:module "cache" 59 | :pathname "src/cache" 60 | :depends-on ("early") 61 | :serial t 62 | :components ((:file "chunk") 63 | (:file "packrat"))) 64 | 65 | (:module "src" 66 | :depends-on ("early" "cache") 67 | :serial t 68 | :components ((:file "context") 69 | 70 | (:file "evaluator") 71 | (:file "macros") 72 | (:file "interface") 73 | (:file "editor-support"))) 74 | 75 | (:module "examples" 76 | :components ((:static-file "sexp.lisp") 77 | (:static-file "symbol-table.lisp") 78 | (:static-file "left-recursion.lisp") 79 | (:static-file "function-terminals.lisp"))) 80 | 81 | (:static-file "README.org")) 82 | :in-order-to ((test-op (test-op "esrap/tests")))) 83 | 84 | (defmethod perform :after ((op load-op) (sys (eql (find-system "esrap")))) 85 | ;; Since version 0.19 86 | ;; * DEFRULE accepts a :USE-CACHE option 87 | ;; Since version 0.16 88 | ;; * DEFRULE accepts an :ERROR-REPORT option 89 | ;; Since version 0.15 90 | ;; * All transforms that support it, can access bounds via &BOUNDS. 91 | ;; Since version 0.14 92 | (pushnew :esrap.lookahead *features*) 93 | (pushnew :esrap.lookbehind *features*) 94 | ;; Since version 0.13 95 | (pushnew :esrap.expression-start-terminals *features*) 96 | ;; Since version 0.12 97 | (pushnew :esrap.function-terminals *features*) 98 | ;; Since version 0.11 99 | (pushnew :esrap.multiple-transforms *features*) 100 | ;; Since version 0.10 101 | (pushnew :esrap.can-handle-left-recursion *features*) 102 | 103 | ;; For consistency with examples which contain (require :esrap). 104 | (provide :esrap)) 105 | 106 | (defsystem "esrap/tests" 107 | :description "Tests for ESRAP." 108 | :author ("Nikodemus Siivola " 109 | "Jan Moringen ") 110 | :maintainer "Jan Moringen " 111 | :licence "MIT" 112 | :depends-on ("esrap" 113 | (:version "fiveam" "1.3")) 114 | :serial t 115 | :components ((:module "examples" 116 | :components ((:file "left-recursion") 117 | (:file "function-terminals"))) 118 | 119 | (:module "test" 120 | :serial t 121 | :components ((:file "package") 122 | (:file "util") 123 | 124 | (:file "expressions") 125 | (:file "macros") 126 | (:file "tests") 127 | 128 | (:file "stress") 129 | 130 | (:file "examples") 131 | (:file "readme")))) 132 | 133 | :perform (test-op (operation component) 134 | (uiop:symbol-call '#:esrap-tests '#:run-tests))) 135 | -------------------------------------------------------------------------------- /TODO.org: -------------------------------------------------------------------------------- 1 | Esrap TODO 2 | 3 | * Optimizations 4 | ** DONE Common special variable for cache and heads 5 | As access to special variables can be slow, it may make sense to 6 | store =*cache*= and =*head*= in slots of a structure =context= and 7 | this structure in a new special variable =*context*=. 8 | ** TODO Error vs Success results 9 | We're interested in the production/failure, and the position. 10 | 11 | The vast majority of parses result in failures, and we cons up a 12 | =failed-parse= for each. Unless the whole parse fails, the only 13 | thing we are interested in is the position. 14 | 15 | ...and even if the whole parse fails, we use the additional 16 | information only to display a fancy error message. 17 | 18 | So: unless =parse= has been called with =:debug t=, use the 19 | =fixnum= indicating the position to indicate a failed parse. 20 | ** TODO Results of sequence expressions 21 | In the case of a success and matching a sequence we don't really 22 | need the whole result object for each submatch. Maybe return result 23 | as multiple values from each rule, and have =with-cached-result= 24 | cons up the object to store it when? 25 | ** TODO Character ranges 26 | =(or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")= can be implemented 27 | with a range-check for =char-code=. Similarly (or "foobar" "foo" 28 | "bar") can be implemented using tricks from pkhuong's STRING-CASE. 29 | ** STARTED Cache 30 | The cache is a big bottleneck. Try out a few different designs. 31 | Early experiments show that while it's easy to make something that 32 | conses less, it's not trivial to make it much faster than the 33 | current simple hash-table based version. 34 | 35 | Some statistics: 36 | 37 | - 5-10% of positions in a given text have only failure results. 38 | If we can efficiently record the rules these are for... 39 | - 40-50% of positions in a given text end up with exactly one 40 | successful result, irrespective of number of failures. Not sure 41 | if we can use this. 42 | - 75% of positions in a given text end up with results. This 43 | should make a good estimate for the size of the cache needed. 44 | 45 | GC is another related bottleneck. Not because we cons so much, but 46 | because we have this massive cache that keeps being written to, so 47 | we have boxed objects on dirty pages. 48 | 49 | To reduce the GC pressure first optimize the result handling. If 50 | the issue still exists, see the first option below. 51 | 52 | Maybe: Map rule to a position cache. In the position cache, need 53 | to be able to differentiate between 3 states: no result, success, 54 | failure. Need to also be able to store the result. If we store 55 | results in a single global result vector, and use N bits per 56 | position in the position cache: 0 no result, 1 failure, anything 57 | else is the position of the result object in the global vector. 58 | 59 | Maybe: PCL-style multikey cache. 60 | 61 | Maybe: Basic two-level cache. (Version of this on a branch.) 62 | * Features 63 | ** TODO Thread safety 64 | Parsing is currently thread-safe if =parse= has been 65 | compiler-macroexanded. =*RULES*= needs locking, but isn't used 66 | during actual parsing. 67 | ** TODO Add =define-expression-syntax= 68 | ** STARTED Grammar objects 69 | Rules should be contained in grammars, so that symbols like =cl:if= 70 | can refer to different rules in different contexts. Grammars can 71 | also enforce rule numbering, making caching results easier. It 72 | should be possible to inherit from other grammars. 73 | *** TODO Optimizations leading to old rules being used are in principle fine 74 | but I would like to have that behind a flag so it can be turned off 75 | for debugging. No need to export or document the flag. 76 | *** DONE Accepting designators is good 77 | *** DONE No circular dependencies sounds good to me 78 | *** DONE Overall I value backwards compatibility highly. 79 | If keeping it doesn't make things clearly worse or implementation 80 | much harder, I would keep it. Therefore I would prefer ADD-RULE and 81 | &co to default to grammar at runtime, and have that as either a 82 | keyword or an optional argument. Analogously to how INTERN &co work. 83 | *** DONE DEFRULE on the other hand 84 | should IMO choose the grammar (if not explicitly given) at 85 | compile-time if at all possible. Earlier is better there, I suspect. 86 | *** DONE Naming things with string designators instead of symbols. 87 | I see the attraction, but this means that all rules in a grammar are 88 | public and prone to conflict, no? 89 | 90 | If I have a grammar G1 in package BAR that specifies a rule called 91 | BAR:WHITESPACE and a G2 in FOO that specified FOO:WHITESPACE, then 92 | G3 in QUUX can use both without problems. If both rules are really 93 | called "WHITESPACE", things can get confusing pretty quickly... 94 | *** DONE Grammar names in tests 95 | ** STARTED Character classes 96 | Have =standard-grammar= instances that define things like digit, 97 | whitespace, ascii, etc. 98 | 99 | This will probably be done in a different system. 100 | ** CANCELED Transform Subseq 101 | #+BEGIN_SRC lisp 102 | (defrule decimal (+ (or "0" "1" ...)) 103 | (:subseq-function parse-integer)) 104 | #+END_SRC 105 | ** DONE Character ranges 106 | Make it easy to specify character ranges, eg. =(char #\0 #\9)=. 107 | * Improvements 108 | ** TODO Run all tests in evaluated mode 109 | ** TODO Get rid of =*current-cell*= 110 | ** TODO Documentation strings 111 | Structures and classes have a mixture of documentation strings and 112 | documentation comments. Which do we want? After deciding, make this 113 | consistent. 114 | ** TODO Write tests for removing rules 115 | ** STARTED Reference example files from manual 116 | ** DONE Write tests for ~%expression-[direct-]dependencies~ 117 | ** DONE Better error reports 118 | For parse errors, in particular "incomplete parse" errors, provide 119 | a better description of where the parse actually failed, which rule 120 | or rules were involved and what input was expected. 121 | ** DONE Tests for rule tracing functionality 122 | ** DONE Remove =concat= 123 | * Bugs 124 | ** NEW Tracing does not work for interpreted rules 125 | ** NEW Recursive rules cannot be removed 126 | ** DONE ~%expression-direct-dependencies~ returns duplicates 127 | 128 | #+SEQ_TODO: NEW TODO STARTED | CANCELED DONE 129 | -------------------------------------------------------------------------------- /README.org: -------------------------------------------------------------------------------- 1 | #+TITLE: ESRAP -- a packrat parser for Common Lisp 2 | 3 | * Introduction 4 | 5 | In addition to regular Packrat / Parsing Grammar / TDPL features 6 | ESRAP supports: 7 | 8 | + dynamic redefinition of nonterminals 9 | + inline grammars 10 | + semantic predicates 11 | + introspective facilities (describing grammars, tracing, setting breaks) 12 | + left-recursive grammars 13 | + functions as terminals 14 | + accurate, customizable parse error reports 15 | 16 | Homepage & Documentation 17 | 18 | https://scymtym.github.io/esrap/ 19 | 20 | #+ATTR_HTML: :alt "build status image" :title Build Status :align right 21 | [[https://travis-ci.org/scymtym/esrap][https://travis-ci.org/scymtym/esrap.svg]] 22 | 23 | References 24 | 25 | + Bryan Ford, 2002, "Packrat Parsing: a Practical Linear Time 26 | Algorithm with Backtracking". 27 | 28 | http://pdos.csail.mit.edu/~baford/packrat/thesis/ 29 | 30 | + A. Warth et al, 2008, "Packrat Parsers Can Support Left 31 | Recursion". 32 | 33 | http://www.vpri.org/pdf/tr2007002_packrat.pdf 34 | 35 | License 36 | 37 | #+BEGIN_EXAMPLE 38 | Copyright (c) 2007-2013 Nikodemus Siivola 39 | Copyright (c) 2012-2019 Jan Moringen 40 | 41 | Permission is hereby granted, free of charge, to any person 42 | obtaining a copy of this software and associated documentation 43 | files (the "Software"), to deal in the Software without 44 | restriction, including without limitation the rights to use, copy, 45 | modify, merge, publish, distribute, sublicense, and/or sell copies 46 | of the Software, and to permit persons to whom the Software is 47 | furnished to do so, subject to the following conditions: 48 | 49 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 50 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 51 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 52 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT 53 | HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 54 | WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 55 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 56 | DEALINGS IN THE SOFTWARE. 57 | #+END_EXAMPLE 58 | 59 | * Syntax Overview 60 | 61 | #+BEGIN_EXAMPLE 62 | -- case-sensitive terminal 63 | (~ ) -- case-insensitive terminal 64 | character -- any single character 65 | (string ) -- any string of length 66 | (character-ranges ) -- character ranges 67 | ( ) -- semantic parsing 68 | (function ) -- call to parse some text 69 | 70 | (not ) -- complement of expression 71 | (and &rest ) -- sequence 72 | (or &rest ) -- ordered-choices 73 | 74 | (* ) -- greedy-repetition 75 | (+ ) -- greedy-positive-repetition 76 | (? ) -- optional 77 | (& ) -- followed-by; does not consume 78 | (! ) -- not-followed-by; does not consume 79 | (< ) -- lookbehind characters; does not consume 80 | (> ) -- lookahead characters; does not consume 81 | #+END_EXAMPLE 82 | 83 | * Trivial Examples 84 | 85 | #+BEGIN_SRC lisp :results silent :exports both :session "doc" 86 | (ql:quickload :esrap) 87 | #+END_SRC 88 | 89 | The =parse= function takes an expression: 90 | 91 | #+BEGIN_SRC lisp :results value :exports both :session "doc" 92 | (multiple-value-list (esrap:parse '(or "foo" "bar") "foo")) 93 | #+END_SRC 94 | 95 | #+RESULTS: 96 | #+BEGIN_SRC lisp 97 | ("foo" NIL T) 98 | #+END_SRC 99 | 100 | New rules can be added. 101 | 102 | Normally you'd use the declarative =defrule= interface to define new 103 | rules, but everything it does can be done directly by building 104 | instances of the =rule= class and using =add-rule= to activate them. 105 | 106 | #+BEGIN_SRC lisp :results value :exports both :session "doc" 107 | (progn 108 | (esrap:add-rule 'foo+ (make-instance 'esrap:rule :expression '(+ "foo"))) 109 | 110 | (multiple-value-list (esrap:parse 'foo+ "foofoofoo"))) 111 | #+END_SRC 112 | 113 | #+RESULTS: 114 | #+BEGIN_SRC lisp 115 | (("foo" "foo" "foo") NIL T) 116 | #+END_SRC 117 | 118 | The equivalent =defrule= form is 119 | 120 | #+BEGIN_SRC lisp :results silent :exports code :session "doc" 121 | (esrap:defrule foo+ (+ "foo")) 122 | #+END_SRC 123 | 124 | Note that rules can be redefined, i.e. this =defrule= form replaces 125 | the previous definition of the =foo+= rule. 126 | 127 | Rules can transform their matches: 128 | 129 | #+BEGIN_SRC lisp :results silent :exports code :session "doc" 130 | (esrap:add-rule 131 | 'decimal 132 | (make-instance 133 | 'esrap:rule 134 | :expression '(+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) 135 | :transform (lambda (list start end) 136 | (declare (ignore start end)) 137 | (parse-integer (format nil "~{~A~}" list))))) 138 | #+END_SRC 139 | 140 | or using =defrule= 141 | 142 | #+BEGIN_SRC lisp :results silent :exports both :session "doc" 143 | (esrap:defrule decimal 144 | (+ (or "0" "1" "2" "3" "4" "5" "6" "7" "8" "9")) 145 | (:lambda (list) 146 | (parse-integer (format nil "~{~A~}" list)))) 147 | #+END_SRC 148 | 149 | Any lisp function can be used as a semantic predicate: 150 | 151 | #+BEGIN_SRC lisp :results value :exports both :session "doc" 152 | (list 153 | (multiple-value-list (esrap:parse '(oddp decimal) "123")) 154 | (multiple-value-list (esrap:parse '(evenp decimal) "123" :junk-allowed t))) 155 | #+END_SRC 156 | 157 | #+RESULTS: 158 | #+BEGIN_SRC lisp 159 | 160 | ((123 NIL T) (NIL 0)) 161 | #+END_SRC 162 | 163 | * Example Files 164 | 165 | More complete examples can be found in the following self-contained 166 | example files: 167 | 168 | + [[file:examples/sexp.lisp]]: complete sample grammar and usage 169 | + [[file:examples/symbol-table.lisp]]: grammar with lexical scope 170 | + [[file:examples/left-recursion.lisp]]: multiple grammars with left recursion 171 | + [[file:examples/function-terminals.lisp]]: grammars with functions as terminals 172 | -------------------------------------------------------------------------------- /src/conditions.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2016 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | (define-condition invalid-expression-error (error) 23 | ((expression :initarg :expression :reader invalid-expression-error-expression)) 24 | (:default-initargs 25 | :expression (required-argument :expression)) 26 | (:documentation 27 | "Signaled when an invalid expression is encountered.")) 28 | 29 | (defmethod print-object ((condition invalid-expression-error) stream) 30 | (format stream "Invalid expression: ~S" 31 | (invalid-expression-error-expression condition))) 32 | 33 | (defun invalid-expression-error (expression) 34 | (error 'invalid-expression-error :expression expression)) 35 | 36 | (define-condition esrap-error (parse-error) 37 | ((text :initarg :text :initform nil :reader esrap-error-text)) 38 | (:documentation 39 | "Signaled when an Esrap parse fails. Use ESRAP-ERROR-TEXT to obtain the 40 | string that was being parsed, and ESRAP-ERROR-POSITION the position at which 41 | the error occurred.")) 42 | 43 | (defmethod print-object :before ((condition esrap-error) stream) 44 | (when (or *print-escape* 45 | *print-readably* 46 | (and *print-lines* (<= *print-lines* 5))) 47 | (return-from print-object)) 48 | 49 | ;; FIXME: this looks like it won't do the right thing when used as 50 | ;; part of a logical block. 51 | (if-let ((text (esrap-error-text condition)) 52 | (position (esrap-error-position condition))) 53 | (labels ((safe-index (index) 54 | (min (max index 0) (length text))) 55 | (find-newline (&key (start 0) (end (length text)) (from-end t)) 56 | (let ((start (safe-index start)) 57 | (end (safe-index end))) 58 | (cond 59 | ((when-let ((position (position #\Newline text 60 | :start start :end end 61 | :from-end from-end))) 62 | (1+ position))) 63 | ((and from-end (zerop start)) 64 | start) 65 | ((and (not from-end) (= end (length text))) 66 | end))))) 67 | ;; FIXME: magic numbers 68 | (let* ((line (count #\Newline text :end position)) 69 | (column (- position (or (find-newline :end position) 0) 1)) 70 | (min-start (- position 160)) 71 | (max-end (+ position 24)) 72 | (line-start (or (find-newline :start min-start 73 | :end position) 74 | (safe-index min-start))) 75 | (start (cond 76 | ((= (safe-index min-start) line-start) 77 | line-start) 78 | ((find-newline :start min-start 79 | :end (1- line-start))) 80 | (t 81 | 82 | line-start))) 83 | (end (or (find-newline :start position 84 | :end max-end 85 | :from-end nil) 86 | (safe-index max-end))) 87 | (*print-circle* nil)) 88 | (format stream "At~:[~; end of input~]~2%~ 89 | ~2@T~<~@;~A~:>~%~ 90 | ~2@T~V@T^ (Line ~D, Column ~D, Position ~D)~2%" 91 | (= position (length text)) 92 | (list (subseq text start end)) 93 | (- position line-start) 94 | (1+ line) (1+ column) position))) 95 | 96 | (format stream "~2&~2%"))) 97 | 98 | (define-condition esrap-parse-error (esrap-error) 99 | ((result :initarg :result 100 | :type result 101 | :reader esrap-parse-error-result) 102 | (%context :accessor esrap-parse-error-%context 103 | :initform nil)) 104 | (:default-initargs :result (required-argument :result)) 105 | (:documentation 106 | "This error is signaled when a parse attempt fails in a way that .")) 107 | 108 | (defmethod esrap-error-position ((condition esrap-parse-error)) 109 | (result-position (esrap-parse-error-context condition))) 110 | 111 | (defmethod esrap-parse-error-context ((condition esrap-parse-error)) 112 | (or (esrap-parse-error-%context condition) 113 | (setf (esrap-parse-error-%context condition) 114 | (let ((result (esrap-parse-error-result condition))) 115 | (or (result-context result) result))))) 116 | 117 | (defmethod print-object ((object esrap-parse-error) stream) 118 | (cond 119 | (*print-readably* 120 | (call-next-method)) 121 | (*print-escape* 122 | (print-unreadable-object (object stream :type t :identity t) 123 | (format stream "~@[~S~]~@[ @~D~]" 124 | (esrap-parse-error-context object) 125 | (esrap-error-position object)))) 126 | (t 127 | (error-report (esrap-parse-error-context object) stream)))) 128 | 129 | (declaim (ftype (function (string result) (values &optional nil)) 130 | esrap-parse-error)) 131 | (defun esrap-parse-error (text result) 132 | (error 'esrap-parse-error 133 | :text text 134 | :result result)) 135 | 136 | (define-condition left-recursion (esrap-error) 137 | ((position :initarg :position :initform nil :reader esrap-error-position) 138 | (nonterminal :initarg :nonterminal :initform nil :reader left-recursion-nonterminal) 139 | (path :initarg :path :initform nil :reader left-recursion-path)) 140 | (:documentation 141 | "May be signaled when left recursion is detected during Esrap parsing. 142 | 143 | LEFT-RECURSION-NONTERMINAL names the symbol for which left recursion 144 | was detected, and LEFT-RECURSION-PATH lists nonterminals of which the 145 | left recursion cycle consists. 146 | 147 | Note: This error is only signaled if *ON-LEFT-RECURSION* is bound 148 | to :ERROR.")) 149 | 150 | (defmethod print-object ((condition left-recursion) stream) 151 | (format stream "Left recursion in nonterminal ~S. ~_Path: ~ 152 | ~{~S~^ -> ~}" 153 | (left-recursion-nonterminal condition) 154 | (left-recursion-path condition))) 155 | 156 | (defun left-recursion (text position nonterminal path-butlast) 157 | (error 'left-recursion 158 | :text text 159 | :position position 160 | :nonterminal nonterminal 161 | :path (append path-butlast (list nonterminal)))) 162 | 163 | (define-condition undefined-rule (condition) 164 | ((symbol :initarg :symbol 165 | :type symbol 166 | :reader undefined-rule-symbol))) 167 | 168 | (defmethod print-object ((condition undefined-rule) stream) 169 | (format stream "~@" 170 | (undefined-rule-symbol condition))) 171 | 172 | (define-condition undefined-rule-error (undefined-rule error) 173 | () 174 | (:documentation 175 | "Signaled when an undefined rule is encountered.")) 176 | 177 | (defun undefined-rule (symbol) 178 | (error 'undefined-rule-error :symbol symbol)) 179 | -------------------------------------------------------------------------------- /src/context.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; MEMOIZATION CACHE 23 | ;;; 24 | ;;; Because each [rule, position] tuple has an unambiguous 25 | ;;; result per source text, we can cache this result -- this is what 26 | ;;; makes packrat parsing O(N). 27 | ;;; 28 | ;;; For now we just use EQUAL hash-tables, but a specialized 29 | ;;; representation would probably pay off. 30 | 31 | (declaim (inline make-cache get-cached (setf get-cached))) 32 | 33 | (defun make-cache (length) 34 | (make-chunk-cache length)) 35 | 36 | (defun get-cached (symbol position cache) 37 | (cached symbol position cache)) 38 | 39 | (defun (setf get-cached) (result symbol position cache) 40 | (setf (cached symbol position cache) result)) 41 | 42 | ;;; Left-recursion support 43 | 44 | ;; In case of left recursion, this stores 45 | (defstruct (head (:predicate nil) (:copier nil)) 46 | ;; The rule at which the left recursion started. 47 | (rule (required-argument :rule) :type symbol) 48 | ;; The set of involved rules. 49 | (involved-set '() :type list) 50 | ;; The set of rules which can still be applied in the current round 51 | ;; of "seed parse" growing. 52 | (eval-set '() :type list)) 53 | 54 | (declaim (inline make-heads get-head (setf get-head))) 55 | 56 | (defun make-heads (length) 57 | (make-chunk-cache length)) 58 | 59 | (defun get-head (position heads) 60 | (when-let ((chunk (find-chunk position heads))) 61 | (aref chunk (ldb (byte +chunk-divisor+ 0) position)))) 62 | 63 | (defun (setf get-head) (head position heads) 64 | (let ((chunk (ensure-chunk position heads))) 65 | (setf (aref chunk (ldb (byte +chunk-divisor+ 0) position)) head))) 66 | 67 | (defun recall (rule position cache heads thunk) 68 | (let ((result (get-cached rule position cache)) 69 | (head (get-head position heads))) 70 | (cond 71 | ;; If not growing a seed parse, just return what is stored in 72 | ;; the cache. 73 | ((not head) 74 | result) 75 | ;; Do not evaluate any rule that is not involved in this left 76 | ;; recursion. 77 | ((and (not result) (not (or (eq rule (head-rule head)) 78 | (member rule (head-involved-set head))))) 79 | (make-failed-parse nil position nil)) 80 | ;; Allow involved rules to be evaluated, but only once, during a 81 | ;; seed-growing iteration. Subsequent requests just return what 82 | ;; is stored in the cache. 83 | (t 84 | (when (member rule (head-eval-set head)) 85 | (removef (head-eval-set head) rule :count 1) 86 | (setf result (funcall thunk position) 87 | (get-cached rule position cache) result)) 88 | result)))) 89 | 90 | ;;; Context 91 | 92 | (declaim (inline make-context 93 | context-cache context-heads 94 | context-nonterminal-stack (setf context-nonterminal-stack))) 95 | (defstruct (context 96 | (:constructor make-context (length 97 | &aux 98 | (cache (make-cache length)) 99 | (heads (make-heads length))))) 100 | (cache nil :type chunk-cache :read-only t) 101 | (heads nil :type chunk-cache :read-only t) 102 | (nonterminal-stack '() :type list)) 103 | #+sbcl (declaim (sb-ext:freeze-type context)) 104 | 105 | (declaim (type context *context*)) 106 | (defvar *context* (make-context 1)) 107 | 108 | (defmacro with-pushed-nonterminal ((symbol context) &body body) 109 | (with-gensyms (previous cell) 110 | (once-only (context) 111 | `(let* ((,previous (context-nonterminal-stack ,context)) 112 | (,cell (list* ,symbol ,previous))) 113 | (declare (dynamic-extent ,cell)) 114 | (setf (context-nonterminal-stack ,context) ,cell) 115 | (prog1 116 | (progn ,@body) 117 | (setf (context-nonterminal-stack ,context) ,previous)))))) 118 | 119 | ;;; SYMBOL and POSITION must all lexical variables! 120 | (defmacro with-cached-result ((symbol position &optional (text nil)) &body forms) 121 | (with-gensyms (context cache heads result) 122 | `(flet ((do-it (position) ,@forms)) 123 | (let* ((,context *context*) 124 | (,cache (context-cache ,context)) 125 | (,heads (context-heads ,context)) 126 | (,result (recall ,symbol ,position ,cache ,heads #'do-it))) 127 | (cond 128 | ;; Found left-recursion marker in the cache. Depending on 129 | ;; *ERROR-ON-LEFT-RECURSION*, we either signal an error or 130 | ;; prepare recovery from this situation (which is performed 131 | ;; by one of the "cache miss" cases (see below) up the 132 | ;; call-stack). 133 | ((left-recursion-result-p ,result) 134 | ;; If error on left-recursion has been requested, do that. 135 | (when (eq *on-left-recursion* :error) 136 | (left-recursion ,text ,position ,symbol 137 | (reverse (mapcar #'left-recursion-result-rule 138 | (context-nonterminal-stack 139 | ,context))))) 140 | ;; Otherwise, mark left recursion and fail this partial 141 | ;; parse. 142 | (let ((head (or (left-recursion-result-head ,result) 143 | (setf (left-recursion-result-head ,result) 144 | (make-head :rule ,symbol))))) 145 | ;; Put this head into left recursion markers on the 146 | ;; stack. Add rules on the stack to the "involved set". 147 | (dolist (item (context-nonterminal-stack ,context)) 148 | (when (eq (left-recursion-result-head item) head) 149 | (return)) 150 | (setf (left-recursion-result-head item) head) 151 | (pushnew (left-recursion-result-rule item) 152 | (head-involved-set head)))) 153 | (make-failed-parse ,symbol ,position nil)) 154 | ;; Cache hit without left-recursion. 155 | (,result 156 | ,result) 157 | ;; Cache miss. 158 | (t 159 | ;; First add a left recursion marker for this pair, then 160 | ;; compute the result, potentially recovering from left 161 | ;; recursion and cache that. 162 | (let* ((result (make-left-recursion-result ,symbol)) 163 | (result1 164 | (with-pushed-nonterminal (result ,context) 165 | (setf (get-cached ,symbol ,position ,cache) 166 | result 167 | (get-cached ,symbol ,position ,cache) 168 | (do-it position))))) 169 | ;; If we detect left recursion, handle it. 170 | (when (and (not (error-result-p result1)) 171 | (left-recursion-result-head result)) 172 | (let ((head (left-recursion-result-head result))) 173 | ;; Grow "seed parse" (grow-lr in the paper): 174 | ;; repeatedly apply rules involved in left-recursion 175 | ;; until no progress can be made. 176 | (setf (get-head ,position ,heads) head) 177 | (loop 178 | (setf (head-eval-set head) 179 | (copy-list (head-involved-set head))) 180 | (let ((result2 (do-it ,position))) 181 | (when (or (error-result-p result2) 182 | (<= (result-position result2) 183 | (result-position result1))) ; no progress 184 | (return)) 185 | (setf (get-cached ,symbol ,position ,cache) 186 | (%make-successful-parse 187 | ,symbol (result-position result2) 188 | result2 #'successful-parse-production) 189 | result1 result2))) 190 | (setf (get-head ,position ,heads) nil))) 191 | result1))))))) 192 | -------------------------------------------------------------------------------- /src/rule.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; RULE PROPERTIES 23 | 24 | (macrolet 25 | ((define (&body properties) 26 | (let ((symbol+index 27 | (loop :for property :in properties 28 | :for index :from 0 29 | :collect (list property index)))) 30 | `(eval-when (:compile-toplevel :load-toplevel :execute) 31 | (deftype rule-properties/packed () 32 | '(unsigned-byte ,(length properties))) 33 | 34 | (declaim (inline make-rule-properties rule-property-p)) 35 | (defun make-rule-properties (&key ,@properties) 36 | (logior ,@(map 'list (lambda (entry) 37 | (destructuring-bind (symbol index) entry 38 | `(if ,symbol ,(ash 1 index) 0))) 39 | symbol+index))) 40 | (defun rule-property-p (properties property) 41 | (logbitp 42 | (ecase property 43 | ,@(map 'list (lambda (entry) 44 | (destructuring-bind (symbol index) entry 45 | `(,(make-keyword symbol) ,index))) 46 | symbol+index)) 47 | properties)))))) 48 | (define uses-cache 49 | uses-cache-unless-trivial 50 | transform-identity 51 | transform-constant 52 | transform-text)) 53 | 54 | (defconstant +default-rule-properties+ 55 | (make-rule-properties :uses-cache t 56 | :uses-cache-unless-trivial t 57 | :transform-identity nil 58 | :transform-constant nil 59 | :transform-text nil)) 60 | 61 | ;;; RULE REPRESENTATION AND STORAGE 62 | ;;; 63 | ;;; For each rule, there is a RULE-CELL in *RULES*, whose %INFO slot has the 64 | ;;; function that implements the rule in car, and the rule object in CDR. A 65 | ;;; RULE object can be attached to only one non-terminal at a time, which is 66 | ;;; accessible via RULE-SYMBOL. 67 | 68 | (defvar *rules* (make-hash-table)) 69 | 70 | (defun clear-rules () 71 | (clrhash *rules*) 72 | nil) 73 | 74 | (defstruct (rule-cell 75 | (:conc-name cell-) 76 | (:constructor 77 | make-rule-cell 78 | (symbol &aux (%info (cons (undefined-rule-function symbol) nil)))) 79 | (:copier nil) 80 | (:predicate nil)) 81 | ;; A cons 82 | ;; 83 | ;; (FUNCTION . RULE) 84 | ;; 85 | ;; where 86 | ;; 87 | ;; FUNCTION is a function with lambda-list (text position end) which 88 | ;; is called to do the actual parsing work (or immediately signal an 89 | ;; error in case of referenced but undefined rules). 90 | ;; 91 | ;; RULE is a RULE instance associated to the cell or nil for 92 | ;; referenced but undefined rules. 93 | (%info (required-argument :%info) :type (cons function t)) 94 | ;; Either NIL if the corresponding rule is not currently traced or a 95 | ;; list 96 | ;; 97 | ;; (INFO BREAK CONDITION) 98 | ;; 99 | ;; where 100 | ;; 101 | ;; INFO is the original value (i.e. before the rule was traced) of 102 | ;; the %INFO slot of the cell. 103 | ;; 104 | ;; BREAK is a Boolean indicating whether to CL:BREAK when the traced 105 | ;; rule is executed. 106 | ;; 107 | ;; CONDITION is NIL or a function that is called when the traced 108 | ;; rule is executed to determine whether the trace action should be 109 | ;; performed. 110 | (trace-info nil) 111 | (referents nil :type list)) 112 | 113 | (declaim (inline cell-function)) 114 | (defun cell-function (cell) 115 | (car (cell-%info cell))) 116 | 117 | (defun cell-rule (cell) 118 | (cdr (cell-%info cell))) 119 | 120 | (defun set-cell-info (cell function rule) 121 | ;; Atomic update 122 | (setf (cell-%info cell) (cons function rule)) 123 | cell) 124 | 125 | (defun undefined-rule-function (symbol) 126 | (lambda (&rest args) 127 | (declare (ignore args)) 128 | (undefined-rule symbol))) 129 | 130 | (defun ensure-rule-cell (symbol) 131 | (check-type symbol nonterminal) 132 | ;; FIXME: Need to lock *RULES*. 133 | (ensure-gethash symbol *rules* (make-rule-cell symbol))) 134 | 135 | (defun delete-rule-cell (symbol) 136 | (remhash symbol *rules*)) 137 | 138 | (defun reference-rule-cell (symbol referent) 139 | (let ((cell (ensure-rule-cell symbol))) 140 | (when referent 141 | (pushnew referent (cell-referents cell))) 142 | cell)) 143 | 144 | (defun dereference-rule-cell (symbol referent) 145 | (let ((cell (ensure-rule-cell symbol))) 146 | (setf (cell-referents cell) (delete referent (cell-referents cell))) 147 | cell)) 148 | 149 | (defun find-rule-cell (symbol) 150 | (check-type symbol nonterminal) 151 | (gethash symbol *rules*)) 152 | 153 | (defclass rule () 154 | ((%symbol :initform nil 155 | :reader rule-symbol) 156 | (%expression :initarg :expression 157 | :initform (required-argument :expression)) 158 | ;; Only for DESCRIBE-GRAMMAR. The %CONDITION slot stores the actual 159 | ;; condition. 160 | (%guard-expression :initarg :guard-expression 161 | :initform t 162 | :reader rule-guard-expression) 163 | ;; Either T for rules that are always active (the common case), 164 | ;; NIL for rules that are never active, or a function to call 165 | ;; to find out if the rule is active or not. 166 | (%condition :initarg :condition 167 | :initform t 168 | :reader rule-condition) 169 | (%transform :initarg :transform 170 | :initform nil 171 | :reader rule-transform) 172 | (%around :initarg :around 173 | :initform nil 174 | :reader rule-around) 175 | ;; Describes in which parts of an error report this rule, its 176 | ;; children and the input (transitively) expected by the rule may 177 | ;; be mentioned. This allows preventing "utility" rules from 178 | ;; cluttering up error reports. 179 | (%error-report :initarg :error-report 180 | :type rule-error-report 181 | :reader rule-error-report 182 | :initform t) 183 | ;; Used to characterize the rule (e.g. may the rule be inlined? 184 | ;; should the rule use the packrat cache?) and its transform 185 | ;; (e.g. is the transform constant/the identity/text?). 186 | (%properties :initarg :properties 187 | :type rule-properties/packed 188 | :reader rule-properties 189 | :initform +default-rule-properties+))) 190 | 191 | (setf (documentation 'rule-symbol 'function) 192 | "Returns the nonterminal associated with the RULE, or NIL if the 193 | rule is not attached to any nonterminal.") 194 | 195 | (declaim (ftype (function (symbol rule-error-report-pattern) 196 | (values boolean &optional)) 197 | rule-suitable-for-report-part-p)) 198 | (defun rule-suitable-for-report-part-p (symbol part-or-parts) 199 | (when-let ((rule (find-rule symbol))) 200 | (error-report-behavior-suitable-for-report-part-p 201 | (rule-error-report rule) part-or-parts))) 202 | 203 | (defun detach-rule (rule) 204 | (dolist (dep (%rule-direct-dependencies rule)) 205 | (dereference-rule-cell dep (rule-symbol rule))) 206 | (setf (slot-value rule '%symbol) nil)) 207 | 208 | (defmethod shared-initialize :after ((rule rule) slots &key) 209 | (declare (ignore slots)) 210 | (check-expression (rule-expression rule))) 211 | 212 | (defmethod print-object ((rule rule) stream) 213 | (print-unreadable-object (rule stream :type t :identity nil) 214 | (format stream "~:[(detached)~;~:*~S <- ~S~]" 215 | (rule-symbol rule) (rule-expression rule)))) 216 | 217 | (defun sort-dependencies (symbol dependencies) 218 | (let ((symbols (delete symbol dependencies)) 219 | (defined nil) 220 | (undefined nil)) 221 | (dolist (sym symbols) 222 | (if (find-rule sym) 223 | (push sym defined) 224 | (push sym undefined))) 225 | (values defined undefined))) 226 | 227 | (defun rule-dependencies (rule) 228 | "Returns the dependencies of the RULE: primary value is a list of defined 229 | nonterminal symbols, and secondary value is a list of undefined nonterminal 230 | symbols." 231 | (sort-dependencies 232 | (rule-symbol rule) (%expression-dependencies (rule-expression rule)))) 233 | 234 | (defun rule-direct-dependencies (rule) 235 | (sort-dependencies 236 | (rule-symbol rule) (%expression-direct-dependencies (rule-expression rule)))) 237 | 238 | (defun %rule-direct-dependencies (rule) 239 | (delete (rule-symbol rule) (%expression-direct-dependencies (rule-expression rule)))) 240 | -------------------------------------------------------------------------------- /doc/esrap.texinfo: -------------------------------------------------------------------------------- 1 | \input texinfo @c -*-texinfo-*- 2 | @c %**start of header 3 | @setfilename esrap.info 4 | @c %**end of header 5 | 6 | @settitle Esrap 7 | 8 | @c for install-info 9 | @dircategory Software development 10 | @direntry 11 | * Esrap: a packrat parser for Common Lisp 12 | @end direntry 13 | 14 | @titlepage 15 | 16 | @title Esrap 17 | @subtitle a packrat parser for Common Lisp 18 | 19 | @c The following two commands start the copyright page. 20 | @page 21 | @vskip 0pt plus 1filll 22 | @insertcopying 23 | 24 | @end titlepage 25 | 26 | In addition to regular Packrat / Parsing Grammar / TDPL features Esrap 27 | supports 28 | @itemize 29 | @item 30 | dynamic redefinition of nonterminals 31 | @item 32 | inline grammars 33 | @item 34 | semantic predicates 35 | @item 36 | introspective facilities for development 37 | @item 38 | support for left-recursive rules 39 | @item 40 | functions as terminals 41 | @item 42 | accurate, customizable parse error reports 43 | @end itemize 44 | 45 | Esrap was originally written by @email{nikodemus@@random-state.net, 46 | Nikodemus Siivola}. It is now developed and maintained by 47 | @email{jmoringe@@uni-bielefeld.de, Jan Moringen}. 48 | 49 | Esrap is maintained in Git: 50 | @example 51 | git clone -b stable git://github.com/scymtym/esrap.git 52 | @end example 53 | will get you a local copy (omit @code{-b stable} to get the latest 54 | development version). 55 | @example 56 | @url{http://github.com/scymtym/esrap} 57 | @end example 58 | is the GitHub project page. 59 | 60 | Esrap is licenced under an MIT-style licence. 61 | 62 | For more on packrat parsing, see 63 | @url{http://pdos.csail.mit.edu/~baford/packrat/thesis/} for Bryan Ford's 2002 64 | thesis: ``Packrat Parsing: a Practical Linear Time Algorithm with Backtracking''. 65 | 66 | For left recursion support in packrat parsers, see 67 | @url{http://www.vpri.org/pdf/tr2007002_packrat.pdf} for A. Warth et al's 68 | 2008 paper: ``Packrat Parsers Can Support Left Recursion''. 69 | 70 | @contents 71 | 72 | @ifnottex 73 | 74 | @include include/ifnottex.texinfo 75 | 76 | @end ifnottex 77 | 78 | @chapter Parsing Expressions 79 | 80 | Parsing proceeds by matching text against parsing expressions. 81 | Matching has three components: success vs failure, consumption of 82 | input, and associated production. 83 | 84 | Parsing expressions that fail never consume input. Parsing expressions 85 | that succeed may or may not consume input. 86 | 87 | A parsing expressions can be: 88 | 89 | @heading Terminal 90 | A terminal is a character or a string of length one, which succeeds and 91 | consumes a single character if that character matches the terminal. 92 | 93 | Additionally, Esrap supports some pseudoterminals. 94 | 95 | @itemize 96 | @item 97 | The wild terminal symbol @code{character} always succeeds, consuming 98 | and producing a single character. 99 | @item 100 | Expressions of the form @code{(character-ranges range ...)} match a 101 | single character from the given range(s), consuming and producing that 102 | character. A range can be either a list of the form @code{(#\start_char 103 | #\stop_char)} or a single character. 104 | @item 105 | Multicharacter strings can be used to specify sequences of terminals: 106 | @code{"foo"} succeeds and consumes input as if @code{(and #\f #\o 107 | #\o)}. Produces the consumed string. 108 | @item 109 | Expressions of the form @code{(string length)} can be used to specify 110 | sequences of arbitrary characters: @code{(string 2)} succeeds and 111 | consumes input as if @code{(and character character)}. Produces the 112 | consumed string. 113 | @end itemize 114 | 115 | @heading Nonterminal 116 | Nonterminals are specified using symbols. A nonterminal symbol 117 | succeeds if the parsing expression associated with it succeeds, and 118 | consumes whatever the input that expression consumes. 119 | 120 | The production of a nonterminal depends on the associated expression 121 | and an optional transformation rule. 122 | 123 | Nonterminals are defined using @code{defrule}. 124 | 125 | @emph{Note: Currently all rules share the same namespace, so you 126 | should not use symbols in the COMMON-LISP package or other shared 127 | packages to name your rules unless you are certain there are no other 128 | Esrap using components in your Lisp image. In a future version of 129 | Esrap grammar objects will be introduced to allow multiple definitions 130 | of nonterminals. Symbols in the COMMON-LISP package are specifically 131 | reserved for use by Esrap.} 132 | 133 | @heading Sequence 134 | @lisp 135 | (and subexpression ...) 136 | @end lisp 137 | 138 | A sequence succeeds if all subexpressions succeed, and consumes all 139 | input consumed by the subexpressions. A sequence produces the 140 | productions of its subexpressions as a list. 141 | 142 | @heading Ordered Choice 143 | @lisp 144 | (or subexpression ...) 145 | @end lisp 146 | 147 | An ordered choice succeeds if any of the subexpressions succeeds, and 148 | consumes all the input consumed by the successful subexpression. An 149 | ordered choice produces whatever the successful subexpression 150 | produces. 151 | 152 | Subexpressions are checked strictly in the specified order, and once 153 | a subexpression succeeds no further ones will be tried. 154 | 155 | @heading Negation 156 | @lisp 157 | (not subexpression) 158 | @end lisp 159 | 160 | A negation succeeds if the subexpression fails, and consumes one character 161 | of input. A negation produces the character it consumes. 162 | 163 | @heading Greedy Repetition 164 | @lisp 165 | (* subexpresssion) 166 | @end lisp 167 | 168 | A greedy repetition always succeeds, consuming all input consumed by 169 | applying subexpression repeatedly as long as it succeeds. 170 | 171 | A greedy repetition produces the productions of the subexpression as a 172 | list. 173 | 174 | @heading Greedy Positive Repetition 175 | @lisp 176 | (+ subexpresssion) 177 | @end lisp 178 | 179 | A greedy repetition succeeds if subexpression succeeds at least once, 180 | and consumes all input consumed by applying subexpression repeatedly 181 | as long as it succeeds. A greedy positive repetition produces the 182 | productions of the subexpression as a list. 183 | 184 | @heading Optional 185 | @lisp 186 | (? subexpression) 187 | @end lisp 188 | 189 | Optionals always succeed, and consume whatever input the subexpression 190 | consumes. An optional produces whatever the subexpression produces, or 191 | @code{nil} if the subexpression does not succeed. 192 | 193 | @heading Followed-By Predicate 194 | @lisp 195 | (& subexpression) 196 | @end lisp 197 | 198 | A followed-by predicate succeeds if the subexpression succeeds, and 199 | @emph{consumes no input}. A followed-by predicate produces whatever 200 | the subexpression produces. 201 | 202 | @heading Not-Followed-By Predicate 203 | @lisp 204 | (! subexpression) 205 | @end lisp 206 | 207 | A not-followed-by predicate succeeds if the subexpression does not 208 | succeed, and @emph{consumes no input}. A not-followed-by predicate 209 | produces @code{nil}. 210 | 211 | @heading Lookbehind 212 | @lisp 213 | (< amount subexpression) 214 | @end lisp 215 | 216 | A lookbehind succeeds if @code{subexpression} succeeds at the input 217 | position reached by moving backward @code{amount}, a positive integer, 218 | characters from the current position and @emph{consumes no input}. A 219 | lookbehind produces whatever @code{subexpression} produces. 220 | 221 | @heading Lookahead 222 | @lisp 223 | (> amount subexpression) 224 | @end lisp 225 | 226 | A lookahead succeeds if @code{subexpression} succeeds at the input 227 | position reached by moving forward @code{amount}, a positive integer, 228 | characters from the current position and @emph{consumes no input}. A 229 | lookahead produces whatever @code{subexpression} produces. 230 | 231 | @heading Semantic Predicates 232 | @lisp 233 | (predicate-name subexpression) 234 | @end lisp 235 | 236 | The @code{predicate-name} is a symbol naming a global function. A 237 | semantic predicate succeeds if subexpression succeeds @emph{and} the 238 | named function returns true for the production of the subexpression. A 239 | semantic predicate produces whatever the subexpression produces. 240 | 241 | @emph{Note: semantic predicates may change in the future to produce 242 | whatever the predicate function returns.} 243 | 244 | @heading Functions as Terminals 245 | @lisp 246 | (function function-name) 247 | @end lisp 248 | 249 | @code{function-name} is a symbol naming a global 250 | function. @code{function-name}'s lambda-list has to be compatible to 251 | @code{(text position end)} where @code{text} is the whole input and 252 | @code{position} and @code{end} indicate the maximal subsequence 253 | @code{function-name} should attempt to parse. 254 | 255 | A function terminal succeeds if either 256 | @enumerate 257 | @item 258 | @code{function-name} returns @code{T} as its third value. 259 | @item 260 | @code{function-name} returns @code{nil} as its third value (or returns 261 | only two values) and @code{nil} as its second value. This indicates that 262 | the entire remaining input has been consumed. 263 | @item 264 | @code{function-name} returns @code{nil} as its third value (or returns 265 | only two values) and an integer @code{> position} as its second value 266 | indicating the position up to which @code{text} has been consumed. 267 | @item 268 | @code{function-name} returns a value of type @code{successful-parse} as 269 | its first value. 270 | @end enumerate 271 | When a function terminal succeeds, the first return value is an 272 | arbitrary production. 273 | 274 | A function terminal fails if either 275 | @enumerate 276 | @item 277 | @code{function-name} returns two values: an ignored value and 278 | @code{position}. Returning @code{position} indicates that no progress 279 | has been made. 280 | @item 281 | @code{function-name} returns three values: an ignored value, @code{nil} 282 | or an integer @code{>= position} and a string or a condition explaining 283 | the failure. In this case, when the second value is not @code{nil}, it 284 | indicates the exact position of the failure. 285 | @item 286 | @code{function-name} returns a value of type @code{error-result} as its 287 | first value. 288 | @end enumerate 289 | 290 | Note that rules which use functions as terminals do not automatically 291 | pick up redefinitions of the used functions. For that to happen, the 292 | rules have to be redefined as well. 293 | 294 | See @file{example-function-terminals.lisp} for examples. 295 | 296 | @heading Left Recursion 297 | 298 | One aspect of designing Esrap rules is @emph{left recursion}. A 299 | @emph{direct left recursive} rule is of the form 300 | @lisp 301 | (defrule left-recursion (or (and left-recursion STUFF) ALTERNATIVES)) 302 | @end lisp 303 | The simplest @emph{indirect left recursive} rule is of the form 304 | @lisp 305 | (defrule left-recursion.1 left-recursion.2) 306 | (defrule left-recursion.2 (or (and left-recursion.1 STUFF) ALTERNATIVES)) 307 | @end lisp 308 | 309 | Esrap can handle both kinds of left recursive rules, but the linear-time 310 | guarantee generally no longer holds in such cases. The special variable 311 | @code{*on-left-recursion*} can be set to either @code{nil} or 312 | @code{:error} to control Esrap's behavior with respect to allowing left 313 | recursion. 314 | 315 | See @file{example-left-recursion.lisp} for examples. 316 | 317 | @chapter Dictionary 318 | 319 | @section Primary Interface 320 | 321 | @include include/macro-esrap-defrule.texinfo 322 | @include include/fun-esrap-parse.texinfo 323 | @include include/fun-esrap-describe-grammar.texinfo 324 | 325 | @section Utilities 326 | 327 | @include include/fun-esrap-text.texinfo 328 | 329 | @section Introspection and Intercession 330 | 331 | @include include/fun-esrap-add-rule.texinfo 332 | @include include/fun-esrap-change-rule.texinfo 333 | @include include/fun-esrap-find-rule.texinfo 334 | @include include/fun-esrap-remove-rule.texinfo 335 | @include include/fun-esrap-rule-dependencies.texinfo 336 | @include include/fun-esrap-rule-expression.texinfo 337 | @include include/fun-esrap-setf-rule-expression.texinfo 338 | @include include/fun-esrap-rule-symbol.texinfo 339 | 340 | @include include/fun-esrap-trace-rule.texinfo 341 | @include include/fun-esrap-untrace-rule.texinfo 342 | @include include/fun-esrap-untrace-all-rules.texinfo 343 | 344 | @include include/fun-esrap-expression-start-terminals.texinfo 345 | @include include/fun-esrap-describe-terminal.texinfo 346 | 347 | @section Error Conditions 348 | 349 | @include include/var-esrap-star-on-left-recursion-star.texinfo 350 | 351 | @include include/fun-esrap-esrap-error-position.texinfo 352 | @include include/fun-esrap-esrap-parse-error-result.texinfo 353 | @include include/fun-esrap-esrap-parse-error-context.texinfo 354 | 355 | @include include/condition-esrap-esrap-error.texinfo 356 | @include include/condition-esrap-left-recursion.texinfo 357 | @include include/condition-esrap-esrap-parse-error.texinfo 358 | 359 | @include include/condition-esrap-undefined-rule-error.texinfo 360 | 361 | @bye 362 | -------------------------------------------------------------------------------- /src/macros.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2020 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; Miscellany 23 | 24 | (defun text (&rest arguments) 25 | "Arguments must be strings, or lists whose leaves are strings. 26 | Catenates all the strings in arguments into a single string." 27 | (with-output-to-string (s) 28 | (labels ((cat-list (list) 29 | (dolist (elt list) 30 | (etypecase elt 31 | (string (write-string elt s)) 32 | (character (write-char elt s)) 33 | (list (cat-list elt)))))) 34 | (cat-list arguments)))) 35 | 36 | (defun singleton-option (context form keyword type &key default) 37 | (let ((value default) 38 | (value-seen nil)) 39 | (lambda (&optional (new-value nil new-value-p)) 40 | (cond 41 | ((not new-value-p) 42 | value) 43 | ((not (typep new-value type)) 44 | (error 'simple-type-error 45 | :datum new-value 46 | :expected-type type 47 | :format-control "~@" 49 | :format-arguments (list new-value keyword context))) 50 | (value-seen 51 | (error "~@" 53 | keyword context form)) 54 | (t 55 | (setf value-seen t 56 | value new-value)))))) 57 | 58 | ;;; http://jcsu.jesus.cam.ac.uk/~csr21/papers/features.pdf 59 | (eval-when (:compile-toplevel :execute) 60 | (when (and (find-package '#:sb-ext) 61 | (find-symbol (string '#:with-current-source-form) '#:sb-ext)) 62 | (pushnew 'sb-ext-with-current-source-form *features*))) 63 | 64 | (defmacro with-current-source-form ((&rest forms) &body body) 65 | #-esrap::sb-ext-with-current-source-form (declare (ignore forms)) 66 | #+esrap::sb-ext-with-current-source-form 67 | `(sb-ext:with-current-source-form (,@forms) ,@body) 68 | #-esrap::sb-ext-with-current-source-form 69 | `(progn ,@body)) 70 | 71 | ;;; DEFRULE support functions 72 | 73 | (defun parse-lambda-list-maybe-containing-&bounds (lambda-list) 74 | "Parse &BOUNDS section in LAMBDA-LIST and return three values: 75 | 76 | 1. The standard lambda list sublist of LAMBDA-LIST 77 | 2. A symbol that should be bound to the start of a matching substring 78 | 3. A symbol that should be bound to the end of a matching substring 79 | 4. A list containing symbols that were GENSYM'ed. 80 | 81 | The second and/or third values are GENSYMS if LAMBDA-LIST contains a 82 | partial or no &BOUNDS section, in which case fourth value contains them 83 | for use with IGNORE." 84 | (let ((length (length lambda-list)) 85 | (index (position '&bounds lambda-list))) 86 | (multiple-value-bind (lambda-list start end gensyms) 87 | (cond 88 | ;; Look for &BOUNDS START END. 89 | ((eql index (- length 3)) 90 | (values (subseq lambda-list 0 index) 91 | (nth (+ index 1) lambda-list) 92 | (nth (+ index 2) lambda-list) 93 | '())) 94 | ;; Look for &BOUNDS START. 95 | ((eql index (- length 2)) 96 | (let ((end (gensym "END"))) 97 | (values (subseq lambda-list 0 index) 98 | (nth (+ index 1) lambda-list) 99 | end 100 | (list end)))) 101 | ;; &BOUNDS is present but not followed by either one or two 102 | ;; names. 103 | (index 104 | (error "~@" 105 | '&bounds (subseq lambda-list index))) 106 | ;; No &BOUNDS section. 107 | (t 108 | (let ((start (gensym "START")) 109 | (end (gensym "END"))) 110 | (values lambda-list 111 | start 112 | end 113 | (list start end))))) 114 | (check-type start symbol) 115 | (check-type end symbol) 116 | (values lambda-list start end gensyms)))) 117 | 118 | (defun check-lambda-list (lambda-list spec 119 | &key 120 | (report-lambda-list lambda-list)) 121 | (multiple-value-bind 122 | (required* optional* rest* keyword* allow-other-keys-p auxp keyp) 123 | (parse-ordinary-lambda-list lambda-list) 124 | (labels ((fail (expected actual) 125 | (let ((expected (ensure-list expected)) 126 | (actual (ensure-list actual))) 127 | (error "~@" 128 | (first expected) (rest expected) 129 | report-lambda-list 130 | (first actual) (rest actual)))) 131 | (check-section (section expected actual) 132 | (typecase expected 133 | ((eql nil) 134 | (when actual 135 | (fail (list "without ~A parameters" section) 136 | (list "has ~A parameters" section)))) 137 | ((eql t) 138 | (unless actual 139 | (fail (list "with ~A parameters" section) 140 | (list "has no ~A parameters" section)))) 141 | (integer 142 | (unless (length= expected actual) 143 | (fail (list "with ~D ~A parameter~:*~:P" expected section) 144 | (list "has ~D ~A parameter~:*~:P" 145 | (length actual) section)))))) 146 | (check-binary (name expected actual) 147 | (when (member expected '(t nil)) 148 | (unless (eq expected (when actual t)) 149 | (fail (list "~:[without~;with~] ~A" expected name) 150 | (list "~:[has no~;has~] ~A" actual name))))) 151 | (check-simple-spec (&key required optional rest 152 | keyword allow-other-keys aux key) 153 | (check-section "required" required required*) 154 | (check-section "optional" optional optional*) 155 | (check-binary '&rest rest rest*) 156 | (check-section "keyword" keyword keyword*) 157 | (check-binary '&allow-other-keys allow-other-keys allow-other-keys-p) 158 | (check-section "aux" aux auxp) 159 | (check-binary '&key key keyp)) 160 | (check-spec (spec) 161 | (typecase spec 162 | ((cons (eql or)) 163 | (loop :with errors = () 164 | :for sub-spec :in (rest spec) 165 | :do (handler-case 166 | (progn 167 | (check-spec sub-spec) 168 | (return)) 169 | (error (condition) 170 | (push condition errors))) 171 | :finally (error "~@<~{~A~^~@:_~}~@:>" errors))) 172 | (list 173 | (apply #'check-simple-spec spec))))) 174 | (check-spec spec)))) 175 | 176 | (defun parse-defrule-options (options form) 177 | (let ((when (singleton-option 'defrule form :when t :default '(t . t))) 178 | (transform nil) 179 | (around nil) 180 | (error-report (singleton-option 'defrule form :error-report 181 | 'rule-error-report :default t)) 182 | (use-cache (singleton-option 'defrule form :use-cache 183 | 'cache-policy :default :unless-trivial))) 184 | (dolist (option options) 185 | (with-current-source-form (option) 186 | (destructuring-ecase option 187 | ((:when expr &rest rest) 188 | (when rest 189 | (error "~@" 190 | :when form)) 191 | (funcall when (cons (cond 192 | ((not (constantp expr)) 193 | `(lambda () ,expr)) 194 | ((eval expr) 195 | t)) 196 | expr))) 197 | ((:constant value) 198 | (declare (ignore value)) 199 | (push option transform)) 200 | ((:text value) 201 | (when value 202 | (push option transform))) 203 | ((:identity value) 204 | (when value 205 | (push option transform))) 206 | ((:lambda lambda-list &body forms) 207 | (with-current-source-form (lambda-list option) 208 | (multiple-value-bind (lambda-list* start-var end-var ignore) 209 | (parse-lambda-list-maybe-containing-&bounds lambda-list) 210 | (check-lambda-list lambda-list* 211 | '(or (:required 1) (:optional 1)) 212 | :report-lambda-list lambda-list) 213 | (push (list :lambda lambda-list* start-var end-var ignore forms) 214 | transform)))) 215 | ((:function designator) 216 | (declare (ignore designator)) 217 | (push option transform)) 218 | ((:destructure lambda-list &body forms) 219 | (with-current-source-form (lambda-list option) 220 | (multiple-value-bind (lambda-list* start-var end-var ignore) 221 | (parse-lambda-list-maybe-containing-&bounds lambda-list) 222 | (push (list :destructure lambda-list* start-var end-var ignore forms) 223 | transform)))) 224 | ((:around lambda-list &body forms) 225 | (with-current-source-form (lambda-list option) 226 | (multiple-value-bind (lambda-list* start end ignore) 227 | (parse-lambda-list-maybe-containing-&bounds lambda-list) 228 | (check-lambda-list 229 | lambda-list* '() :report-lambda-list lambda-list) 230 | (setf around `(lambda (,start ,end transform) 231 | (declare (ignore ,@ignore) 232 | (function transform)) 233 | (flet ((call-transform () 234 | (funcall transform))) 235 | ,@forms)))))) 236 | ((:use-cache value) 237 | (funcall use-cache value)) 238 | ((:error-report behavior) 239 | (funcall error-report behavior))))) 240 | (values transform around (funcall when) 241 | (funcall error-report) (funcall use-cache)))) 242 | 243 | (defun expand-transforms (transforms) 244 | (let ((production-used-p t) 245 | (identityp t) 246 | (constantp nil) 247 | (textp nil)) 248 | (labels 249 | ((make-transform-body (start end start-var end-var ignore body) 250 | (let* ((start-end-vars (list start-var end-var)) 251 | (other-ignore (set-difference ignore start-end-vars))) 252 | (values 253 | `(,@(when other-ignore `((declare (ignore ,@other-ignore)))) 254 | ,@body) 255 | `(,@(unless (member start-var ignore :test #'eq) 256 | `((,start-var ,start))) 257 | ,@(unless (member end-var ignore :test #'eq) 258 | `((,end-var ,end))))))) 259 | (process-option (options start end production) 260 | (destructuring-bind (&optional option &rest rest) options 261 | (unless option 262 | (return-from process-option production)) 263 | (destructuring-ecase option 264 | ((:constant value) 265 | (setf production-used-p nil identityp nil constantp t) 266 | (process-option rest start end value)) 267 | ((:identity value) 268 | (declare (ignore value)) 269 | (process-option rest start end production)) 270 | ((:text value) 271 | (setf textp (and value identityp) 272 | identityp nil) 273 | (process-option rest start end `(text ,production))) 274 | ((:function designator) ; TODO resolve-function? 275 | (setf identityp nil constantp nil) 276 | (process-option rest start end `(,designator ,production))) 277 | ((:lambda lambda-list start-var end-var ignore forms) 278 | (setf identityp nil constantp nil) 279 | (multiple-value-bind (body bindings) 280 | (make-transform-body 281 | start end start-var end-var ignore forms) 282 | (process-option 283 | rest start end 284 | `((lambda (,@lambda-list &aux ,@bindings) 285 | ,@body) 286 | ,production)))) 287 | ((:destructure lambda-list start-var end-var ignore forms) 288 | (setf identityp nil constantp nil) 289 | (multiple-value-bind (body bindings) 290 | (make-transform-body 291 | start end start-var end-var ignore forms) 292 | (process-option 293 | rest start end 294 | `(destructuring-bind (,@lambda-list &aux ,@bindings) 295 | ,production 296 | ,@body)))))))) 297 | (with-gensyms (production start end) 298 | (let ((form (process-option (reverse transforms) start end production))) 299 | (values 300 | `(lambda (,production ,start ,end) 301 | (declare ,@(unless production-used-p `((ignore ,production))) 302 | (ignorable ,start ,end)) 303 | ,form) 304 | identityp 305 | constantp 306 | textp)))))) 307 | -------------------------------------------------------------------------------- /src/expressions.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | ;;; 23 | 24 | (eval-when (:compile-toplevel :load-toplevel :execute) 25 | (defvar *expression-kinds* 26 | `((character . (eql character)) 27 | (character-ranges . (cons (eql character-ranges))) 28 | (string . (cons (eql string) (cons array-length null))) 29 | (and . (cons (eql and))) 30 | (or . (cons (eql or))) 31 | ,@(mapcar (lambda (symbol) 32 | `(,symbol . (cons (eql ,symbol) (cons t null)))) 33 | '(not * + ? & !)) 34 | ,@(mapcar (lambda (symbol) 35 | `(,symbol . (cons (eql ,symbol) 36 | (cons (and positive-integer input-position) 37 | (cons t null))))) 38 | '(< >)) 39 | (terminal . terminal) 40 | (nonterminal . nonterminal) 41 | (predicate . predicate) 42 | (function . (cons (eql function) (cons symbol null))) 43 | (t . t)) 44 | "Names and corresponding types of acceptable expression 45 | constructors.")) 46 | 47 | (eval-when (:compile-toplevel :load-toplevel :execute) 48 | (defmacro expression-case (expression &body clauses) 49 | "Similar to 50 | 51 | (cl:typecase EXPRESSION CLAUSES) 52 | 53 | but clause heads designate kinds of expressions instead of types. See 54 | *EXPRESSION-KINDS*." 55 | (let ((available (copy-list *expression-kinds*))) 56 | (labels ((type-for-expression-kind (kind) 57 | (if-let ((cell (assoc kind available))) 58 | (progn 59 | (removef available cell) 60 | (cdr cell)) 61 | (error "Invalid or duplicate clause: ~S" kind))) 62 | (process-clause (clause) 63 | (destructuring-bind (kind &body body) clause 64 | (etypecase kind 65 | (cons 66 | `((or ,@(mapcar #'type-for-expression-kind kind)) 67 | ,@body)) 68 | (symbol 69 | `(,(type-for-expression-kind kind) 70 | ,@body)))))) 71 | (let ((clauses (mapcar #'process-clause clauses))) 72 | ;; We did not provide clauses for all expression 73 | ;; constructors and did not specify a catch-all clause => 74 | ;; error. 75 | (when (and (assoc t available) (> (length available) 1)) 76 | (error "Unhandled expression kinds: ~{~S~^, ~}" 77 | (remove t (mapcar #'car available)))) 78 | ;; If we did not specify a catch-all clause, insert one 79 | ;; which signals INVALID-EXPRESSION-ERROR. 80 | (once-only (expression) 81 | `(typecase ,expression 82 | ,@clauses 83 | ,@(when (assoc t available) 84 | `((t (invalid-expression-error ,expression))))))))))) 85 | 86 | (defmacro with-expression ((expr spec) &body body) 87 | (destructuring-bind (type &optional (first-var (gensym))) 88 | (etypecase (first spec) 89 | ((cons symbol (cons symbol null)) 90 | (first spec)) 91 | (symbol 92 | (list (first spec)))) 93 | (let ((lambda-list (list* first-var (rest spec)))) 94 | (once-only (expr) 95 | `(destructuring-bind ,lambda-list ,expr 96 | ,@(unless (eq t type) 97 | `((unless (eq ',type ,first-var) 98 | (error "~S-expression expected, got: ~S" ',type ,expr)))) 99 | (locally ,@body)))))) 100 | 101 | ;;; 102 | 103 | (defun check-function-reference (name expression) 104 | (cond 105 | ((not (fboundp name)) 106 | (warn 'simple-style-warning 107 | :format-control "~@" 109 | :format-arguments (list name expression)) 110 | nil) 111 | ((or (macro-function name) 112 | (special-operator-p name)) 113 | (warn 'simple-style-warning 114 | :format-control "~@<~S in expression ~S is not a ~ 115 | function (but a macro or special ~ 116 | operator).~@:>" 117 | :format-arguments (list name expression)) 118 | nil) 119 | (t 120 | t))) 121 | 122 | (defun check-expression (expression) 123 | (labels 124 | ((rec (expression) 125 | (expression-case expression 126 | ((character string function terminal nonterminal)) 127 | (character-ranges 128 | (unless (every (of-type 'character-range) (rest expression)) 129 | (invalid-expression-error expression))) 130 | ((and or) 131 | (mapc #'rec (rest expression))) 132 | ((not * + ? & ! predicate) 133 | (rec (second expression))) 134 | ((< >) 135 | (rec (third expression)))))) 136 | (rec expression))) 137 | 138 | (defun %expression-dependencies (expression) 139 | (labels ((rec (expression result) 140 | (expression-case expression 141 | ((character string character-ranges function terminal) 142 | result) 143 | (nonterminal 144 | (if (member expression result :test #'eq) 145 | result 146 | (let ((rule (find-rule expression)) 147 | (result (list* expression result))) 148 | (if rule 149 | (rec (rule-expression rule) result) 150 | result)))) 151 | ((and or) 152 | (reduce #'rec (rest expression) 153 | :initial-value result :from-end t)) 154 | ((not * + ? & ! predicate) 155 | (rec (second expression) result)) 156 | ((< >) 157 | (rec (third expression) result))))) 158 | (rec expression '()))) 159 | 160 | (defun %expression-direct-dependencies (expression) 161 | (labels ((rec (expression result) 162 | (expression-case expression 163 | ((character string character-ranges function terminal) 164 | result) 165 | (nonterminal 166 | (adjoin expression result :test #'eq)) 167 | ((and or) 168 | (reduce #'rec (rest expression) 169 | :initial-value result :from-end t)) 170 | ((not * + ? & ! predicate) 171 | (rec (second expression) result)) 172 | ((< >) 173 | (rec (third expression) result))))) 174 | (rec expression '()))) 175 | 176 | (defun expression-start-terminals 177 | (expression 178 | &key (when-rule-error-report nil when-rule-error-report-p)) 179 | "Return a list of terminals or tree of expressions with which a text 180 | parsable by EXPRESSION can start. 181 | 182 | A tree instead of a list is returned when EXPRESSION contains 183 | semantic predicates, NOT or !. Elements in the returned list or 184 | tree are 185 | 186 | * case (in)sensitive characters, character ranges, 187 | case (in)sensitive strings, function terminals 188 | * semantic predicates represented as 189 | 190 | (PREDICATE-NAME NESTED-ELEMENTS) 191 | 192 | where NESTED-ELEMENTS is the list of start terminals of the 193 | expression to which PREDICATE-NAME is applied. 194 | * NOT and ! expressions are represented as 195 | 196 | ({not,!} NESTED-ELEMENTS) 197 | 198 | where NESTED-ELEMENTS is the list of start terminals of the 199 | negated expression. 200 | 201 | * < and > expressions are represented as 202 | 203 | ({<,>} OFFSET NESTED-ELEMENTS) 204 | 205 | where OFFSET is a positive integer and NESTED-ELEMENTS is the 206 | list of start terminals of the expression that should match 207 | OFFSET characters backward/forward from the current position. 208 | 209 | The (outermost) list is sorted likes this: 210 | 211 | 1. string terminals 212 | 2. character terminals 213 | 3. the CHARACTER wildcard terminal 214 | 4. semantic predicates 215 | 5. everything else 216 | 217 | If supplied, WHEN-RULE-ERROR-REPORT restricts processing of 218 | nonterminals to rules whose :ERROR-REPORT option is compatible with 219 | the value of WHEN-RULE-ERROR-REPORT." 220 | (labels ((rec (expression seen) 221 | (expression-case expression 222 | ((character string character-ranges function terminal) 223 | (list expression)) 224 | (predicate 225 | (when-let ((result (rec/sorted (second expression) seen))) 226 | (list (list (first expression) result)))) 227 | (nonterminal 228 | (unless (member expression seen :test #'equal) 229 | (when-let ((rule (find-rule expression))) 230 | (when (or (not when-rule-error-report-p) 231 | (error-report-behavior-suitable-for-report-part-p 232 | (rule-error-report rule) when-rule-error-report)) 233 | (rec (rule-expression rule) (list* expression seen)))))) 234 | ((not !) 235 | (when-let ((result (rec/sorted (second expression) seen))) 236 | (list (list (first expression) result)))) 237 | ((+ &) 238 | (rec (second expression) seen)) 239 | ((? *) 240 | (values (rec (second expression) seen) t)) 241 | ((< >) 242 | (with-expression (expression ((t direction) amount subexpr)) 243 | (list (list direction amount (rec subexpr seen))))) 244 | (and 245 | (let ((result '())) 246 | (dolist (sub-expression (rest expression) result) 247 | (multiple-value-bind (sub-start-terminals optionalp) 248 | (rec sub-expression seen) 249 | (when sub-start-terminals 250 | (appendf result sub-start-terminals) 251 | (unless optionalp 252 | (return result))))))) 253 | (or 254 | (mapcan (rcurry #'rec seen) (rest expression))))) 255 | (rec/without-duplicates (expression seen) 256 | (remove-duplicates (rec expression seen) :test #'equal)) 257 | (rec/sorted (expression seen) 258 | (stable-sort (rec/without-duplicates expression seen) 259 | #'expression<))) 260 | (rec/sorted expression '()))) 261 | 262 | (defun expression< (left right) 263 | (or (and (typep left 'string) 264 | (typep right '(not string))) 265 | (and (typep left 'string) 266 | (string-lessp left right)) 267 | (and (typep left 'character) 268 | (typep right '(not (or string character)))) 269 | (and (typep left 'character) 270 | (typep right 'character) 271 | (char-lessp left right)) 272 | (and (typep left '(eql character)) 273 | (typep left '(not (eql character)))) 274 | (and (typep left '(cons predicate-name)) 275 | (typep right '(not (or string character (eql character) 276 | (cons predicate-name))))) 277 | (typep right '(not (or string character (eql character) 278 | (cons predicate-name)))))) 279 | 280 | (defun expression-equal-p (left right) 281 | (labels ((rec (left right) 282 | (cond 283 | ((and (typep left '(or string character)) 284 | (typep right '(or string character))) 285 | (string= left right)) 286 | ((and (consp left) (consp right)) 287 | (and (rec (car left) (car right)) 288 | (rec (cdr left) (cdr right)))) 289 | (t 290 | (equalp left right))))) 291 | (declare (dynamic-extent #'rec)) 292 | (rec left right))) 293 | 294 | (defun expression-simple-p (expression &key 295 | (depth-limit 3) 296 | (string-length-limit 4) 297 | (character-ranges-size-limit 10)) 298 | (labels ((rec (expression depth) 299 | (when (< depth depth-limit) 300 | (expression-case expression 301 | (character 302 | t) 303 | (character-ranges 304 | (< (length (rest expression)) character-ranges-size-limit)) 305 | (string 306 | (< (second expression) string-length-limit)) 307 | ((and or) 308 | (every (rcurry #'rec (1+ depth)) (rest expression))) 309 | ((not ? & !) 310 | (rec (second expression) (1+ depth))) 311 | ((< >) 312 | (rec (third expression) (1+ depth))) 313 | (terminal 314 | (etypecase expression 315 | (character t) 316 | (string (< (length expression) string-length-limit)) 317 | ((cons (eql ~)) (rec (second expression) depth)))) 318 | (t 319 | nil))))) 320 | (rec expression 0))) 321 | 322 | (defun describe-terminal (terminal &optional (stream *standard-output*)) 323 | "Print a description of TERMINAL onto STREAM. 324 | 325 | In additional to actual terminals, TERMINAL can be of the forms 326 | 327 | (PREDICATE-NAME TERMINALS) 328 | ({not,!} TERMINALS) 329 | ({<,>} OFFSET TERMINALS) 330 | 331 | (i.e. as produced by EXPRESSION-START-TERMINALS)." 332 | (labels 333 | ((output (format-control &rest format-arguments) 334 | (apply #'format stream format-control format-arguments)) 335 | (rec/sub-expression (sub-expression prefix separator) 336 | (output prefix (length sub-expression)) 337 | (rec (first sub-expression)) 338 | (loop :for terminal :in (rest sub-expression) 339 | :do (output separator) (rec terminal))) 340 | (rec (terminal) 341 | (expression-case terminal 342 | (character 343 | (output "any character")) 344 | (string 345 | (output "a string of length ~D" (second terminal))) 346 | (character-ranges 347 | (output "a character in ~{[~{~C~^-~C~}]~^ or ~}" 348 | (mapcar #'ensure-list (rest terminal)))) 349 | (function 350 | (output "a string that can be parsed by the function ~S" 351 | (second terminal))) 352 | (terminal 353 | (labels ((rec (thing) 354 | (etypecase thing 355 | (character 356 | ;; For non-graphic or whitespace characters, 357 | ;; just print the name. 358 | (let ((both-p (and (graphic-char-p thing) 359 | (not (member thing 360 | '(#\Space #\Tab #\Newline))))) 361 | (name (char-name thing))) 362 | (output "the character ~:[~*~A~;~A~@[ (~A)~]~]" 363 | both-p thing name))) 364 | (string 365 | (if (length= 1 thing) 366 | (rec (char thing 0)) 367 | (output "the string ~S" thing))) 368 | ((cons (eql ~)) 369 | (rec (second thing)) 370 | (output ", disregarding case"))))) 371 | (rec terminal))) 372 | ((not !) 373 | (let ((sub-expression (second terminal))) 374 | (typecase sub-expression 375 | ((cons (eql character) null) 376 | (output "")) 377 | (t 378 | (output "anything but") 379 | (pprint-logical-block (stream sub-expression) 380 | (rec/sub-expression 381 | sub-expression "~[~; ~:; ~5:T~]" "~@:_ and ")))))) 382 | ((< >) 383 | (with-expression (terminal ((t direction) amount sub-expression)) 384 | (pprint-logical-block (stream sub-expression) 385 | (rec/sub-expression sub-expression "~[~;~:; ~4:T~]" "~@:_ or ") 386 | (output "~[~; ~:;~@:_~]~ 387 | ~D character~:P ~[before~;after~] the ~ 388 | current position" 389 | (length sub-expression) 390 | amount (case direction (< 0) (> 1)))))) 391 | (predicate 392 | (let ((sub-expression (second terminal))) 393 | (pprint-logical-block (stream sub-expression) 394 | (rec/sub-expression 395 | sub-expression "~[~;~:; ~4:T~]" "~@:_ or ") 396 | (output "~[~; ~:;~@:_~]satisfying ~A" 397 | (length sub-expression) (first terminal))))) 398 | (t 399 | (error "~@" terminal))))) 400 | (rec terminal))) 401 | 402 | ;; For use as ~/esrap:print-terminal/ in format control. 403 | (defun print-terminal (stream terminal &optional colonp atp) 404 | (declare (ignore colonp atp)) 405 | (describe-terminal terminal stream)) 406 | -------------------------------------------------------------------------------- /src/interface.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2019 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | (cl:in-package #:esrap) 21 | 22 | (defun parse (expression text &key (start 0) end junk-allowed raw) 23 | "Parses TEXT using EXPRESSION from START to END. 24 | 25 | Incomplete parses, that is not consuming the entirety of TEXT, are 26 | allowed only if JUNK-ALLOWED is true. 27 | 28 | Returns three values: 29 | 30 | 1) A production, if the parse succeeded, NIL otherwise. 31 | 2) The position up to which TEXT has been consumed or NIL if the 32 | entirety of TEXT has been consumed. 33 | 3) If the parse succeeded, even if it did not consume any input, T is 34 | returned as a third value. 35 | 36 | The third return value is necessary to distinguish successful and 37 | failed parses for cases like 38 | 39 | (parse '(! #\\a) \"a\" :junk-allowed t) 40 | (parse '(! #\\a) \"b\" :junk-allowed t) 41 | 42 | in which the first two return values cannot indicate failures. 43 | 44 | RAW controls whether the parse result is interpreted and translated 45 | into the return values described above. If RAW is true, a parse result 46 | of type RESULT or ERROR-RESULT is returned as a single value. 47 | 48 | Note that the combination of arguments :junk-allowed t :raw t does not 49 | make sense since the JUNK-ALLOWED parameter is used when parse results 50 | are interpreted and translated into return values which does not 51 | happen when :raw t." 52 | ;; There is no backtracking in the toplevel expression -- so there's 53 | ;; no point in compiling it as it will be executed only once -- unless 54 | ;; it's a constant, for which we have a compiler-macro. 55 | (when (and junk-allowed raw) 56 | (error "~@" 58 | (list :junk-allowed junk-allowed :raw raw))) 59 | (let* ((end (or end (length text))) 60 | (*context* (make-context end)) 61 | (result (eval-expression expression text start end))) 62 | (declare (dynamic-extent *context*)) 63 | (if raw 64 | result 65 | (process-parse-result result text start end junk-allowed)))) 66 | 67 | (define-compiler-macro parse (&whole form expression text 68 | &rest arguments &key &allow-other-keys) 69 | (flet ((make-expansion (result-var rawp junk-allowed-p body) 70 | ;; This inline-lambda provides keyword defaults and 71 | ;; parsing, so the compiler-macro doesn't have to worry 72 | ;; about evaluation order. 73 | (with-gensyms (expr-fun) 74 | `(let ((,expr-fun (load-time-value (compile-expression ,expression)))) 75 | ((lambda (text &key (start 0) end 76 | ,@(if rawp '(raw)) 77 | ,@(if junk-allowed-p '(junk-allowed))) 78 | (let* ((end (or end (length text))) 79 | (*context* (make-context end)) 80 | (,result-var (funcall ,expr-fun text start end))) 81 | (declare (dynamic-extent *context*)) 82 | ,body)) 83 | ,text ,@(remove-from-plist arguments :raw)))))) 84 | (cond 85 | ((not (constantp expression)) ; cannot use ENV due to LOAD-TIME-VALUE 86 | form) 87 | ((let ((raw (getf arguments :raw 'missing))) 88 | (when (and (not (eq raw 'missing)) 89 | (constantp raw)) ; cannot used ENV due to following EVAL 90 | (let ((rawp (eval raw))) 91 | (make-expansion 'result nil (not rawp) 92 | (if rawp 93 | 'result 94 | '(process-parse-result 95 | result text start end junk-allowed))))))) 96 | (t 97 | (make-expansion 'result t t 98 | '(if raw 99 | result 100 | (process-parse-result 101 | result text start end junk-allowed))))))) 102 | 103 | (defun process-parse-result (result text start end junk-allowed) 104 | (cond 105 | ;; Successfully parsed something. 106 | ((successful-parse-p result) 107 | (with-accessors ((position result-position) 108 | (production successful-parse-production)) 109 | result 110 | (cond 111 | ((= position end) ; Consumed all input. 112 | (values production nil t)) 113 | (junk-allowed ; Did not consume all input; junk 114 | (values production position t)) ; is OK. 115 | (t ; Junk is not OK. 116 | (esrap-parse-error text result))))) 117 | ;; Did not parse anything, but junk is allowed. 118 | (junk-allowed 119 | (values nil start)) 120 | ;; Did not parse anything and junk is not allowed. 121 | (t 122 | (esrap-parse-error text result)))) 123 | 124 | (defmacro defrule (&whole form symbol expression &body options) 125 | "Define SYMBOL as a nonterminal, using EXPRESSION as associated the parsing expression. 126 | 127 | Multiple OPTIONS specifying transforms are composed in the order of 128 | appearance: 129 | 130 | (:text t) 131 | (:function parse-integer) 132 | => 133 | (alexandria:compose #'parse-integer #'text) 134 | 135 | Following OPTIONS can be specified: 136 | 137 | * (:WHEN TEST) 138 | 139 | The rule is active only when TEST evaluates to true. This can be used 140 | to specify optional extensions to a grammar. 141 | 142 | This option can only be supplied once. 143 | 144 | * (:CONSTANT CONSTANT) 145 | 146 | No matter what input is consumed or what EXPRESSION produces, the production 147 | of the rule is always CONSTANT. 148 | 149 | * (:FUNCTION FUNCTION) 150 | 151 | If provided the production of the expression is transformed using 152 | FUNCTION. FUNCTION can be a function name or a lambda-expression. 153 | 154 | * (:IDENTITY BOOLEAN) 155 | 156 | If true, the production of expression is used as-is, as if (:FUNCTION IDENTITY) 157 | has been specified. If no production option is specified, this is the default. 158 | 159 | * (:TEXT BOOLEAN) 160 | 161 | If true, the production of expression is flattened and concatenated into a string 162 | as if by (:FUNCTION TEXT) has been specified. 163 | 164 | * (:LAMBDA LAMBDA-LIST &BODY BODY) 165 | 166 | If provided, same as using the corresponding lambda-expression with :FUNCTION. 167 | 168 | As an extension of the standard lambda list syntax, LAMBDA-LIST accepts 169 | the optional pseudo lambda-list keyword ESRAP:&BOUNDS, which (1) must appear 170 | after all standard lambda list keywords. (2) can be followed by one or two 171 | variables to which bounding indexes of the matching substring are bound. 172 | 173 | Therefore: 174 | 175 | LAMBDA-LIST ::= (STANDARD-LAMBDA-LIST-ELEMENTS [&BOUNDS START [END]]) 176 | 177 | * (:DESTRUCTURE DESTRUCTURING-LAMBDA-LIST &BODY BODY) 178 | 179 | If provided, same as using a lambda-expression that destructures its argument 180 | using DESTRUCTURING-BIND and the provided lambda-list with :FUNCTION. 181 | 182 | DESTRUCTURING-LAMBDA-LIST can use ESRAP:&BOUNDS in the same way 183 | as described for :LAMBDA. 184 | 185 | * (:AROUND ([&BOUNDS START [END]]) &BODY BODY) 186 | 187 | If provided, execute BODY around the construction of the production of the 188 | rule. BODY has to call ESRAP:CALL-TRANSFORM to trigger the computation of 189 | the production. Any transformation provided via :LAMBDA, :FUNCTION 190 | or :DESTRUCTURE is executed inside the call to ESRAP:CALL-TRANSFORM. As a 191 | result, modification to the dynamic state are visible within the 192 | transform. 193 | 194 | ESRAP:&BOUNDS can be used in the same way as described for :LAMBDA 195 | and :DESTRUCTURE. 196 | 197 | This option can be used to safely track nesting depth, manage symbol 198 | tables or for other stack-like operations. 199 | 200 | * (:USE-CACHE ( T | NIL | :UNLESS-TRIVIAL )) 201 | 202 | Defaults to :UNLESS-TRIVIAL if not provided. Controls whether the 203 | rule should be compiled with caching. :UNLESS-TRIVIAL 204 | automatically disables caching if 1) it doesn't change the 205 | behavior of the rule (see below) 2) the expression of the rule is 206 | simple enough to guarantee that disabling caching will improve 207 | performance. 208 | 209 | For rules with simple expressions, the overhead of cache lookup 210 | and update can by far exceed the cost of simply evaluating the 211 | rule expression. Disabling caching can improve performance in such 212 | cases. 213 | 214 | Note that disabling caching can change the behavior of the rule, 215 | for example when the rule transform returns a fresh object. 216 | 217 | * (:ERROR-REPORT ( T | NIL | :CONTEXT | :DETAIL )) 218 | 219 | Defaults to T if not provided. Controls whether and how the rule 220 | is used in parse error reports: 221 | 222 | * T 223 | 224 | The rule is used in parse error reports without 225 | restriction (i.e. when describing the context of a failure as 226 | well as listing failed rules and expected inputs). 227 | 228 | * NIL 229 | 230 | The rule is not used in parse error reports in any capacity. In 231 | particular, inputs expected by the rule are not mentioned. 232 | 233 | This value is useful for things like whitespace rules since 234 | something like \"expected space, tab or newline\", even if it 235 | would have allowed the parser to continue for one character, is 236 | rarely helpful. 237 | 238 | * :CONTEXT 239 | 240 | The rule is used in the \"context\" part of parse error 241 | reports. The rule is neither mentioned in the list of failed 242 | rules nor are inputs expected by it. 243 | 244 | * :DETAIL 245 | 246 | The rule is not used in the \"context\" part of parse error 247 | reports, but can appear in the list of failed rules. Inputs 248 | expected by the rule are mentioned as well. 249 | " 250 | (multiple-value-bind (transforms around when error-report use-cache) 251 | (parse-defrule-options options form) 252 | (multiple-value-bind 253 | (transform transform-identity-p transform-constant-p transform-text-p) 254 | (expand-transforms transforms) 255 | (let ((properties (make-rule-properties 256 | :uses-cache use-cache 257 | :uses-cache-unless-trivial (eq use-cache :unless-trivial) 258 | :transform-identity transform-identity-p 259 | :transform-constant transform-constant-p 260 | :transform-text transform-text-p))) 261 | `(eval-when (:load-toplevel :execute) 262 | (add-rule ',symbol (make-instance 'rule 263 | :expression ',expression 264 | :guard-expression ',(cdr when) 265 | :condition ,(car when) 266 | :transform ,transform 267 | :around ,around 268 | :error-report ,error-report 269 | :properties ,properties))))))) 270 | 271 | (defun add-rule (symbol rule) 272 | "Associates RULE with the nonterminal SYMBOL. Signals an error if the 273 | rule is already associated with a nonterminal. If the symbol is already 274 | associated with a rule, the old rule is removed first." 275 | ;; FIXME: This needs locking and WITHOUT-INTERRUPTS. 276 | (check-type symbol nonterminal) 277 | (when (rule-symbol rule) 278 | (error "~S is already associated with the nonterminal ~S -- remove it first." 279 | rule (rule-symbol rule))) 280 | (let* ((cell (ensure-rule-cell symbol)) 281 | (function (compile-rule symbol 282 | (rule-expression rule) 283 | (rule-condition rule) 284 | (rule-transform rule) 285 | (rule-around rule) 286 | (rule-properties rule))) 287 | (trace-info (cell-trace-info cell))) 288 | (set-cell-info cell function rule) 289 | (setf (cell-trace-info cell) nil 290 | (slot-value rule '%symbol) symbol) 291 | (when trace-info 292 | (destructuring-bind (break condition) (rest trace-info) 293 | (trace-rule symbol :break break :condition condition))) 294 | symbol)) 295 | 296 | (defun find-rule (symbol) 297 | "Returns rule designated by SYMBOL, if any. Symbol must be a nonterminal 298 | symbol." 299 | (check-type symbol nonterminal) 300 | (when-let ((cell (find-rule-cell symbol))) 301 | (cell-rule cell))) 302 | 303 | (defun remove-rule (symbol &key force) 304 | "Makes the nonterminal SYMBOL undefined. If the nonterminal is defined an 305 | already referred to by other rules, an error is signalled unless :FORCE is 306 | true." 307 | (check-type symbol nonterminal) 308 | ;; FIXME: Lock and WITHOUT-INTERRUPTS. 309 | (let* ((cell (find-rule-cell symbol)) 310 | (rule (cell-rule cell)) 311 | (trace-info (cell-trace-info cell))) 312 | (when cell 313 | (flet ((frob () 314 | (set-cell-info cell (undefined-rule-function symbol) nil) ; TODO update trace-info as part of this function? 315 | (when trace-info 316 | (setf (cell-trace-info cell) (list* (cell-%info cell) (rest trace-info)))) 317 | (when rule 318 | (detach-rule rule)))) 319 | (cond ((and rule (cell-referents cell)) 320 | (unless force 321 | (error "Nonterminal ~S is used by other nonterminal~P:~% ~{~S~^, ~}" 322 | symbol (length (cell-referents cell)) (cell-referents cell))) 323 | (frob)) 324 | ((not (cell-referents cell)) 325 | (frob) 326 | ;; There are no references to the rule at all, so 327 | ;; we can remove the cell. 328 | (unless trace-info 329 | (delete-rule-cell symbol))))) 330 | rule))) 331 | 332 | (defvar *trace-level* 0) 333 | 334 | (defun trace-rule (symbol &key recursive break condition) 335 | "Turn on tracing of nonterminal SYMBOL. 336 | 337 | If RECURSIVE is true, turn on tracing for the whole grammar rooted at 338 | SYMBOL. If RECURSIVE is a positive integer, turn on tracing for all 339 | rules reachable from the nonterminal SYMBOL in that number of steps. 340 | 341 | If BREAK is true, break is entered when the rule is invoked. 342 | 343 | If supplied, CONDITION has to be a function whose lambda-list is 344 | compatible to (symbol text position end). This function is called to 345 | determine whether trace actions should be executed for the traced 346 | rule. 347 | 348 | SYMBOL is the name of the rule being executed. 349 | 350 | TEXT is the whole text being parsed. 351 | 352 | POSITION is the position within TEXT at which the rule is executed. 353 | 354 | END is the end position of the portion of TEXT being parsed." 355 | (let ((seen (make-hash-table :test #'eq))) 356 | (labels ((traced (symbol break fun text position end) 357 | (when break 358 | (break "rule ~S" symbol)) 359 | (format *trace-output* "~&~V@T~D: ~S ~S[~A]?~%" 360 | *trace-level* (1+ *trace-level*) symbol position 361 | (substitute #\¶ #\Newline 362 | (subseq text 363 | (max 0 (- position 2)) 364 | (min (length text) (+ position 3))))) 365 | (finish-output *trace-output*) 366 | (let* ((*trace-level* (1+ *trace-level*)) 367 | (result (funcall fun text position end))) 368 | (format *trace-output* "~&~V@T~D: ~S " 369 | (1- *trace-level*) *trace-level* symbol) 370 | (if (error-result-p result) 371 | (format *trace-output* "-|~%") 372 | (format *trace-output* "~S-~S -> ~S~%" 373 | position (result-position result) 374 | (successful-parse-production result))) 375 | (finish-output *trace-output*) 376 | result)) 377 | (traced/condition (condition symbol break fun text position end) 378 | (if (funcall condition symbol text position end) 379 | (traced symbol break fun text position end) 380 | (funcall fun text position end))) 381 | (trace-one (symbol cell depth) 382 | ;; Avoid infinite recursion and processing sub-trees 383 | ;; multiple times. 384 | (if (gethash cell seen) 385 | (return-from trace-one) 386 | (setf (gethash cell seen) t)) 387 | ;; If there is old trace information, removed it first. 388 | (when (cell-trace-info cell) 389 | (untrace-rule symbol)) 390 | ;; Wrap the cell function in a tracing function. Store 391 | ;; old info in trace-info slot of CELL. 392 | (let ((fun (cell-function cell)) 393 | (rule (cell-rule cell)) 394 | (info (cell-%info cell))) 395 | (set-cell-info 396 | cell (if condition 397 | (curry #'traced/condition condition symbol break fun) 398 | (curry #'traced symbol break fun)) 399 | rule) 400 | (setf (cell-trace-info cell) (list info break condition)) 401 | ;; If requested, trace dependencies 402 | ;; recursively. Checking RULE avoids recursing into 403 | ;; referenced but undefined rules. 404 | (when (and rule 405 | (if (integerp depth) (plusp depth) depth)) 406 | (dolist (dep (%rule-direct-dependencies rule)) 407 | (trace-one dep (find-rule-cell dep) 408 | (if (integerp depth) (1- depth) depth))))) 409 | t)) 410 | (trace-one symbol 411 | (or (find-rule-cell symbol) 412 | (undefined-rule symbol)) 413 | recursive)))) 414 | 415 | (defun untrace-rule (symbol &key recursive break condition) 416 | "Turn off tracing of nonterminal SYMBOL. 417 | 418 | If RECURSIVE is true, turn off tracing for the whole grammar rooted at 419 | SYMBOL. If RECURSIVE is a positive integer, turn off tracing for all 420 | rules reachable from the nonterminal SYMBOL in that number of steps. 421 | 422 | BREAK and CONDITION are ignored, and are provided only for symmetry 423 | with TRACE-RULE." 424 | (declare (ignore break condition)) 425 | (let ((seen (make-hash-table :test #'eq))) 426 | (labels ((untrace-one (cell depth) 427 | ;; Avoid infinite recursion and processing sub-trees 428 | ;; multiple times. 429 | (if (gethash cell seen) 430 | (return-from untrace-one) 431 | (setf (gethash cell seen) t)) 432 | ;; Restore info from trace-info slot of CELL. 433 | (let ((rule (cell-rule cell)) 434 | (trace-info (cell-trace-info cell))) 435 | (when trace-info 436 | (setf (cell-%info cell) (first trace-info) 437 | (cell-trace-info cell) nil)) 438 | ;; If requested, trace dependencies 439 | ;; recursively. Checking RULE avoids recursing into 440 | ;; referenced but undefined rules. 441 | (when (and rule 442 | (if (integerp depth) (plusp depth) depth)) 443 | (dolist (dep (%rule-direct-dependencies rule)) 444 | (untrace-one (find-rule-cell dep) 445 | (if (integerp depth) (1- depth) depth))))) 446 | nil)) 447 | (untrace-one (or (find-rule-cell symbol) 448 | (undefined-rule symbol)) 449 | recursive)))) 450 | 451 | (defun untrace-all-rules () 452 | "Turn off tracing of all nonterminals." 453 | (maphash-keys #'untrace-rule *rules*)) 454 | 455 | (defun rule-expression (rule) 456 | "Return the parsing expression associated with the RULE." 457 | (slot-value rule '%expression)) 458 | 459 | (defun (setf rule-expression) (expression rule) 460 | "Modify RULE to use EXPRESSION as the parsing expression. The rule must be 461 | detached beforehand." 462 | (let ((name (rule-symbol rule))) 463 | (when name 464 | (error "~@" 466 | name)) 467 | (setf (slot-value rule '%expression) expression))) 468 | 469 | (defun change-rule (symbol expression) 470 | "Modifies the nonterminal SYMBOL to use EXPRESSION instead. Temporarily 471 | removes the rule while it is being modified." 472 | (let ((rule (remove-rule symbol :force t))) 473 | (unless rule 474 | (undefined-rule symbol)) 475 | (setf (rule-expression rule) expression) 476 | (add-rule symbol rule))) 477 | 478 | (defun describe-grammar (symbol &optional (stream *standard-output*)) 479 | "Prints the grammar tree rooted at nonterminal SYMBOL to STREAM for human 480 | inspection." 481 | (check-type symbol nonterminal) 482 | (flet ((max-symbol-length (symbols) 483 | (reduce #'max symbols 484 | :key (compose #'length #'prin1-to-string) 485 | :initial-value 0)) 486 | (output-rule (length rule) 487 | (format stream "~3T~S~VT<- ~S~@[ : ~S~]~%" 488 | (rule-symbol rule) 489 | length 490 | (rule-expression rule) 491 | (when (rule-condition rule) 492 | (rule-guard-expression rule))))) 493 | (if-let ((rule (find-rule symbol))) 494 | (progn 495 | (format stream "~&Grammar ~S:~%" symbol) 496 | (multiple-value-bind (defined undefined) (rule-dependencies rule) 497 | (let ((length 498 | (+ 4 (max (max-symbol-length defined) 499 | (max-symbol-length undefined))))) 500 | (output-rule length rule) 501 | (mapc (compose (curry #'output-rule length) #'find-rule) defined) 502 | (when undefined 503 | (format stream "~%Undefined nonterminal~P:~%~{~3T~S~%~}" 504 | (length undefined) undefined))))) 505 | (format stream "Symbol ~S is not a defined nonterminal." symbol)))) 506 | -------------------------------------------------------------------------------- /src/results.lisp: -------------------------------------------------------------------------------- 1 | ;;;; Copyright (c) 2007-2013 Nikodemus Siivola 2 | ;;;; Copyright (c) 2012-2018 Jan Moringen 3 | ;;;; 4 | ;;;; Permission is hereby granted, free of charge, to any person 5 | ;;;; obtaining a copy of this software and associated documentation files 6 | ;;;; (the "Software"), to deal in the Software without restriction, 7 | ;;;; including without limitation the rights to use, copy, modify, merge, 8 | ;;;; publish, distribute, sublicense, and/or sell copies of the Software, 9 | ;;;; and to permit persons to whom the Software is furnished to do so, 10 | ;;;; subject to the following conditions: 11 | ;;;; 12 | ;;;; THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 13 | ;;;; EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 14 | ;;;; MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 15 | ;;;; IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 16 | ;;;; CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 17 | ;;;; TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 18 | ;;;; SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 19 | 20 | ;;;; We always return a RESULT -- ERROR-RESULT for failed parses, and 21 | ;;;; SUCCESSFUL-PARSE for successes. 22 | ;;;; 23 | ;;;; We implement a simple lazy evaluation for the productions. This 24 | ;;;; is used to perform semantic actions only when necessary -- either 25 | ;;;; when we call a semantic predicate or once parse has finished. 26 | 27 | (cl:in-package #:esrap) 28 | 29 | (defstruct (result (:constructor nil) (:copier nil)) 30 | ;; Expression that succeeded/failed to match. 31 | (expression nil :read-only t) 32 | ;; Position at which match was attempted. 33 | ;; Either 34 | ;; * the position at which the parse failed 35 | ;; * or function returning that position when called with the 36 | ;; FAILED-PARSE instance and optionally a minimum position as its 37 | ;; arguments. 38 | (%position #'max-of-result-positions :type (or function input-position)) 39 | ;; One of the following things: 40 | ;; * nested error, closer to actual failure site 41 | ;; * a (possibly empty) list thereof 42 | ;; * a string describing the failure 43 | ;; * a condition instance describing the failure 44 | (detail nil :type (or structure-object #|result|# list string condition) :read-only t)) 45 | 46 | ;; The following function is only called from slow paths. 47 | (declaim (ftype (function (result) (values input-position &optional)) 48 | result-position)) 49 | (defun result-position (result) 50 | (let ((position (result-%position result))) 51 | (if (functionp position) 52 | (setf (result-%position result) 53 | (funcall position (ensure-list (result-detail result)))) 54 | position))) 55 | 56 | (defmethod print-object ((object result) stream) 57 | (print-unreadable-object (object stream :type t) 58 | (let ((*print-level* (min 2 (or *print-level* 2))) 59 | (*print-length* (min 3 (or *print-length* 3)))) 60 | (format stream "~S~@[ @~D~]" 61 | (result-expression object) (result-position object))))) 62 | 63 | (defstruct (error-result (:include result) (:constructor nil) (:copier nil))) 64 | 65 | (defstruct (inactive-rule (:include error-result) 66 | (:constructor make-inactive-rule 67 | (expression %position)) 68 | (:copier nil))) 69 | 70 | (declaim (ftype (function (inactive-rule) (values nonterminal &optional)) 71 | inactive-rule-rule)) 72 | (defun inactive-rule-rule (result) 73 | (result-expression result)) 74 | 75 | (defstruct (failed-parse 76 | (:include error-result) 77 | (:constructor make-failed-parse (expression %position detail)) 78 | (:constructor make-failed-parse/no-position (expression detail)) 79 | (:copier nil))) 80 | 81 | ;; This is placed in the cache as a place in which information 82 | ;; regarding left recursion can be stored temporarily. 83 | (declaim (inline make-left-recursion-result 84 | left-recursion-result-p)) 85 | (defstruct (left-recursion-result 86 | (:include error-result) 87 | (:constructor make-left-recursion-result (expression)) 88 | (:copier nil)) 89 | (head nil :type (or null head))) 90 | 91 | (declaim (ftype (function (left-recursion-result) (values nonterminal &optional)) 92 | left-recursion-result-rule)) 93 | (defun left-recursion-result-rule (result) 94 | (result-expression result)) 95 | 96 | (defstruct (successful-parse 97 | (:include result) 98 | (:constructor %make-successful-parse 99 | (expression %position detail %production)) 100 | (:copier nil)) 101 | ;; Either a list of results, whose first element is the production, 102 | ;; or a function to call that will return the production. 103 | (%production nil :type (or list function))) 104 | 105 | (defun successful-parse-production (result) 106 | (let ((thunk (successful-parse-%production result))) 107 | (if (functionp thunk) 108 | (let ((value (funcall thunk (result-detail result)))) 109 | (setf (successful-parse-%production result) (list value)) 110 | value) 111 | (first thunk)))) 112 | 113 | ;; Result helper functions 114 | 115 | (defmacro make-successful-parse (expression position detail production) 116 | `(%make-successful-parse 117 | ,expression ,position ,detail 118 | ,(typecase production 119 | (symbol 120 | `(list ,production)) 121 | ((cons (eql function)) 122 | production) 123 | (t 124 | `(lambda (detail) 125 | (declare (ignore detail)) 126 | ,production))))) 127 | 128 | (defun result-nonterminal-p (result) 129 | (typep (result-expression result) 'nonterminal)) 130 | 131 | (defun result-unsatisfied-predicate-p (result) 132 | (and (failed-parse-p result) 133 | (typep (result-expression result) 'predicate) 134 | (successful-parse-p (result-detail result)))) 135 | 136 | (defun result-trivial-predicate-p (result) 137 | (and (typep (result-expression result) 'predicate) 138 | (expression-case (second (result-expression result)) 139 | ((character character-ranges string terminal) 140 | t) 141 | (t 142 | nil)))) 143 | 144 | (declaim (ftype (function (result rule-error-report-pattern) 145 | (values boolean &optional)) 146 | result-suitable-for-report-part-p)) 147 | (defun result-suitable-for-report-part-p (result part) 148 | (when (result-nonterminal-p result) 149 | (rule-suitable-for-report-part-p 150 | (result-expression result) part))) 151 | 152 | (declaim (ftype (function (list &optional input-position) 153 | (values input-position &optional)) 154 | max-of-result-positions)) 155 | (defun max-of-result-positions (results &optional (start 0)) 156 | (reduce #'max results :key #'result-position :initial-value start)) 157 | 158 | (declaim (ftype (function (list) (values list &optional)) 159 | list-of-result-productions 160 | list-of-result-productions/butlast)) 161 | 162 | (defun list-of-result-productions (results) 163 | (mapcar #'successful-parse-production results)) 164 | 165 | (defun list-of-result-productions/butlast (results) 166 | (loop :for rest :on results :while (rest rest) 167 | :collect (successful-parse-production (first rest)))) 168 | 169 | ;;; For technical reasons, INACTIVE-RULE instances cannot be directly 170 | ;;; created with the correct value in the POSITION slot. Fix this by 171 | ;;; copying the position from adjacent results, if possible. 172 | (defun maybe-augment-inactive-rules (results) 173 | (unless (some #'inactive-rule-p results) 174 | (return-from maybe-augment-inactive-rules results)) 175 | (loop :for previous = nil :then (if (result-p current) 176 | current 177 | previous) 178 | :for current :in results 179 | :collect (if (and (inactive-rule-p current) 180 | (result-p previous)) 181 | (make-inactive-rule (result-expression current) 182 | (result-position previous)) 183 | current))) 184 | 185 | (declaim (ftype (function (function result &key (:augment-inactive-rules t))) 186 | map-results) 187 | (ftype (function (function result 188 | &key (:when-error-report rule-error-report-pattern))) 189 | map-max-results map-max-leaf-results)) 190 | 191 | ;;; Apply FUNCTION to RESULT and potentially all its ancestor results 192 | ;;; (by providing a RECURSE function to FUNCTION) and return whatever 193 | ;;; FUNCTION returns. 194 | ;;; 195 | ;;; More concretely, the lambda-list of FUNCTION has to be compatible 196 | ;;; to 197 | ;;; 198 | ;;; (result recurse) 199 | ;;; 200 | ;;; where RESULT is the result object currently being visited and 201 | ;;; RECURSE is a function of no arguments that, when called, continues 202 | ;;; the traversal into children of RESULT and returns whatever 203 | ;;; FUNCTION returns for the sub-tree of ancestor results. 204 | (defun map-results (function result &key (augment-inactive-rules t)) 205 | (let ((function (ensure-function function)) 206 | (augment (if augment-inactive-rules 207 | #'maybe-augment-inactive-rules 208 | #'identity))) 209 | (labels ((do-result (result) 210 | (flet ((recurse () 211 | (let ((detail (result-detail result))) 212 | (typecase detail 213 | (cons 214 | (mapcar #'do-result (funcall augment detail))) 215 | (result 216 | (do-result detail)))))) 217 | (declare (dynamic-extent #'recurse)) 218 | (funcall function result #'recurse)))) 219 | (declare (dynamic-extent #'do-result)) 220 | (do-result result)))) 221 | 222 | ;;; Like MAP-RESULTS but only process results the position of which 223 | ;;; (computed as the recursive maximum over ancestors for inner result 224 | ;;; nodes) is maximal within the result tree RESULT. 225 | ;;; 226 | ;;; Furthermore, stop the traversal at results corresponding to !, NOT 227 | ;;; and PREDICATE expressions since failed parses among their 228 | ;;; respective ancestors are not causes of a failed (or successful) 229 | ;;; parse in the usual sense. 230 | ;;; 231 | ;;; Also restrict processing of nonterminals according to their 232 | ;;; :ERROR-REPORT option and WHEN-ERROR-REPORT. 233 | (defun map-max-results (function result 234 | &key (when-error-report nil when-error-report-p)) 235 | ;; Process result tree in two passes: 236 | ;; 237 | ;; 1. Use MAP-RESULTS to visit results, processing each with either 238 | ;; PROCESS-{LEAF or INNER}-RESULT, and collecting results into a 239 | ;; tree with nodes of the form 240 | ;; 241 | ;; (RECURSIVE-MAX-POSITION RESULT LIST-OF-CHILDREN) 242 | ;; 243 | ;; 2. Use local function MAP-MAX-RESULTS to traverse the tree 244 | ;; calling FUNCTION on the RESULT of each node. 245 | (let ((function (ensure-function function))) 246 | (labels ((process-leaf-result (result) 247 | (list (result-position result) result '())) 248 | (process-inner-result (result recurse) 249 | (declare (type function recurse)) 250 | (let ((children (remove nil (typecase (result-detail result) 251 | (result (list (funcall recurse))) 252 | (cons (funcall recurse)))))) 253 | (cond 254 | (children 255 | (let* ((max (reduce #'max children :key #'first)) 256 | (max-children (remove max children 257 | :test-not #'= :key #'first))) 258 | (list max result max-children))) 259 | ((not (successful-parse-p result)) 260 | (process-leaf-result result))))) 261 | (process-result (result recurse) 262 | ;; Treat results produced by inactive rules as if the 263 | ;; rule was not part of the grammar. 264 | (unless (inactive-rule-p result) 265 | (let ((expression (result-expression result))) 266 | (expression-case expression 267 | ;; Do not recurse into results for negation-ish 268 | ;; expressions. 269 | ((! not < >) 270 | (process-leaf-result result)) 271 | ;; If the associated rule is a nonterminal, maybe 272 | ;; suppress the result depending on the error-report 273 | ;; slot of the rule. 274 | (nonterminal 275 | (when (or (not when-error-report-p) 276 | (rule-suitable-for-report-part-p 277 | expression when-error-report)) 278 | (process-inner-result result recurse))) 279 | (t 280 | (process-inner-result result recurse)))))) 281 | (map-max-results (node) 282 | (destructuring-bind (position result children) node 283 | (declare (ignore position)) 284 | (flet ((recurse () 285 | (mapcar #'map-max-results children))) 286 | (declare (dynamic-extent #'recurse)) 287 | (funcall function result #'recurse))))) 288 | (declare (dynamic-extent #'process-leaf-result #'process-inner-result 289 | #'process-result #'map-max-results)) 290 | (if-let ((max-result-root (map-results #'process-result result))) 291 | (map-max-results max-result-root) 292 | (funcall function result (constantly '())))))) 293 | 294 | (defun map-max-leaf-results (function result 295 | &rest args &key when-error-report) 296 | (declare (ignore when-error-report)) 297 | (let ((function (ensure-function function))) 298 | (apply #'map-max-results 299 | (lambda (result recurse) 300 | (declare (type function recurse)) 301 | ;; In addition to actual leafs, treat unsatisfied 302 | ;; predicate results or trivial predicates as leafs (the 303 | ;; latter are one level above leafs anyway and allow for 304 | ;; better "expected" messages). 305 | (when (or (result-unsatisfied-predicate-p result) 306 | (result-trivial-predicate-p result) 307 | (not (funcall recurse))) 308 | (funcall function result))) 309 | result args))) 310 | 311 | (declaim (inline flattened-children)) 312 | (defun flattened-children (recurse) 313 | (let ((all-children (funcall (the function recurse)))) 314 | (remove-duplicates (reduce #'append all-children) :test #'eq))) 315 | 316 | ;;; Return a "context"-providing child result of RESULT, i.e. the most 317 | ;;; specific ancestor result of RESULT the path to which contains no 318 | ;;; forks: 319 | ;;; 320 | ;;; RESULT 321 | ;;; | 322 | ;;; `-child1 323 | ;;; | 324 | ;;; `-child2 325 | ;;; | 326 | ;;; `-nonterminal <- context 327 | ;;; | 328 | ;;; +-child4 329 | ;;; | | 330 | ;;; | ... 331 | ;;; `-child5 332 | ;;; | 333 | ;;; ... 334 | ;;; 335 | (defun result-context (result) 336 | (first 337 | (map-max-results 338 | (lambda (result recurse) 339 | (declare (type function recurse)) 340 | (let ((children (flattened-children recurse))) 341 | (cond 342 | ;; unsatisfied predicate result => collect into the result. 343 | ;; 344 | ;; This suppresses children of RESULT. The actual context 345 | ;; will normally be a nonterminal result above RESULT. 346 | ((result-unsatisfied-predicate-p result) 347 | (list result)) 348 | ;; nonterminal with a single child => return the child. 349 | ((and (length= 1 children) 350 | (or (result-suitable-for-report-part-p 351 | (first children) :context) 352 | (not (result-suitable-for-report-part-p 353 | result :context)))) 354 | children) 355 | ;; nonterminal with multiple children, i.e. common 356 | ;; derivation ends here => return RESULT. 357 | (t 358 | (list result))))) 359 | result :when-error-report '(:context :detail)))) 360 | 361 | ;;; Return an explicit description (i.e. a STRING or CONDITION) of the 362 | ;;; cause of the parse failure if such a thing can be found in the 363 | ;;; result tree rooted at RESULT. 364 | (defun result-root-cause (result) 365 | (first 366 | (map-max-results 367 | (lambda (result recurse) 368 | (cond 369 | ((typep result 'inactive-rule) 370 | (list (let ((*package* (load-time-value (find-package :keyword)))) 371 | (format nil "Rule ~S is not active" 372 | (result-expression result))))) 373 | ((typep (result-detail result) '(or string condition)) 374 | (list (result-detail result))) 375 | ((result-unsatisfied-predicate-p result) 376 | (list (format nil "The production~ 377 | ~2%~ 378 | ~2@T~<~S~:>~ 379 | ~2%~ 380 | does not satisfy the predicate ~S." 381 | (list (successful-parse-production 382 | (result-detail result))) 383 | (first (result-expression result))))) 384 | (t 385 | (flattened-children recurse)))) 386 | result))) 387 | 388 | ;;; Return a list of terminals that would have allowed the failed 389 | ;;; parsed represented by RESULT to succeed. 390 | (defun result-expected-input (result) 391 | (let ((expected '())) 392 | (map-max-leaf-results 393 | (lambda (leaf) 394 | (mapc (lambda (start-terminal) 395 | (pushnew start-terminal expected :test #'expression-equal-p)) 396 | (typecase leaf 397 | (failed-parse 398 | (expression-start-terminals 399 | (result-expression leaf) :when-rule-error-report :detail)) 400 | (successful-parse 401 | '((not (character))))))) 402 | result :when-error-report :detail) 403 | (sort expected #'expression<))) 404 | 405 | ;;; Return a list of children of RESULT that are the roots of disjoint 406 | ;;; result sub-trees. 407 | ;;; 408 | ;;; Precondition: RESULT is a nonterminal with multiple children 409 | ;;; (I.e. RESULT is typically the return value of RESULT-CONTEXT). 410 | (defun partition-results (result) 411 | (flet ((child-closure (result) 412 | (let ((results (list result))) 413 | (map-max-results (lambda (result recurse) 414 | (pushnew result results :test #'eq) 415 | (funcall recurse)) 416 | result) 417 | results))) 418 | (declare (dynamic-extent #'child-closure)) 419 | (map-max-results 420 | (lambda (result recurse) 421 | (let ((children (flattened-children recurse))) 422 | (cond 423 | ;; Unsatisfied predicate result => return RESULT. 424 | ((result-unsatisfied-predicate-p result) 425 | (list result)) 426 | ;; No children => certainly no fork in ancestors => return 427 | ;; RESULT. 428 | ((null children) 429 | (list result)) 430 | ;; Only a single child, i.e. children have not been 431 | ;; partitioned => return RESULT. 432 | ((length= 1 children) 433 | (if (result-suitable-for-report-part-p (first children) :detail) 434 | children 435 | (list result))) 436 | ;; Multiple children, but not all of them are nonterminals 437 | ;; and RESULT is a nonterminal => do not use the partition 438 | ;; into CHILDREN and instead return RESULT. 439 | ((and (result-suitable-for-report-part-p result :detail) 440 | (notevery #'result-nonterminal-p children)) 441 | (list result)) 442 | ;; Multiple children, all of which are nonterminals. If the 443 | ;; child-closures of all children are disjoint => use the 444 | ;; partition into children. Otherwise => do not use the 445 | ;; partition and instead return RESULT. 446 | (t 447 | (let ((closures (mapcar #'child-closure children))) 448 | (loop :named outer :for (closure1 . rest) :on closures :do 449 | (loop :for closure2 :in rest :do 450 | (when (intersection closure1 closure2 :test #'eq) 451 | (return-from outer (list result)))) 452 | :finally (return-from outer children))))))) 453 | result :when-error-report :detail))) 454 | 455 | ;;; Given the "context" result (see RESULT-CONTEXT) CONTEXT, determine 456 | ;;; the set of failed ancestor results (see PARTITION-RESULTS). 457 | ;;; Display the context and all failed ancestor results optionally 458 | ;;; printing the reason for the failure and listing the respective 459 | ;;; expected inputs that would have allowed the failed results to 460 | ;;; succeed. 461 | (defun error-report (context stream) 462 | (let* ((partitioned (partition-results context)) 463 | (expected (mapcar (lambda (root) 464 | (let ((reason (result-root-cause root)) 465 | (expected (result-expected-input root))) 466 | (list root 467 | (when reason (list reason)) 468 | (length expected) 469 | expected))) 470 | partitioned)) 471 | (expected (sort expected #'expression< :key #'first))) 472 | ;; Print context (if any), then print each failure result from the 473 | ;; PARTITIONED set with its name and the set of expected inputs, 474 | ;; if any. 475 | (format stream "~@<~@[In context ~/esrap:print-result/:~ 476 | ~@:_~@:_~ 477 | ~]~ 478 | ~{~{~ 479 | While parsing ~/esrap:print-result/. ~ 480 | ~@[Problem:~@:_~@:_~ 481 | ~2@T~<~@;~A~:>~ 482 | ~[~:;~@:_~@:_~]~:*~ 483 | ~]~ 484 | ~[~ 485 | ~*~ 486 | ~:;~ 487 | ~:*Expected:~@:_~@:_~ 488 | ~[~ 489 | ~2@T~{~/esrap::print-terminal/~}~ 490 | ~:;~ 491 | ~5@T~{~/esrap::print-terminal/~^~@:_ or ~}~ 492 | ~]~ 493 | ~]~ 494 | ~}~^~@:_~@:_~}~ 495 | ~:>" 496 | context expected))) 497 | 498 | (defvar *result-pprint-dispatch* 499 | (let ((dispatch (copy-pprint-dispatch))) 500 | (set-pprint-dispatch 501 | 'string (lambda (stream x) 502 | (write x :stream stream :escape t :pretty nil)) 503 | 0 dispatch) 504 | (set-pprint-dispatch 505 | 'character (lambda (stream x) 506 | (if (or (not (graphic-char-p x)) 507 | (member x '(#\Space #\Tab #\Newline))) 508 | (write-string (char-name x) stream) 509 | (write (string x) :stream stream :escape t :pretty nil))) 510 | 0 dispatch) 511 | dispatch)) 512 | 513 | ;; For use as ~/esrap::print-result/ in format control. 514 | (defun print-result (stream result &optional colon? at?) 515 | (declare (ignore colon? at?)) 516 | (let ((*print-pprint-dispatch* *result-pprint-dispatch*)) 517 | (princ (result-expression result) stream))) 518 | --------------------------------------------------------------------------------