├── .gitignore ├── parser-tools-doc ├── LICENSE.txt ├── info.rkt └── parser-tools │ ├── info.rkt │ └── parser-tools.scrbl ├── parser-tools-lib ├── LICENSE.txt ├── info.rkt └── parser-tools │ ├── cfg-parser.rkt │ ├── examples │ ├── calc.rkt │ └── read.rkt │ ├── info.rkt │ ├── lex-plt-v200.rkt │ ├── lex-sre.rkt │ ├── lex.rkt │ ├── private-lex │ ├── actions.rkt │ ├── deriv.rkt │ ├── error-tests.rkt │ ├── front.rkt │ ├── re.rkt │ ├── stx.rkt │ ├── token-syntax.rkt │ ├── token.rkt │ ├── unicode-chars.rkt │ └── util.rkt │ ├── private-yacc │ ├── grammar.rkt │ ├── graph.rkt │ ├── input-file-parser.rkt │ ├── lalr.rkt │ ├── lr0.rkt │ ├── parser-actions.rkt │ ├── parser-builder.rkt │ ├── table.rkt │ └── yacc-helper.rkt │ ├── yacc-to-scheme.rkt │ └── yacc.rkt └── parser-tools ├── LICENSE.txt └── info.rkt /.gitignore: -------------------------------------------------------------------------------- 1 | # Racket compiled files 2 | compiled/ 3 | 4 | # common backups, autosaves, lock files, OS meta-files 5 | *~ 6 | \#* 7 | .#* 8 | .DS_Store 9 | *.bak 10 | TAGS 11 | -------------------------------------------------------------------------------- /parser-tools-doc/LICENSE.txt: -------------------------------------------------------------------------------- 1 | parser-tools-doc 2 | Copyright (c) 2010-2014 PLT Design Inc. 3 | 4 | This package is distributed under the GNU Lesser General Public 5 | License (LGPL). This means that you can link this package into proprietary 6 | applications, provided you follow the rules stated in the LGPL. You 7 | can also modify this package; if you distribute a modified version, 8 | you must distribute it under the terms of the LGPL, which in 9 | particular means that you must release the source code for the 10 | modified software. See http://www.gnu.org/copyleft/lesser.html 11 | for more information. 12 | -------------------------------------------------------------------------------- /parser-tools-doc/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define collection 'multi) 4 | (define deps '("base")) 5 | (define build-deps '("scheme-lib" 6 | "racket-doc" 7 | "syntax-color-doc" 8 | "parser-tools-lib" 9 | "scribble-lib")) 10 | (define update-implies '("parser-tools-lib")) 11 | 12 | (define pkg-desc "documentation part of \"parser-tools\"") 13 | 14 | (define pkg-authors '(mflatt)) 15 | 16 | (define license 17 | '(Apache-2.0 OR MIT)) 18 | -------------------------------------------------------------------------------- /parser-tools-doc/parser-tools/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define scribblings '(("parser-tools.scrbl" (multi-page) (parsing-library)))) 4 | -------------------------------------------------------------------------------- /parser-tools-lib/LICENSE.txt: -------------------------------------------------------------------------------- 1 | parser-tools-lib 2 | Copyright (c) 2010-2014 PLT Design Inc. 3 | 4 | This package is distributed under the GNU Lesser General Public 5 | License (LGPL). This means that you can link this package into proprietary 6 | applications, provided you follow the rules stated in the LGPL. You 7 | can also modify this package; if you distribute a modified version, 8 | you must distribute it under the terms of the LGPL, which in 9 | particular means that you must release the source code for the 10 | modified software. See http://www.gnu.org/copyleft/lesser.html 11 | for more information. 12 | -------------------------------------------------------------------------------- /parser-tools-lib/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define collection 'multi) 4 | (define deps '("base")) 5 | (define build-deps '("rackunit-lib")) 6 | 7 | (define pkg-desc "implementation (no documentation) part of \"parser-tools\"") 8 | 9 | (define pkg-authors '(mflatt)) 10 | 11 | (define license 12 | '(Apache-2.0 OR MIT)) 13 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/examples/calc.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; An interactive calculator inspired by the calculator example in the bison manual. 4 | 5 | ;; Import the parser and lexer generators. 6 | (require parser-tools/yacc 7 | parser-tools/lex 8 | (prefix-in : parser-tools/lex-sre)) 9 | 10 | (define-tokens value-tokens (NUM VAR FNCT)) 11 | (define-empty-tokens op-tokens (newline = OP CP + - * / ^ EOF NEG)) 12 | 13 | ;; A hash table to store variable values in for the calculator 14 | (define vars (make-hash)) 15 | 16 | (define-lex-abbrevs 17 | (lower-letter (:/ "a" "z")) 18 | 19 | (upper-letter (:/ #\A #\Z)) 20 | 21 | ;; (:/ 0 9) would not work because the lexer does not understand numbers. (:/ #\0 #\9) is ok too. 22 | (digit (:/ "0" "9"))) 23 | 24 | (define calcl 25 | (lexer 26 | [(eof) 'EOF] 27 | ;; recursively call the lexer on the remaining input after a tab or space. Returning the 28 | ;; result of that operation. This effectively skips all whitespace. 29 | [(:or #\tab #\space) (calcl input-port)] 30 | ;; (token-newline) returns 'newline 31 | [#\newline (token-newline)] 32 | ;; Since (token-=) returns '=, just return the symbol directly 33 | [(:or "=" "+" "-" "*" "/" "^") (string->symbol lexeme)] 34 | ["(" 'OP] 35 | [")" 'CP] 36 | ["sin" (token-FNCT sin)] 37 | [(:+ (:or lower-letter upper-letter)) (token-VAR (string->symbol lexeme))] 38 | [(:+ digit) (token-NUM (string->number lexeme))] 39 | [(:: (:+ digit) #\. (:* digit)) (token-NUM (string->number lexeme))])) 40 | 41 | (define parser-errors 42 | (hash 43 | '((0 . #f)) 44 | "missing lhs of eq" 45 | '((18 . #f) (6 . x) (0 . #f)) 46 | "missing rhs of eq" 47 | '((12 . #f) (3 . 1) (0 . #f)) 48 | "missing rhs of plus" 49 | '((3 . 1) (0 . #f)) 50 | "missing left parenthesis" 51 | '((20 . 1) (8 . #f) (0 . #f)) 52 | "missing right parenthesis")) 53 | 54 | (define calcp 55 | (parser 56 | 57 | (start start) 58 | (end newline EOF) 59 | (tokens value-tokens op-tokens) 60 | (error 61 | (lambda (tok-ok? tok-name tok-value #:stack se) 62 | (define pe 63 | (hash-ref parser-errors se #f)) 64 | (if pe (error 'calc pe) 65 | (error 66 | 'calc 67 | "Unexpected token: ~a~a\nparser-state = ~v" 68 | tok-name 69 | (if tok-value 70 | (format "(~a)" tok-value) 71 | "") 72 | se)))) 73 | 74 | (precs (right =) 75 | (left - +) 76 | (left * /) 77 | (left NEG) 78 | (right ^)) 79 | 80 | (grammar 81 | 82 | (start [() #f] 83 | ;; If there is an error, ignore everything before the error 84 | ;; and try to start over right after the error 85 | [(error start) $2] 86 | [(exp) $1]) 87 | 88 | (exp [(NUM) $1] 89 | [(VAR) (hash-ref vars $1 (lambda () 0))] 90 | [(VAR = exp) (begin (hash-set! vars $1 $3) 91 | $3)] 92 | [(FNCT OP exp CP) ($1 $3)] 93 | [(exp + exp) (+ $1 $3)] 94 | [(exp - exp) (- $1 $3)] 95 | [(exp * exp) (* $1 $3)] 96 | [(exp / exp) (/ $1 $3)] 97 | [(- exp) (prec NEG) (- $2)] 98 | [(exp ^ exp) (expt $1 $3)] 99 | [(OP exp CP) $2])))) 100 | 101 | ;; run the calculator on the given input-port 102 | (define (calc ip) 103 | (port-count-lines! ip) 104 | (let one-line () 105 | (define result 106 | (calcp (lambda () (calcl ip)))) 107 | (when result 108 | (printf "~a\n" result) 109 | (one-line)))) 110 | 111 | (module+ test 112 | (require rackunit 113 | racket/port) 114 | (define (run s) 115 | (with-output-to-string 116 | (λ () 117 | (calc (open-input-string s))))) 118 | (define (ok s o) 119 | (check-equal? (run s) o)) 120 | (define (no s xm) 121 | (with-handlers 122 | ([exn:fail? 123 | (λ (x) 124 | (check-regexp-match 125 | xm (exn-message x)))]) 126 | (define o (run s)) 127 | (check-true 128 | #f 129 | (format "expected error, got: ~v" 130 | o)))) 131 | 132 | (ok "x=1\n(x + 2 * 3) - (1+2)*3" 133 | "1\n-2\n") 134 | (no "(x" 135 | "right parenthesis") 136 | (no "x)" 137 | "left parenthesis") 138 | (no "x+" 139 | "missing rhs of plus") 140 | (no "x=" 141 | "missing rhs of eq") 142 | (no "=1" 143 | "missing lhs of eq")) 144 | 145 | (module+ main 146 | (calc (current-input-port))) 147 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/examples/read.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; This implements the equivalent of racket's read-syntax for R5RS scheme. 4 | ;; It has not been thoroughly tested. Also it will read an entire file into a 5 | ;; list of syntax objects, instead of returning one syntax object at a time 6 | 7 | (require (for-syntax racket/base) 8 | parser-tools/lex 9 | (prefix-in : parser-tools/lex-sre) 10 | parser-tools/yacc 11 | syntax/readerr) 12 | 13 | (define-tokens data (DATUM)) 14 | (define-empty-tokens delim (OP CP HASHOP QUOTE QUASIQUOTE UNQUOTE UNQUOTE-SPLICING DOT EOF)) 15 | 16 | (define scheme-lexer 17 | (lexer-src-pos 18 | 19 | ;; Skip comments, without accumulating extra position information 20 | [(:or scheme-whitespace comment) (return-without-pos (scheme-lexer input-port))] 21 | 22 | ["#t" (token-DATUM #t)] 23 | ["#f" (token-DATUM #f)] 24 | [(:: "#\\" any-char) (token-DATUM (caddr (string->list lexeme)))] 25 | ["#\\space" (token-DATUM #\space)] 26 | ["#\\newline" (token-DATUM #\newline)] 27 | [(:or (:: initial (:* subsequent)) "+" "-" "...") (token-DATUM (string->symbol lexeme))] 28 | [#\" (token-DATUM (list->string (get-string-token input-port)))] 29 | [#\( 'OP] 30 | [#\) 'CP] 31 | [#\[ 'OP] 32 | [#\] 'CP] 33 | ["#(" 'HASHOP] 34 | [num2 (token-DATUM (string->number lexeme 2))] 35 | [num8 (token-DATUM (string->number lexeme 8))] 36 | [num10 (token-DATUM (string->number lexeme 10))] 37 | [num16 (token-DATUM (string->number lexeme 16))] 38 | ["'" 'QUOTE] 39 | ["`" 'QUASIQUOTE] 40 | ["," 'UNQUOTE] 41 | [",@" 'UNQUOTE-SPLICING] 42 | ["." 'DOT] 43 | [(eof) 'EOF])) 44 | 45 | (define get-string-token 46 | (lexer 47 | [(:~ #\" #\\) (cons (car (string->list lexeme)) 48 | (get-string-token input-port))] 49 | [(:: #\\ #\\) (cons #\\ (get-string-token input-port))] 50 | [(:: #\\ #\") (cons #\" (get-string-token input-port))] 51 | [#\" null])) 52 | 53 | 54 | (define-lex-abbrevs 55 | [letter (:or (:/ "a" "z") (:/ #\A #\Z))] 56 | [digit (:/ #\0 #\9)] 57 | [scheme-whitespace (:or #\newline #\return #\tab #\space #\vtab)] 58 | [initial (:or letter (char-set "!$%&*/:<=>?^_~@"))] 59 | [subsequent (:or initial digit (char-set "+-.@"))] 60 | [comment (:: #\; (:* (:~ #\newline)) #\newline)] 61 | 62 | 63 | ;; See ${PLTHOME}/collects/syntax-color/racket-lexer.rkt for an example of 64 | ;; using regexp macros to avoid the cut and paste. 65 | ; [numR (:: prefixR complexR)] 66 | ; [complexR (:or realR 67 | ; (:: realR "@" realR) 68 | ; (:: realR "+" urealR "i") 69 | ; (:: realR "-" urealR "i") 70 | ; (:: realR "+i") 71 | ; (:: realR "-i") 72 | ; (:: "+" urealR "i") 73 | ; (:: "-" urealR "i") 74 | ; (:: "+i") 75 | ; (:: "-i"))] 76 | ; [realR (:: sign urealR)] 77 | ; [urealR (:or uintegerR (:: uintegerR "/" uintegerR) decimalR)] 78 | ; [uintegerR (:: (:+ digitR) (:* #\#))] 79 | ; [prefixR (:or (:: radixR exactness) 80 | ; (:: exactness radixR))] 81 | 82 | [num2 (:: prefix2 complex2)] 83 | [complex2 (:or real2 84 | (:: real2 "@" real2) 85 | (:: real2 "+" ureal2 "i") 86 | (:: real2 "-" ureal2 "i") 87 | (:: real2 "+i") 88 | (:: real2 "-i") 89 | (:: "+" ureal2 "i") 90 | (:: "-" ureal2 "i") 91 | (:: "+i") 92 | (:: "-i"))] 93 | [real2 (:: sign ureal2)] 94 | [ureal2 (:or uinteger2 (:: uinteger2 "/" uinteger2))] 95 | [uinteger2 (:: (:+ digit2) (:* #\#))] 96 | [prefix2 (:or (:: radix2 exactness) 97 | (:: exactness radix2))] 98 | [radix2 "#b"] 99 | [digit2 (:or "0" "1")] 100 | [num8 (:: prefix8 complex8)] 101 | [complex8 (:or real8 102 | (:: real8 "@" real8) 103 | (:: real8 "+" ureal8 "i") 104 | (:: real8 "-" ureal8 "i") 105 | (:: real8 "+i") 106 | (:: real8 "-i") 107 | (:: "+" ureal8 "i") 108 | (:: "-" ureal8 "i") 109 | (:: "+i") 110 | (:: "-i"))] 111 | [real8 (:: sign ureal8)] 112 | [ureal8 (:or uinteger8 (:: uinteger8 "/" uinteger8))] 113 | [uinteger8 (:: (:+ digit8) (:* #\#))] 114 | [prefix8 (:or (:: radix8 exactness) 115 | (:: exactness radix8))] 116 | [radix8 "#o"] 117 | [digit8 (:/ "0" "7")] 118 | 119 | [num10 (:: prefix10 complex10)] 120 | [complex10 (:or real10 121 | (:: real10 "@" real10) 122 | (:: real10 "+" ureal10 "i") 123 | (:: real10 "-" ureal10 "i") 124 | (:: real10 "+i") 125 | (:: real10 "-i") 126 | (:: "+" ureal10 "i") 127 | (:: "-" ureal10 "i") 128 | (:: "+i") 129 | (:: "-i"))] 130 | [real10 (:: sign ureal10)] 131 | [ureal10 (:or uinteger10 (:: uinteger10 "/" uinteger10) decimal10)] 132 | [uinteger10 (:: (:+ digit10) (:* #\#))] 133 | [prefix10 (:or (:: radix10 exactness) 134 | (:: exactness radix10))] 135 | [radix10 (:? "#d")] 136 | [digit10 digit] 137 | [decimal10 (:or (:: uinteger10 suffix) 138 | (:: #\. (:+ digit10) (:* #\#) suffix) 139 | (:: (:+ digit10) #\. (:* digit10) (:* #\#) suffix) 140 | (:: (:+ digit10) (:+ #\#) #\. (:* #\#) suffix))] 141 | 142 | [num16 (:: prefix16 complex16)] 143 | [complex16 (:or real16 144 | (:: real16 "@" real16) 145 | (:: real16 "+" ureal16 "i") 146 | (:: real16 "-" ureal16 "i") 147 | (:: real16 "+i") 148 | (:: real16 "-i") 149 | (:: "+" ureal16 "i") 150 | (:: "-" ureal16 "i") 151 | "+i" 152 | "-i")] 153 | [real16 (:: sign ureal16)] 154 | [ureal16 (:or uinteger16 (:: uinteger16 "/" uinteger16))] 155 | [uinteger16 (:: (:+ digit16) (:* #\#))] 156 | [prefix16 (:or (:: radix16 exactness) 157 | (:: exactness radix16))] 158 | [radix16 "#x"] 159 | [digit16 (:or digit (:/ #\a #\f) (:/ #\A #\F))] 160 | 161 | 162 | [suffix (:or "" (:: exponent-marker sign (:+ digit10)))] 163 | [exponent-marker (:or "e" "s" "f" "d" "l")] 164 | [sign (:or "" "+" "-")] 165 | [exactness (:or "" "#i" "#e")]) 166 | 167 | 168 | (define stx-for-original-property (read-syntax #f (open-input-string "original"))) 169 | 170 | ;; A macro to build the syntax object 171 | (define-syntax (build-so stx) 172 | (syntax-case stx () 173 | ((_ value start end) 174 | (with-syntax ((start-pos (datum->syntax 175 | (syntax end) 176 | (string->symbol 177 | (format "$~a-start-pos" 178 | (syntax->datum (syntax start)))))) 179 | (end-pos (datum->syntax 180 | (syntax end) 181 | (string->symbol 182 | (format "$~a-end-pos" 183 | (syntax->datum (syntax end)))))) 184 | (source (datum->syntax 185 | (syntax end) 186 | 'source-name))) 187 | (syntax 188 | (datum->syntax 189 | #f 190 | value 191 | (list source 192 | (position-line start-pos) 193 | (position-col start-pos) 194 | (position-offset start-pos) 195 | (- (position-offset end-pos) 196 | (position-offset start-pos))) 197 | stx-for-original-property)))))) 198 | 199 | (define (scheme-parser source-name) 200 | (parser 201 | (src-pos) 202 | 203 | (start s) 204 | (end EOF) 205 | (error (lambda (a name val start end) 206 | (raise-read-error 207 | "read-error" 208 | source-name 209 | (position-line start) 210 | (position-col start) 211 | (position-offset start) 212 | (- (position-offset end) 213 | (position-offset start))))) 214 | (tokens data delim) 215 | 216 | 217 | (grammar 218 | 219 | (s [(sexp-list) (reverse $1)]) 220 | 221 | (sexp [(DATUM) (build-so $1 1 1)] 222 | [(OP sexp-list CP) (build-so (reverse $2) 1 3)] 223 | [(HASHOP sexp-list CP) (build-so (list->vector (reverse $2)) 1 3)] 224 | [(QUOTE sexp) (build-so (list 'quote $2) 1 2)] 225 | [(QUASIQUOTE sexp) (build-so (list 'quasiquote $2) 1 2)] 226 | [(UNQUOTE sexp) (build-so (list 'unquote $2) 1 2)] 227 | [(UNQUOTE-SPLICING sexp) (build-so (list 'unquote-splicing $2) 1 2)] 228 | [(OP sexp-list DOT sexp CP) (build-so (append (reverse $2) $4) 1 5)]) 229 | 230 | (sexp-list [() null] 231 | [(sexp-list sexp) (cons $2 $1)])))) 232 | 233 | (define (rs sn ip) 234 | (port-count-lines! ip) 235 | ((scheme-parser sn) (lambda () (scheme-lexer ip)))) 236 | 237 | (define readsyntax 238 | (case-lambda ((sn) (rs sn (current-input-port))) 239 | ((sn ip) (rs sn ip)))) 240 | 241 | (provide (rename-out [readsyntax read-syntax])) 242 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define compile-omit-paths '("private-lex/error-tests.rkt")) 4 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/lex-plt-v200.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require parser-tools/lex 3 | (prefix-in : parser-tools/lex-sre)) 4 | 5 | (#%provide epsilon 6 | ~ 7 | (rename :* *) 8 | (rename :+ +) 9 | (rename :? ?) 10 | (rename :or :) 11 | (rename :& &) 12 | (rename :: @) 13 | (rename :~ ^) 14 | (rename :/ -)) 15 | 16 | (define-lex-trans epsilon 17 | (syntax-rules () 18 | ((_) ""))) 19 | 20 | (define-lex-trans ~ 21 | (syntax-rules () 22 | ((_ re) (complement re)))) 23 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/lex-sre.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require (for-syntax racket/base) 3 | parser-tools/lex) 4 | 5 | (#%provide (rename sre-* *) 6 | (rename sre-+ +) 7 | ? 8 | (rename sre-= =) 9 | (rename sre->= >=) 10 | ** 11 | (rename sre-or or) 12 | : 13 | seq 14 | & 15 | ~ 16 | (rename sre-- -) 17 | (rename sre-/ /) 18 | /-only-chars) 19 | 20 | (define-lex-trans sre-* 21 | (syntax-rules () 22 | ((_ re ...) 23 | (repetition 0 +inf.0 (union re ...))))) 24 | 25 | (define-lex-trans sre-+ 26 | (syntax-rules () 27 | ((_ re ...) 28 | (repetition 1 +inf.0 (union re ...))))) 29 | 30 | (define-lex-trans ? 31 | (syntax-rules () 32 | ((_ re ...) 33 | (repetition 0 1 (union re ...))))) 34 | 35 | (define-lex-trans sre-= 36 | (syntax-rules () 37 | ((_ n re ...) 38 | (repetition n n (union re ...))))) 39 | 40 | (define-lex-trans sre->= 41 | (syntax-rules () 42 | ((_ n re ...) 43 | (repetition n +inf.0 (union re ...))))) 44 | 45 | (define-lex-trans ** 46 | (syntax-rules () 47 | ((_ low #f re ...) 48 | (** low +inf.0 re ...)) 49 | ((_ low high re ...) 50 | (repetition low high (union re ...))))) 51 | 52 | (define-lex-trans sre-or 53 | (syntax-rules () 54 | ((_ re ...) 55 | (union re ...)))) 56 | 57 | (define-lex-trans : 58 | (syntax-rules () 59 | ((_ re ...) 60 | (concatenation re ...)))) 61 | 62 | (define-lex-trans seq 63 | (syntax-rules () 64 | ((_ re ...) 65 | (concatenation re ...)))) 66 | 67 | (define-lex-trans & 68 | (syntax-rules () 69 | ((_ re ...) 70 | (intersection re ...)))) 71 | 72 | (define-lex-trans ~ 73 | (syntax-rules () 74 | ((_ re ...) 75 | (char-complement (union re ...))))) 76 | 77 | ;; set difference 78 | (define-lex-trans (sre-- stx) 79 | (syntax-case stx () 80 | ((_) 81 | (raise-syntax-error #f 82 | "must have at least one argument" 83 | stx)) 84 | ((_ big-re re ...) 85 | (syntax (& big-re (complement (union re ...))))))) 86 | 87 | (define-lex-trans (sre-/ stx) 88 | (syntax-case stx () 89 | ((_ range ...) 90 | (let ((chars 91 | (apply append (map (lambda (r) 92 | (let ((x (syntax-e r))) 93 | (cond 94 | ((char? x) (list x)) 95 | ((string? x) (string->list x)) 96 | (else 97 | (raise-syntax-error 98 | #f 99 | "not a char or string" 100 | stx 101 | r))))) 102 | (syntax->list (syntax (range ...))))))) 103 | (unless (even? (length chars)) 104 | (raise-syntax-error 105 | #f 106 | "not given an even number of characters" 107 | stx)) 108 | #`(/-only-chars #,@chars))))) 109 | 110 | (define-lex-trans /-only-chars 111 | (syntax-rules () 112 | ((_ c1 c2) 113 | (char-range c1 c2)) 114 | ((_ c1 c2 c ...) 115 | (union (char-range c1 c2) 116 | (/-only-chars c ...))))) 117 | 118 | 119 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/lex.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; Provides the syntax used to create lexers and the functions needed to 4 | ;; create and use the buffer that the lexer reads from. See docs. 5 | 6 | (require (for-syntax racket/base 7 | racket/list 8 | racket/promise 9 | syntax/stx 10 | syntax/define 11 | syntax/boundmap 12 | syntax/parse 13 | "private-lex/util.rkt" 14 | "private-lex/actions.rkt" 15 | "private-lex/front.rkt" 16 | "private-lex/unicode-chars.rkt")) 17 | 18 | (require racket/stxparam 19 | syntax/readerr 20 | "private-lex/token.rkt") 21 | 22 | (provide lexer lexer-src-pos define-lex-abbrev define-lex-abbrevs define-lex-trans 23 | 24 | ;; Dealing with tokens and related structures 25 | define-tokens define-empty-tokens token-name token-value token? 26 | (struct-out position) 27 | (struct-out position-token) 28 | 29 | ;; File path for highlighting errors while lexing 30 | file-path 31 | 32 | ;; Lex abbrevs for unicode char sets. See mzscheme manual section 3.4. 33 | any-char any-string nothing alphabetic lower-case upper-case title-case 34 | numeric symbolic punctuation graphic whitespace blank iso-control 35 | 36 | ;; A regular expression operator 37 | char-set) 38 | 39 | ;; wrap-action: syntax-object src-pos? -> syntax-object 40 | (define-for-syntax (wrap-action action src-pos?) 41 | (with-syntax ((action-stx 42 | (if src-pos? 43 | #`(let/ec ret 44 | (syntax-parameterize 45 | ((return-without-pos (make-rename-transformer #'ret))) 46 | (make-position-token #,action start-pos end-pos))) 47 | action))) 48 | (syntax/loc action 49 | (lambda (start-pos-p end-pos-p lexeme-p input-port-p) 50 | (syntax-parameterize 51 | ((start-pos (make-rename-transformer #'start-pos-p)) 52 | (end-pos (make-rename-transformer #'end-pos-p)) 53 | (lexeme (make-rename-transformer #'lexeme-p)) 54 | (input-port (make-rename-transformer #'input-port-p))) 55 | action-stx))))) 56 | 57 | (define-for-syntax (make-lexer-trans src-pos?) 58 | (lambda (stx) 59 | (define-splicing-syntax-class maybe-suppress-warnings 60 | (pattern (~seq #:suppress-warnings) 61 | #:attr suppress? #t) 62 | (pattern (~seq) 63 | #:attr suppress? #f)) 64 | (syntax-parse stx 65 | ((_ suppress:maybe-suppress-warnings re-act ...) 66 | (begin 67 | (for-each 68 | (lambda (x) 69 | (syntax-case x () 70 | ((re act) (void)) 71 | (_ (raise-syntax-error #f 72 | "not a regular expression / action pair" 73 | stx 74 | x)))) 75 | (syntax->list (syntax (re-act ...)))) 76 | (let* ((spec/re-act-lst 77 | (syntax->list (syntax (re-act ...)))) 78 | (eof-act 79 | (get-special-action spec/re-act-lst #'eof #''eof)) 80 | (spec-act 81 | (get-special-action spec/re-act-lst #'special #'(void))) 82 | (spec-comment-act 83 | (get-special-action spec/re-act-lst #'special-comment #'#f)) 84 | (ids (list #'special #'special-comment #'eof)) 85 | (re-act-lst 86 | (filter 87 | (lambda (spec/re-act) 88 | (syntax-case spec/re-act () 89 | (((special) act) 90 | (not (ormap 91 | (lambda (x) 92 | (and (identifier? #'special) 93 | (module-or-top-identifier=? (syntax special) x))) 94 | ids))) 95 | (_ #t))) 96 | spec/re-act-lst)) 97 | (name-lst (map (lambda (x) (datum->syntax #f (gensym))) re-act-lst)) 98 | (act-lst (map (lambda (x) (stx-car (stx-cdr x))) re-act-lst)) 99 | (re-actname-lst (map (lambda (re-act name) 100 | (list (stx-car re-act) 101 | name)) 102 | re-act-lst 103 | name-lst))) 104 | (when (null? spec/re-act-lst) 105 | (raise-syntax-error (if src-pos? 'lexer/src-pos 'lexer) "expected at least one action" stx)) 106 | (let-values (((trans start action-names no-look disappeared-uses) 107 | (build-lexer re-actname-lst))) 108 | (when (and (not (attribute suppress.suppress?)) 109 | (vector-ref action-names start)) ;; Start state is final 110 | (unless (and 111 | ;; All the successor states are final 112 | (andmap (lambda (x) (vector-ref action-names (vector-ref x 2))) 113 | (vector->list (vector-ref trans start))) 114 | ;; Each character has a successor state 115 | (let loop ((check 0) 116 | (nexts (vector->list (vector-ref trans start)))) 117 | (cond 118 | ((null? nexts) #f) 119 | (else 120 | (let ((next (car nexts))) 121 | (and (= (vector-ref next 0) check) 122 | (let ((next-check (vector-ref next 1))) 123 | (or (>= next-check max-char-num) 124 | (loop (add1 next-check) (cdr nexts)))))))))) 125 | (log-error "Warning: lexer at ~a can accept the empty string.\n" stx))) 126 | (with-syntax ((start-state-stx start) 127 | (trans-table-stx trans) 128 | (no-lookahead-stx no-look) 129 | ((name ...) name-lst) 130 | ((act ...) (map (lambda (a) 131 | (wrap-action a src-pos?)) 132 | act-lst)) 133 | ((act-name ...) (vector->list action-names)) 134 | (spec-act-stx 135 | (wrap-action spec-act src-pos?)) 136 | (has-comment-act?-stx 137 | (if (syntax-e spec-comment-act) #t #f)) 138 | (spec-comment-act-stx 139 | (wrap-action spec-comment-act src-pos?)) 140 | (eof-act-stx (wrap-action eof-act src-pos?))) 141 | (syntax-property 142 | (syntax/loc stx 143 | (let ([name act] ...) 144 | (let ([proc 145 | (lexer-body start-state-stx 146 | trans-table-stx 147 | (vector act-name ...) 148 | no-lookahead-stx 149 | spec-act-stx 150 | has-comment-act?-stx 151 | spec-comment-act-stx 152 | eof-act-stx)]) 153 | ;; reverse eta to get named procedures: 154 | (lambda (port) (proc port))))) 155 | 'disappeared-use 156 | disappeared-uses))))))))) 157 | 158 | (define-syntax lexer (make-lexer-trans #f)) 159 | (define-syntax lexer-src-pos (make-lexer-trans #t)) 160 | 161 | (define-syntax (define-lex-abbrev stx) 162 | (syntax-case stx () 163 | ((_ name re) 164 | (identifier? (syntax name)) 165 | (syntax/loc stx 166 | (define-syntax name 167 | (make-lex-abbrev (lambda () (quote-syntax re)))))) 168 | (_ 169 | (raise-syntax-error 170 | #f 171 | "form should be (define-lex-abbrev name re)" 172 | stx)))) 173 | 174 | (define-syntax (define-lex-abbrevs stx) 175 | (syntax-case stx () 176 | ((_ x ...) 177 | (with-syntax (((abbrev ...) 178 | (map 179 | (lambda (a) 180 | (syntax-case a () 181 | ((name re) 182 | (identifier? (syntax name)) 183 | (syntax/loc a (define-lex-abbrev name re))) 184 | (_ (raise-syntax-error 185 | #f 186 | "form should be (define-lex-abbrevs (name re) ...)" 187 | stx 188 | a)))) 189 | (syntax->list (syntax (x ...)))))) 190 | (syntax/loc stx (begin abbrev ...)))) 191 | (_ 192 | (raise-syntax-error 193 | #f 194 | "form should be (define-lex-abbrevs (name re) ...)" 195 | stx)))) 196 | 197 | (define-syntax (define-lex-trans stx) 198 | (syntax-case stx () 199 | ((_ name-form body-form) 200 | (let-values (((name body) 201 | (normalize-definition (syntax (define-syntax name-form body-form)) #'lambda))) 202 | 203 | #`(define-syntax #,name 204 | (let ((func #,body)) 205 | (unless (procedure? func) 206 | (raise-syntax-error 'define-lex-trans "expected a procedure as the transformer, got ~e" func)) 207 | (unless (procedure-arity-includes? func 1) 208 | (raise-syntax-error 'define-lex-trans "expected a procedure that accepts 1 argument as the transformer, got ~e" func)) 209 | (make-lex-trans func))))) 210 | (_ 211 | (raise-syntax-error 212 | #f 213 | "form should be (define-lex-trans name transformer)" 214 | stx)))) 215 | 216 | 217 | (define (get-next-state-helper char min max table) 218 | (if (>= min max) 219 | #f 220 | (let* ((try (quotient (+ min max) 2)) 221 | (el (vector-ref table try)) 222 | (r1 (vector-ref el 0)) 223 | (r2 (vector-ref el 1))) 224 | (cond 225 | ((and (>= char r1) (<= char r2)) (vector-ref el 2)) 226 | ((< char r1) (get-next-state-helper char min try table)) 227 | (else (get-next-state-helper char (add1 try) max table)))))) 228 | 229 | 230 | 231 | 232 | (define (get-next-state char table) 233 | (if table 234 | (get-next-state-helper char 0 (vector-length table) table) 235 | #f)) 236 | 237 | (define (lexer-body start-state trans-table actions no-lookahead special-action 238 | has-special-comment-action? special-comment-action eof-action) 239 | (letrec ((lexer 240 | (lambda (ip) 241 | (let ((first-pos (get-position ip)) 242 | (first-char (peek-char-or-special ip 0))) 243 | ;(printf "(peek-char-or-special port 0) = ~e\n" first-char) 244 | (cond 245 | ((eof-object? first-char) 246 | (do-match ip first-pos eof-action (read-char-or-special ip))) 247 | ((special-comment? first-char) 248 | (read-char-or-special ip) 249 | (cond 250 | (has-special-comment-action? 251 | (do-match ip first-pos special-comment-action #f)) 252 | (else (lexer ip)))) 253 | ((not (char? first-char)) 254 | (do-match ip first-pos special-action (read-char-or-special ip))) 255 | (else 256 | (let lexer-loop ( 257 | ;; current-state 258 | (state start-state) 259 | ;; the character to transition on 260 | (char first-char) 261 | ;; action for the longest match seen thus far 262 | ;; including a match at the current state 263 | (longest-match-action 264 | (vector-ref actions start-state)) 265 | ;; how many bytes precede char 266 | (length-bytes 0) 267 | ;; how many characters have been read 268 | ;; including the one just read 269 | (length-chars 1) 270 | ;; how many characters are in the longest match 271 | (longest-match-length 0)) 272 | (let ((next-state 273 | (cond 274 | ((not (char? char)) #f) 275 | (else (get-next-state (char->integer char) 276 | (vector-ref trans-table state)))))) 277 | (cond 278 | ((not next-state) 279 | (check-match ip first-pos longest-match-length 280 | length-chars longest-match-action)) 281 | ((vector-ref no-lookahead next-state) 282 | (let ((act (vector-ref actions next-state))) 283 | (check-match ip 284 | first-pos 285 | (if act length-chars longest-match-length) 286 | length-chars 287 | (if act act longest-match-action)))) 288 | (else 289 | (let* ((act (vector-ref actions next-state)) 290 | (next-length-bytes (+ (char-utf-8-length char) length-bytes)) 291 | (next-char (peek-char-or-special ip next-length-bytes))) 292 | #;(printf "(peek-char-or-special port ~e) = ~e\n" 293 | next-length-bytes next-char) 294 | (lexer-loop next-state 295 | next-char 296 | (if act 297 | act 298 | longest-match-action) 299 | next-length-bytes 300 | (add1 length-chars) 301 | (if act 302 | length-chars 303 | longest-match-length))))))))))))) 304 | (lambda (ip) 305 | (unless (input-port? ip) 306 | (raise-argument-error 307 | 'lexer 308 | "input-port?" 309 | 0 310 | ip)) 311 | (lexer ip)))) 312 | 313 | (define (check-match lb first-pos longest-match-length length longest-match-action) 314 | (unless longest-match-action 315 | (let* ((match (read-string length lb)) 316 | (end-pos (get-position lb))) 317 | (raise-read-error 318 | (format "lexer: No match found in input starting with: ~a" match) 319 | (file-path) 320 | (position-line first-pos) 321 | (position-col first-pos) 322 | (position-offset first-pos) 323 | (- (position-offset end-pos) (position-offset first-pos))))) 324 | (let ((match (read-string longest-match-length lb))) 325 | ;(printf "(read-string ~e port) = ~e\n" longest-match-length match) 326 | (do-match lb first-pos longest-match-action match))) 327 | 328 | (define file-path (make-parameter #f)) 329 | 330 | (define (do-match ip first-pos action value) 331 | #;(printf "(action ~a ~a ~a ~a)\n" 332 | (position-offset first-pos) (position-offset (get-position ip)) value ip) 333 | (action first-pos (get-position ip) value ip)) 334 | 335 | (define (get-position ip) 336 | (let-values (((line col off) (port-next-location ip))) 337 | (make-position off line col))) 338 | 339 | (define-syntax (create-unicode-abbrevs stx) 340 | (syntax-case stx () 341 | ((_ ctxt) 342 | (with-syntax (((ranges ...) (map (lambda (range) 343 | `(union ,@(map (lambda (x) 344 | `(char-range ,(integer->char (car x)) 345 | ,(integer->char (cdr x)))) 346 | range))) 347 | (list (force alphabetic-ranges) 348 | (force lower-case-ranges) 349 | (force upper-case-ranges) 350 | (force title-case-ranges) 351 | (force numeric-ranges) 352 | (force symbolic-ranges) 353 | (force punctuation-ranges) 354 | (force graphic-ranges) 355 | (force whitespace-ranges) 356 | (force blank-ranges) 357 | (force iso-control-ranges)))) 358 | ((names ...) (map (lambda (sym) 359 | (datum->syntax (syntax ctxt) sym #f)) 360 | '(alphabetic 361 | lower-case 362 | upper-case 363 | title-case 364 | numeric 365 | symbolic 366 | punctuation 367 | graphic 368 | whitespace 369 | blank 370 | iso-control)))) 371 | (syntax (define-lex-abbrevs (names ranges) ...)))))) 372 | 373 | (define-lex-abbrev any-char (char-complement (union))) 374 | (define-lex-abbrev any-string (intersection)) 375 | (define-lex-abbrev nothing (union)) 376 | (create-unicode-abbrevs #'here) 377 | 378 | (define-lex-trans (char-set stx) 379 | (syntax-case stx () 380 | ((_ str) 381 | (string? (syntax-e (syntax str))) 382 | (with-syntax (((char ...) (string->list (syntax-e (syntax str))))) 383 | (syntax (union char ...)))))) 384 | 385 | (define-syntax provide-lex-keyword 386 | (syntax-rules () 387 | [(_ id ...) 388 | (begin 389 | (define-syntax-parameter id 390 | (make-set!-transformer 391 | (lambda (stx) 392 | (raise-syntax-error 393 | #f 394 | (format "use of a lexer keyword (~a) is not in an appropriate lexer action" 395 | 'id) 396 | stx)))) 397 | ... 398 | (provide id ...))])) 399 | 400 | (provide-lex-keyword start-pos end-pos lexeme input-port return-without-pos) 401 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/actions.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (provide (all-defined-out)) 4 | (require syntax/stx) 5 | 6 | ;; get-special-action: (syntax-object list) syntax-object syntax-object -> syntax-object 7 | ;; Returns the first action from a rule of the form ((which-special) action) 8 | (define (get-special-action rules which-special none) 9 | (cond 10 | ((null? rules) none) 11 | (else 12 | (syntax-case (car rules) () 13 | (((special) act) 14 | (and (identifier? #'special) (module-or-top-identifier=? (syntax special) which-special)) 15 | (syntax act)) 16 | (_ (get-special-action (cdr rules) which-special none)))))) 17 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/deriv.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require racket/list 4 | (prefix-in is: data/integer-set) 5 | "re.rkt" 6 | "util.rkt") 7 | 8 | (provide build-dfa print-dfa (struct-out dfa)) 9 | 10 | (define e (build-epsilon)) 11 | (define z (build-zero)) 12 | 13 | 14 | ;; Don't do anything with this one but extract the chars 15 | (define all-chars (->re `(char-complement (union)) (make-cache))) 16 | 17 | ;; get-char-groups : re bool -> (list-of char-setR?) 18 | ;; Collects the char-setRs in r that could be used in 19 | ;; taking the derivative of r. 20 | (define (get-char-groups r found-negation) 21 | (cond 22 | ((or (eq? r e) (eq? r z)) null) 23 | ((char-setR? r) (list r)) 24 | ((concatR? r) 25 | (if (re-nullable? (concatR-re1 r)) 26 | (append (get-char-groups (concatR-re1 r) found-negation) 27 | (get-char-groups (concatR-re2 r) found-negation)) 28 | (get-char-groups (concatR-re1 r) found-negation))) 29 | ((repeatR? r) 30 | (get-char-groups (repeatR-re r) found-negation)) 31 | ((orR? r) 32 | (apply append (map (lambda (x) (get-char-groups x found-negation)) (orR-res r)))) 33 | ((andR? r) 34 | (apply append (map (lambda (x) (get-char-groups x found-negation)) (andR-res r)))) 35 | ((negR? r) 36 | (if found-negation 37 | (get-char-groups (negR-re r) #t) 38 | (cons all-chars (get-char-groups (negR-re r) #t)))))) 39 | 40 | (test-block ((c (make-cache)) 41 | (r1 (->re #\1 c)) 42 | (r2 (->re #\2 c))) 43 | ((get-char-groups e #f) null) 44 | ((get-char-groups z #f) null) 45 | ((get-char-groups r1 #f) (list r1)) 46 | ((get-char-groups (->re `(concatenation ,r1 ,r2) c) #f) 47 | (list r1)) 48 | ((get-char-groups (->re `(concatenation ,e ,r2) c) #f) 49 | (list r2)) 50 | ((get-char-groups (->re `(concatenation (repetition 0 +inf.0 ,r1) ,r2) c) #f) 51 | (list r1 r2)) 52 | ((get-char-groups (->re `(repetition 0 +inf.0 ,r1) c) #f) 53 | (list r1)) 54 | ((get-char-groups 55 | (->re `(union (repetition 0 +inf.0 ,r1) 56 | (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) 57 | (list r1 r2 (->re "3" c) (->re "4" c))) 58 | ((get-char-groups (->re `(complement ,r1) c) #f) 59 | (list all-chars r1)) 60 | ((get-char-groups 61 | (->re `(intersection (repetition 0 +inf.0 ,r1) 62 | (concatenation (repetition 0 +inf.0 ,r2) "3") "4") c) #f) 63 | (list r1 r2 (->re "3" c) (->re "4" c))) 64 | ) 65 | (define loc:member? is:member?) 66 | 67 | ;; deriveR : re char cache -> re 68 | (define (deriveR r c cache) 69 | (cond 70 | ((or (eq? r e) (eq? r z)) z) 71 | ((char-setR? r) 72 | (if (loc:member? c (char-setR-chars r)) e z)) 73 | ((concatR? r) 74 | (let* ((r1 (concatR-re1 r)) 75 | (r2 (concatR-re2 r)) 76 | (d (build-concat (deriveR r1 c cache) r2 cache))) 77 | (if (re-nullable? r1) 78 | (build-or (list d (deriveR r2 c cache)) cache) 79 | d))) 80 | ((repeatR? r) 81 | (build-concat (deriveR (repeatR-re r) c cache) 82 | (build-repeat (sub1 (repeatR-low r)) 83 | (sub1 (repeatR-high r)) 84 | (repeatR-re r) cache) 85 | cache)) 86 | ((orR? r) 87 | (build-or (map (lambda (x) (deriveR x c cache)) 88 | (orR-res r)) 89 | cache)) 90 | ((andR? r) 91 | (build-and (map (lambda (x) (deriveR x c cache)) 92 | (andR-res r)) 93 | cache)) 94 | ((negR? r) 95 | (build-neg (deriveR (negR-re r) c cache) cache)))) 96 | 97 | (test-block ((c (make-cache)) 98 | (a (char->integer #\a)) 99 | (b (char->integer #\b)) 100 | (r1 (->re #\a c)) 101 | (r2 (->re `(repetition 0 +inf.0 #\a) c)) 102 | (r3 (->re `(repetition 0 +inf.0 ,r2) c)) 103 | (r4 (->re `(concatenation #\a ,r2) c)) 104 | (r5 (->re `(repetition 0 +inf.0 ,r4) c)) 105 | (r6 (->re `(union ,r5 #\a) c)) 106 | (r7 (->re `(concatenation ,r2 ,r2) c)) 107 | (r8 (->re `(complement ,r4) c)) 108 | (r9 (->re `(intersection ,r2 ,r4) c))) 109 | ((deriveR e a c) z) 110 | ((deriveR z a c) z) 111 | ((deriveR r1 b c) z) 112 | ((deriveR r1 a c) e) 113 | ((deriveR r2 a c) r2) 114 | ((deriveR r2 b c) z) 115 | ((deriveR r3 a c) r2) 116 | ((deriveR r3 b c) z) 117 | ((deriveR r4 a c) r2) 118 | ((deriveR r4 b c) z) 119 | ((deriveR r5 a c) (->re `(concatenation ,r2 ,r5) c)) 120 | ((deriveR r5 b c) z) 121 | ((deriveR r6 a c) (->re `(union (concatenation ,r2 ,r5) "") c)) 122 | ((deriveR r6 b c) z) 123 | ((deriveR r7 a c) (->re `(union (concatenation ,r2 ,r2) ,r2) c)) 124 | ((deriveR r7 b c) z) 125 | ((deriveR r8 a c) (->re `(complement, r2) c)) 126 | ((deriveR r8 b c) (->re `(complement ,z) c)) 127 | ((deriveR r9 a c) r2) 128 | ((deriveR r9 b c) z) 129 | ((deriveR (->re `(repetition 1 2 "ab") c) a c) 130 | (->re `(concatenation "b" (repetition 0 1 "ab")) c))) 131 | 132 | ;; An re-action is (cons re action) 133 | 134 | ;; derive : (list-of re-action) char cache -> (union (list-of re-action) #f) 135 | ;; applies deriveR to all the re-actions's re parts. 136 | ;; Returns #f if the derived state is equivalent to z. 137 | (define (derive r c cache) 138 | (let ((new-r (map (lambda (ra) 139 | (cons (deriveR (car ra) c cache) (cdr ra))) 140 | r))) 141 | (if (andmap (lambda (x) (eq? z (car x))) 142 | new-r) 143 | #f 144 | new-r))) 145 | 146 | (test-block ((c (make-cache)) 147 | (r1 (->re #\1 c)) 148 | (r2 (->re #\2 c))) 149 | ((derive null (char->integer #\1) c) #f) 150 | ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\1) c) 151 | (list (cons e 1) (cons z 2))) 152 | ((derive (list (cons r1 1) (cons r2 2)) (char->integer #\3) c) #f)) 153 | 154 | 155 | ;; get-final : (list-of re-action) -> (union #f syntax-object) 156 | ;; An re that accepts e represents a final state. Return the 157 | ;; action from the first final state or #f if there is none. 158 | (define (get-final res) 159 | (cond 160 | ((null? res) #f) 161 | ((re-nullable? (caar res)) (cdar res)) 162 | (else (get-final (cdr res))))) 163 | 164 | (test-block ((c->i char->integer) 165 | (c (make-cache)) 166 | (r1 (->re #\a c)) 167 | (r2 (->re #\b c)) 168 | (b (list (cons z 1) (cons z 2) (cons z 3) (cons e 4) (cons z 5))) 169 | (a (list (cons r1 1) (cons r2 2)))) 170 | ((derive null (c->i #\a) c) #f) 171 | ((derive a (c->i #\a) c) (list (cons e 1) (cons z 2))) 172 | ((derive a (c->i #\b) c) (list (cons z 1) (cons e 2))) 173 | ((derive a (c->i #\c) c) #f) 174 | ((derive (list (cons (->re `(union " " "\n" ",") c) 1) 175 | (cons (->re `(concatenation (repetition 0 1 "-") 176 | (repetition 1 +inf.0 (char-range "0" "9"))) c) 2) 177 | (cons (->re `(concatenation "-" (repetition 1 +inf.0 "-")) c) 3) 178 | (cons (->re "[" c) 4) 179 | (cons (->re "]" c) 5)) (c->i #\[) c) 180 | b) 181 | ((get-final a) #f) 182 | ((get-final (list (cons e 1) (cons e 2))) 1) 183 | ((get-final b) 4)) 184 | 185 | 186 | ;; A state is (make-state (list-of re-action) nat) 187 | (define-struct state (spec index)) 188 | 189 | ;; get->key : re-action -> (list-of nat) 190 | ;; states are indexed by the list of indexes of their res 191 | (define (get-key s) 192 | (map (lambda (x) (re-index (car x))) s)) 193 | 194 | (define loc:partition is:partition) 195 | 196 | ;; compute-chars : (list-of state) -> (list-of char-set) 197 | ;; Computed the sets of equivalent characters for taking the 198 | ;; derivative of the car of st. Only one derivative per set need to be taken. 199 | (define (compute-chars st) 200 | (cond 201 | ((null? st) null) 202 | (else 203 | (loc:partition (map char-setR-chars 204 | (apply append (map (lambda (x) (get-char-groups (car x) #f)) 205 | (state-spec (car st))))))))) 206 | 207 | (test-block ((c (make-cache)) 208 | (c->i char->integer) 209 | (r1 (->re `(char-range #\1 #\4) c)) 210 | (r2 (->re `(char-range #\2 #\3) c))) 211 | ((compute-chars null) null) 212 | ((compute-chars (list (make-state null 1))) null) 213 | ((map is:integer-set-contents 214 | (compute-chars (list (make-state (list (cons r1 1) (cons r2 2)) 2)))) 215 | (list (is:integer-set-contents (is:make-range (c->i #\2) (c->i #\3))) 216 | (is:integer-set-contents (is:union (is:make-range (c->i #\1)) 217 | (is:make-range (c->i #\4))))))) 218 | 219 | 220 | ;; A dfa is (make-dfa int int 221 | ;; (list-of (cons int syntax-object)) 222 | ;; (list-of (cons int (list-of (cons char-set int))))) 223 | ;; Each transitions is a state and a list of chars with the state to transition to. 224 | ;; The finals and transitions are sorted by state number, and duplicate free. 225 | (define-struct dfa (num-states start-state final-states/actions transitions) 226 | #:inspector (make-inspector)) 227 | 228 | (define loc:get-integer is:get-integer) 229 | 230 | ;; build-dfa : (list-of re-action) cache -> dfa 231 | (define (build-dfa rs cache) 232 | (let* ((transitions (make-hasheq)) 233 | (get-state-number (make-counter)) 234 | (start (make-state rs (get-state-number)))) 235 | (cache (cons 'state (get-key rs)) (lambda () start)) 236 | (let loop ((old-states (list start)) 237 | (new-states null) 238 | (all-states (list start)) 239 | (cs (compute-chars (list start)))) 240 | (cond 241 | ((and (null? old-states) (null? new-states)) 242 | (make-dfa (get-state-number) (state-index start) 243 | (sort (filter (lambda (x) (cdr x)) 244 | (map (lambda (state) 245 | (cons (state-index state) (get-final (state-spec state)))) 246 | all-states)) 247 | (lambda (a b) (< (car a) (car b)))) 248 | (sort (hash-map transitions 249 | (lambda (state trans) 250 | (cons (state-index state) 251 | (map (lambda (t) 252 | (cons (car t) 253 | (state-index (cdr t)))) 254 | trans)))) 255 | (lambda (a b) (< (car a) (car b)))))) 256 | ((null? old-states) 257 | (loop new-states null all-states (compute-chars new-states))) 258 | ((null? cs) 259 | (loop (cdr old-states) new-states all-states (compute-chars (cdr old-states)))) 260 | (else 261 | (let* ((state (car old-states)) 262 | (c (car cs)) 263 | (new-re (derive (state-spec state) (loc:get-integer c) cache))) 264 | (cond 265 | (new-re 266 | (let* ((new-state? #f) 267 | (new-state (cache (cons 'state (get-key new-re)) 268 | (lambda () 269 | (set! new-state? #t) 270 | (make-state new-re (get-state-number))))) 271 | (new-all-states (if new-state? (cons new-state all-states) all-states))) 272 | (hash-set! transitions 273 | state 274 | (cons (cons c new-state) 275 | (hash-ref transitions state null))) 276 | (cond 277 | (new-state? 278 | (loop old-states (cons new-state new-states) new-all-states (cdr cs))) 279 | (else 280 | (loop old-states new-states new-all-states (cdr cs)))))) 281 | (else (loop old-states new-states all-states (cdr cs)))))))))) 282 | 283 | (define (print-dfa x) 284 | (printf "number of states: ~a\n" (dfa-num-states x)) 285 | (printf "start state: ~a\n" (dfa-start-state x)) 286 | (printf "final states: ~a\n" (map car (dfa-final-states/actions x))) 287 | (for-each (lambda (trans) 288 | (printf "state: ~a\n" (car trans)) 289 | (for-each (lambda (rule) 290 | (printf " -~a-> ~a\n" 291 | (is:integer-set-contents (car rule)) 292 | (cdr rule))) 293 | (cdr trans))) 294 | (dfa-transitions x))) 295 | 296 | (define (build-test-dfa rs) 297 | (let ((c (make-cache))) 298 | (build-dfa (map (lambda (x) (cons (->re x c) 'action)) 299 | rs) 300 | c))) 301 | 302 | 303 | #| 304 | (define t1 (build-test-dfa null)) 305 | (define t2 (build-test-dfa `(#\a))) 306 | (define t3 (build-test-dfa `(#\a #\b))) 307 | (define t4 (build-test-dfa `((repetition 0 +inf.0 #\a) 308 | (repetition 0 +inf.0 (concatenation #\a #\b))))) 309 | (define t5 (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\0 #\1)) #\1)))) 310 | (define t6 (build-test-dfa `((repetition 0 +inf.0 (repetition 0 +inf.0 #\a)) 311 | (repetition 0 +inf.0 (concatenation #\b (repetition 1 +inf.0 #\b)))))) 312 | (define t7 (build-test-dfa `((concatenation (repetition 0 +inf.0 #\a) (repetition 0 +inf.0 #\b) 313 | (repetition 0 +inf.0 #\c) (repetition 0 +inf.0 #\d) 314 | (repetition 0 +inf.0 #\e))))) 315 | (define t8 316 | (build-test-dfa `((concatenation (repetition 0 +inf.0 (union #\a #\b)) #\a (union #\a #\b) 317 | (union #\a #\b) (union #\a #\b) (union #\a #\b))))) 318 | (define t9 (build-test-dfa `((concatenation "/*" 319 | (complement (concatenation (intersection) "*/" (intersection))) 320 | "*/")))) 321 | (define t11 (build-test-dfa `((complement "1")))) 322 | (define t12 (build-test-dfa `((concatenation (intersection (concatenation (repetition 0 +inf.0 "a") "b") 323 | (concatenation "a" (repetition 0 +inf.0 "b"))) 324 | "ab")))) 325 | (define x (build-test-dfa `((union " " "\n" ",") 326 | (concatenation (repetition 0 1 "-") (repetition 1 +inf.0 (char-range "0" "9"))) 327 | (concatenation "-" (repetition 1 +inf.0 "-")) 328 | "[" 329 | "]"))) 330 | (define y (build-test-dfa 331 | `((repetition 1 +inf.0 332 | (union (concatenation "|" (repetition 0 +inf.0 (char-complement "|")) "|") 333 | (concatenation "|" (repetition 0 +inf.0 (char-complement "|")))))))) 334 | (define t13 (build-test-dfa `((intersection (concatenation (intersection) "111" (intersection)) 335 | (complement (union (concatenation (intersection) "01") 336 | (repetition 1 +inf.0 "1"))))))) 337 | (define t14 (build-test-dfa `((complement "1")))) 338 | |# 339 | 340 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/error-tests.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require (for-syntax racket/base) 3 | "../lex.rkt" 4 | rackunit) 5 | 6 | (define-syntax (catch-syn-error stx) 7 | (syntax-case stx () 8 | [(_ arg) 9 | (datum->syntax 10 | #'here 11 | (with-handlers ((exn:fail:syntax? exn-message)) 12 | (syntax-local-expand-expression #'arg) 13 | "not-an-error"))])) 14 | 15 | (check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev))) 16 | (check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev a))) 17 | (check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev (a b) v))) 18 | (check-regexp-match #rx"lex-abbrev" (catch-syn-error (define-lex-abbrev 1 1))) 19 | (check-regexp-match #rx"lex-abbrevs" (catch-syn-error (define-lex-abbrevs ()))) 20 | 21 | (check-regexp-match #rx"lex-trans" (catch-syn-error (define-lex-trans))) 22 | 23 | (check-regexp-match #rx"lexer" (catch-syn-error (lexer))) 24 | (check-regexp-match #rx"lexer" (catch-syn-error (lexer ("a" "b" "c")))) 25 | (check-regexp-match #rx"lexer" (catch-syn-error (lexer ()))) 26 | (check-regexp-match #rx"lexer" (catch-syn-error (lexer ("")))) 27 | 28 | (check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (a 1)))) 29 | (check-regexp-match #rx"regular-expression" (catch-syn-error (lexer ((a) 1)))) 30 | (check-regexp-match #rx"regular-expression" (catch-syn-error (let ((a 1)) (lexer ((a) 1))))) 31 | 32 | (check-regexp-match #rx"regular-expression" 33 | (catch-syn-error (let-syntax ((a 1)) 34 | (lexer ((a) 1))))) 35 | 36 | (check-regexp-match #rx"define-lex-trans" 37 | (catch-syn-error 38 | (let () 39 | (define-lex-trans a 1) 40 | (let () 41 | (lexer ((a) 1)))))) 42 | 43 | ;; Detecting mutual recursion cycle: 44 | (check-regexp-match #rx"regular-expression" 45 | (catch-syn-error 46 | (let () 47 | (define-lex-abbrev a b) 48 | (define-lex-abbrev b a) 49 | (let () 50 | (lexer (a 1)))))) 51 | 52 | (check-regexp-match #rx"regular-expression" 53 | (catch-syn-error 54 | (let () 55 | (define-lex-abbrev a (repetition 0 1 b)) 56 | (define-lex-abbrev b (repetition 0 1 a)) 57 | (let () 58 | (lexer (a 1)))))) 59 | 60 | ;; Detecting cycle within same abbreviation: 61 | (check-regexp-match #rx"regular-expression" 62 | (catch-syn-error 63 | (let () 64 | (define-lex-abbrev balanced 65 | (union (concatenation "(" balanced ")" balanced) 66 | any-char)) 67 | (lexer 68 | [balanced (string-append lexeme (balanced input-port))] 69 | [(eof) ""])))) 70 | 71 | 72 | (check-regexp-match #rx"regular-expression" (catch-syn-error (lexer (1 1)))) 73 | (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition) 1)))) 74 | (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition #\1 #\1 "3") 1)))) 75 | (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 #\1 "3") 1)))) 76 | (check-regexp-match #rx"repetition" (catch-syn-error (lexer ((repetition 1 0 "3") 1)))) 77 | (check-regexp-match #rx"complement" (catch-syn-error (lexer ((complement) 1)))) 78 | (check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range) 1)))) 79 | (check-regexp-match #rx"char-range" (catch-syn-error (lexer ((char-range #\9 #\0) 1)))) 80 | (check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement) 1)))) 81 | (check-regexp-match #rx"char-complement" (catch-syn-error (lexer ((char-complement (concatenation "1" "2")) 1)))) 82 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/front.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require (prefix-in is: data/integer-set) 3 | racket/list 4 | syntax/stx 5 | "util.rkt" 6 | "stx.rkt" 7 | "re.rkt" 8 | "deriv.rkt") 9 | 10 | (provide build-lexer) 11 | 12 | (define-syntax time-label 13 | (syntax-rules () 14 | ((_ l e ...) 15 | (begin 16 | (printf "~a: " l) 17 | (time (begin e ...)))))) 18 | 19 | ;; A table is either 20 | ;; - (vector-of (union #f nat)) 21 | ;; - (vector-of (vector-of (vector nat nat nat))) 22 | 23 | (define loc:integer-set-contents is:integer-set-contents) 24 | 25 | ;; dfa->1d-table : dfa -> (same as build-lexer) 26 | (define (dfa->1d-table dfa) 27 | (let ((state-table (make-vector (dfa-num-states dfa) #f)) 28 | (transition-cache (make-hash))) 29 | (for-each 30 | (lambda (trans) 31 | (let* ((from-state (car trans)) 32 | (all-chars/to (cdr trans)) 33 | (flat-all-chars/to 34 | (sort 35 | (apply append 36 | (map (lambda (chars/to) 37 | (let ((char-ranges (loc:integer-set-contents (car chars/to))) 38 | (to (cdr chars/to))) 39 | (map (lambda (char-range) 40 | (let ((entry (vector (car char-range) (cdr char-range) to))) 41 | (hash-ref transition-cache entry 42 | (lambda () 43 | (hash-set! transition-cache 44 | entry 45 | entry) 46 | entry)))) 47 | char-ranges))) 48 | all-chars/to)) 49 | (lambda (a b) 50 | (< (vector-ref a 0) (vector-ref b 0)))))) 51 | (vector-set! state-table from-state (list->vector flat-all-chars/to)))) 52 | (dfa-transitions dfa)) 53 | state-table)) 54 | 55 | 56 | (define loc:foldr is:foldr) 57 | 58 | ;; dfa->2d-table : dfa -> (same as build-lexer) 59 | (define (dfa->2d-table dfa) 60 | (let ( 61 | ;; char-table : (vector-of (union #f nat)) 62 | ;; The lexer table, one entry per state per char. 63 | ;; Each entry specifies a state to transition to. 64 | ;; #f indicates no transition 65 | (char-table (make-vector (* 256 (dfa-num-states dfa)) #f))) 66 | 67 | ;; Fill the char-table vector 68 | (for-each 69 | (lambda (trans) 70 | (let ((from-state (car trans))) 71 | (for-each (lambda (chars/to) 72 | (let ((to-state (cdr chars/to))) 73 | (loc:foldr (lambda (char _) 74 | (vector-set! char-table 75 | (bitwise-ior 76 | char 77 | (arithmetic-shift from-state 8)) 78 | to-state)) 79 | (void) 80 | (car chars/to)))) 81 | (cdr trans)))) 82 | (dfa-transitions dfa)) 83 | char-table)) 84 | 85 | 86 | ;; dfa->actions : dfa -> (vector-of (union #f syntax-object)) 87 | ;; The action for each final state, #f if the state isn't final 88 | (define (dfa->actions dfa) 89 | (let ((actions (make-vector (dfa-num-states dfa) #f))) 90 | (for-each (lambda (state/action) 91 | (vector-set! actions (car state/action) (cdr state/action))) 92 | (dfa-final-states/actions dfa)) 93 | actions)) 94 | 95 | ;; dfa->no-look : dfa -> (vector-of bool) 96 | ;; For each state whether the lexer can ignore the next input. 97 | ;; It can do this only if there are no transitions out of the 98 | ;; current state. 99 | (define (dfa->no-look dfa) 100 | (let ((no-look (make-vector (dfa-num-states dfa) #t))) 101 | (for-each (lambda (trans) 102 | (vector-set! no-look (car trans) #f)) 103 | (dfa-transitions dfa)) 104 | no-look)) 105 | 106 | (test-block ((d1 (make-dfa 1 1 (list) (list))) 107 | (d2 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) 108 | (list (cons 1 (list (cons (is:make-range 49 50) 1) 109 | (cons (is:make-range 51) 2))) 110 | (cons 2 (list (cons (is:make-range 49) 3)))))) 111 | (d3 (make-dfa 4 1 (list (cons 2 2) (cons 3 3)) 112 | (list (cons 1 (list (cons (is:make-range 100 200) 0) 113 | (cons (is:make-range 49 50) 1) 114 | (cons (is:make-range 51) 2))) 115 | (cons 2 (list (cons (is:make-range 49) 3))))))) 116 | ((dfa->2d-table d1) (make-vector 256 #f)) 117 | ((dfa->2d-table d2) (let ((v (make-vector 1024 #f))) 118 | (vector-set! v 305 1) 119 | (vector-set! v 306 1) 120 | (vector-set! v 307 2) 121 | (vector-set! v 561 3) 122 | v)) 123 | ((dfa->1d-table d1) (make-vector 1 #f)) 124 | ((dfa->1d-table d2) #(#f 125 | #(#(49 50 1) #(51 51 2)) 126 | #(#(49 49 3)) 127 | #f)) 128 | ((dfa->1d-table d3) #(#f 129 | #(#(49 50 1) #(51 51 2) #(100 200 0)) 130 | #(#(49 49 3)) 131 | #f)) 132 | ((dfa->actions d1) (vector #f)) 133 | ((dfa->actions d2) (vector #f #f 2 3)) 134 | ((dfa->no-look d1) (vector #t)) 135 | ((dfa->no-look d2) (vector #t #f #f #t))) 136 | 137 | ;; build-lexer : syntax-object list -> 138 | ;; (values table nat (vector-of (union #f syntax-object)) (vector-of bool) (list-of syntax-object)) 139 | ;; each syntax object has the form (re action) 140 | (define (build-lexer sos) 141 | (let* ((disappeared-uses (box null)) 142 | (s-re-acts (map (lambda (so) 143 | (cons (parse (stx-car so) disappeared-uses) 144 | (stx-car (stx-cdr so)))) 145 | sos)) 146 | 147 | (cache (make-cache)) 148 | 149 | (re-acts (map (lambda (s-re-act) 150 | (cons (->re (car s-re-act) cache) 151 | (cdr s-re-act))) 152 | s-re-acts)) 153 | 154 | (dfa (build-dfa re-acts cache)) 155 | (table (dfa->1d-table dfa))) 156 | ;(print-dfa dfa) 157 | #;(let ((num-states (vector-length table)) 158 | (num-vectors (length (filter values (vector->list table)))) 159 | (num-entries (apply + (map 160 | (lambda (x) (if x (vector-length x) 0)) 161 | (vector->list table)))) 162 | (num-different-entries 163 | (let ((ht (make-hasheq))) 164 | (for-each 165 | (lambda (x) 166 | (when x 167 | (for-each 168 | (lambda (y) 169 | (hash-set! ht y #t)) 170 | (vector->list x)))) 171 | (vector->list table)) 172 | (length (hash-map ht cons))))) 173 | (printf "~a states, ~aKB\n" 174 | num-states 175 | (/ (* 4.0 (+ 2 num-states (* 2 num-vectors) num-entries 176 | (* 5 num-different-entries))) 1024))) 177 | (values table (dfa-start-state dfa) (dfa->actions dfa) (dfa->no-look dfa) 178 | (unbox disappeared-uses)))) 179 | 180 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/re.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require racket/list 3 | racket/match 4 | (prefix-in is: data/integer-set) 5 | "util.rkt") 6 | 7 | (provide ->re build-epsilon build-zero build-char-set build-concat 8 | build-repeat build-or build-and build-neg 9 | epsilonR? zeroR? char-setR? concatR? repeatR? orR? andR? negR? 10 | char-setR-chars concatR-re1 concatR-re2 repeatR-re repeatR-low repeatR-high 11 | orR-res andR-res negR-re 12 | re-nullable? re-index) 13 | 14 | ;; get-index : -> nat 15 | (define get-index (make-counter)) 16 | 17 | ;; An re is either 18 | ;; - (make-epsilonR bool nat) 19 | ;; - (make-zeroR bool nat) 20 | ;; - (make-char-setR bool nat char-set) 21 | ;; - (make-concatR bool nat re re) 22 | ;; - (make-repeatR bool nat nat nat-or-+inf.0 re) 23 | ;; - (make-orR bool nat (list-of re)) Must not directly contain any orRs 24 | ;; - (make-andR bool nat (list-of re)) Must not directly contain any andRs 25 | ;; - (make-negR bool nat re) 26 | ;; 27 | ;; Every re must have an index field globally different from all 28 | ;; other re index fields. 29 | (define-struct re (nullable? index) #:inspector (make-inspector)) 30 | (define-struct (epsilonR re) () #:inspector (make-inspector)) 31 | (define-struct (zeroR re) () #:inspector (make-inspector)) 32 | (define-struct (char-setR re) (chars) #:inspector (make-inspector)) 33 | (define-struct (concatR re) (re1 re2) #:inspector (make-inspector)) 34 | (define-struct (repeatR re) (low high re) #:inspector (make-inspector)) 35 | (define-struct (orR re) (res) #:inspector (make-inspector)) 36 | (define-struct (andR re) (res) #:inspector (make-inspector)) 37 | (define-struct (negR re) (re) #:inspector (make-inspector)) 38 | 39 | ;; e : re 40 | ;; The unique epsilon re 41 | (define e (make-epsilonR #t (get-index))) 42 | 43 | ;; z : re 44 | ;; The unique zero re 45 | (define z (make-zeroR #f (get-index))) 46 | 47 | 48 | ;; s-re = char constant 49 | ;; | string constant (sequence of characters) 50 | ;; | re a precompiled re 51 | ;; | (repetition low high s-re) repetition between low and high times (inclusive) 52 | ;; | (union s-re ...) 53 | ;; | (intersection s-re ...) 54 | ;; | (complement s-re) 55 | ;; | (concatenation s-re ...) 56 | ;; | (char-range rng rng) match any character between two (inclusive) 57 | ;; | (char-complement char-set) match any character not listed 58 | ;; low = natural-number 59 | ;; high = natural-number or +inf.0 60 | ;; rng = char or string with length 1 61 | ;; (concatenation) (repetition 0 0 x), and "" match the empty string. 62 | ;; (union) matches no strings. 63 | ;; (intersection) matches any string. 64 | 65 | (define loc:make-range is:make-range) 66 | (define loc:union is:union) 67 | (define loc:split is:split) 68 | (define loc:complement is:complement) 69 | 70 | ;; ->re : s-re cache -> re 71 | (define (->re exp cache) 72 | (match exp 73 | ((? char?) (build-char-set (loc:make-range (char->integer exp)) cache)) 74 | ((? string?) (->re `(concatenation ,@(string->list exp)) cache)) 75 | ((? re?) exp) 76 | (`(repetition ,low ,high ,r) 77 | (build-repeat low high (->re r cache) cache)) 78 | (`(union ,rs ...) 79 | (build-or (flatten-res (map (lambda (r) (->re r cache)) rs) 80 | orR? orR-res loc:union cache) 81 | cache)) 82 | (`(intersection ,rs ...) 83 | (build-and (flatten-res (map (lambda (r) (->re r cache)) rs) 84 | andR? andR-res (lambda (a b) 85 | (let-values (((i _ __) (loc:split a b))) i)) 86 | cache) 87 | cache)) 88 | (`(complement ,r) 89 | (build-neg (->re r cache) cache)) 90 | (`(concatenation ,rs ...) 91 | (foldr (lambda (x y) 92 | (build-concat (->re x cache) y cache)) 93 | e 94 | rs)) 95 | (`(char-range ,c1 ,c2) 96 | (let ((i1 (char->integer (if (string? c1) (string-ref c1 0) c1))) 97 | (i2 (char->integer (if (string? c2) (string-ref c2 0) c2)))) 98 | (if (<= i1 i2) 99 | (build-char-set (loc:make-range i1 i2) cache) 100 | z))) 101 | (`(char-complement ,crs ...) 102 | (let ((cs (->re `(union ,@crs) cache))) 103 | (cond 104 | ((zeroR? cs) (build-char-set (loc:make-range 0 max-char-num) cache)) 105 | ((char-setR? cs) 106 | (build-char-set (loc:complement (char-setR-chars cs) 0 max-char-num) cache)) 107 | (else z)))))) 108 | 109 | 110 | 111 | 112 | ;; flatten-res: (list-of re) (re -> bool) (re -> (list-of re)) 113 | ;; (char-set char-set -> char-set) cache -> (list-of re) 114 | ;; Takes all the char-sets in l and combines them into one char-set using the combine function. 115 | ;; Flattens out the values of type?. get-res only needs to function on things type? returns 116 | ;; true for. 117 | (define (flatten-res l type? get-res combine cache) 118 | (let loop ((res l) 119 | ;; chars : (union #f char-set) 120 | (chars #f) 121 | (no-chars null)) 122 | (cond 123 | ((null? res) 124 | (if chars 125 | (cons (build-char-set chars cache) no-chars) 126 | no-chars)) 127 | ((char-setR? (car res)) 128 | (if chars 129 | (loop (cdr res) (combine (char-setR-chars (car res)) chars) no-chars) 130 | (loop (cdr res) (char-setR-chars (car res)) no-chars))) 131 | ((type? (car res)) 132 | (loop (append (get-res (car res)) (cdr res)) chars no-chars)) 133 | (else (loop (cdr res) chars (cons (car res) no-chars)))))) 134 | 135 | ;; build-epsilon : -> re 136 | (define (build-epsilon) e) 137 | 138 | (define (build-zero) z) 139 | 140 | (define loc:integer-set-contents is:integer-set-contents) 141 | 142 | ;; build-char-set : char-set cache -> re 143 | (define (build-char-set cs cache) 144 | (let ((l (loc:integer-set-contents cs))) 145 | (cond 146 | ((null? l) z) 147 | (else 148 | (cache l 149 | (lambda () 150 | (make-char-setR #f (get-index) cs))))))) 151 | 152 | 153 | 154 | ;; build-concat : re re cache -> re 155 | (define (build-concat r1 r2 cache) 156 | (cond 157 | ((eq? e r1) r2) 158 | ((eq? e r2) r1) 159 | ((or (eq? z r1) (eq? z r2)) z) 160 | (else 161 | (cache (cons 'concat (cons (re-index r1) (re-index r2))) 162 | (lambda () 163 | (make-concatR (and (re-nullable? r1) (re-nullable? r2)) 164 | (get-index) 165 | r1 r2)))))) 166 | 167 | ;; build-repeat : nat nat-or-+inf.0 re cache -> re 168 | (define (build-repeat low high r cache) 169 | (let ((low (if (< low 0) 0 low))) 170 | (cond 171 | ((eq? r e) e) 172 | ((and (= 0 low) (or (= 0 high) (eq? z r))) e) 173 | ((and (= 1 low) (= 1 high)) r) 174 | ((and (repeatR? r) 175 | (eqv? (repeatR-high r) +inf.0) 176 | (or (= 0 (repeatR-low r)) 177 | (= 1 (repeatR-low r)))) 178 | (build-repeat (* low (repeatR-low r)) 179 | +inf.0 180 | (repeatR-re r) 181 | cache)) 182 | (else 183 | (cache (cons 'repeat (cons low (cons high (re-index r)))) 184 | (lambda () 185 | (make-repeatR (or (re-nullable? r) (= 0 low)) (get-index) low high r))))))) 186 | 187 | 188 | ;; build-or : (list-of re) cache -> re 189 | (define (build-or rs cache) 190 | (let ((rs 191 | (filter 192 | (lambda (x) (not (eq? x z))) 193 | (do-simple-equiv (replace rs orR? orR-res null) re-index)))) 194 | (cond 195 | ((null? rs) z) 196 | ((null? (cdr rs)) (car rs)) 197 | ((memq (build-neg z cache) rs) (build-neg z cache)) 198 | (else 199 | (cache (cons 'or (map re-index rs)) 200 | (lambda () 201 | (make-orR (ormap re-nullable? rs) (get-index) rs))))))) 202 | 203 | ;; build-and : (list-of re) cache -> re 204 | (define (build-and rs cache) 205 | (let ((rs (do-simple-equiv (replace rs andR? andR-res null) re-index))) 206 | (cond 207 | ((null? rs) (build-neg z cache)) 208 | ((null? (cdr rs)) (car rs)) 209 | ((memq z rs) z) 210 | (else 211 | (cache (cons 'and (map re-index rs)) 212 | (lambda () 213 | (make-andR (andmap re-nullable? rs) (get-index) rs))))))) 214 | 215 | ;; build-neg : re cache -> re 216 | (define (build-neg r cache) 217 | (cond 218 | ((negR? r) (negR-re r)) 219 | (else 220 | (cache (cons 'neg (re-index r)) 221 | (lambda () 222 | (make-negR (not (re-nullable? r)) (get-index) r)))))) 223 | 224 | ;; Tests for the build-functions 225 | (test-block ((c (make-cache)) 226 | (isc is:integer-set-contents) 227 | (r1 (build-char-set (is:make-range (char->integer #\1)) c)) 228 | (r2 (build-char-set (is:make-range (char->integer #\2)) c)) 229 | (r3 (build-char-set (is:make-range (char->integer #\3)) c)) 230 | (rc (build-concat r1 r2 c)) 231 | (rc2 (build-concat r2 r1 c)) 232 | (rr (build-repeat 0 +inf.0 rc c)) 233 | (ro (build-or `(,rr ,rc ,rr) c)) 234 | (ro2 (build-or `(,rc ,rr ,z) c)) 235 | (ro3 (build-or `(,rr ,rc) c)) 236 | (ro4 (build-or `(,(build-or `(,r1 ,r2) c) 237 | ,(build-or `(,r2 ,r3) c)) c)) 238 | (ra (build-and `(,rr ,rc ,rr) c)) 239 | (ra2 (build-and `(,rc ,rr) c)) 240 | (ra3 (build-and `(,rr ,rc) c)) 241 | (ra4 (build-and `(,(build-and `(,r3 ,r2) c) 242 | ,(build-and `(,r2 ,r1) c)) c)) 243 | (rn (build-neg z c)) 244 | (rn2 (build-neg r1 c))) 245 | 246 | ((isc (char-setR-chars r1)) (isc (is:make-range (char->integer #\1)))) 247 | ((isc (char-setR-chars r2)) (isc (is:make-range (char->integer #\2)))) 248 | ((isc (char-setR-chars r3)) (isc (is:make-range (char->integer #\3)))) 249 | ((build-char-set (is:make-range) c) z) 250 | ((build-concat r1 e c) r1) 251 | ((build-concat e r1 c) r1) 252 | ((build-concat r1 z c) z) 253 | ((build-concat z r1 c) z) 254 | ((build-concat r1 r2 c) rc) 255 | ((concatR-re1 rc) r1) 256 | ((concatR-re2 rc) r2) 257 | ((concatR-re1 rc2) r2) 258 | ((concatR-re2 rc2) r1) 259 | (ro ro2) 260 | (ro ro3) 261 | (ro4 (build-or `(,r1 ,r2 ,r3) c)) 262 | ((orR-res ro) (list rc rr)) 263 | ((orR-res ro4) (list r1 r2 r3)) 264 | ((build-or null c) z) 265 | ((build-or `(,r1 ,z) c) r1) 266 | ((build-repeat 0 +inf.0 rc c) rr) 267 | ((build-repeat 0 1 z c) e) 268 | ((build-repeat 0 0 rc c) e) 269 | ((build-repeat 0 +inf.0 z c) e) 270 | ((build-repeat -1 +inf.0 z c) e) 271 | ((build-repeat 0 +inf.0 (build-repeat 0 +inf.0 rc c) c) 272 | (build-repeat 0 +inf.0 rc c)) 273 | ((build-repeat 20 20 (build-repeat 0 +inf.0 rc c) c) 274 | (build-repeat 0 +inf.0 rc c)) 275 | ((build-repeat 20 20 (build-repeat 1 +inf.0 rc c) c) 276 | (build-repeat 20 +inf.0 rc c)) 277 | ((build-repeat 1 1 rc c) rc) 278 | ((repeatR-re rr) rc) 279 | (ra ra2) 280 | (ra ra3) 281 | (ra4 (build-and `(,r1 ,r2 ,r3) c)) 282 | ((andR-res ra) (list rc rr)) 283 | ((andR-res ra4) (list r1 r2 r3)) 284 | ((build-and null c) (build-neg z c)) 285 | ((build-and `(,r1 ,z) c) z) 286 | ((build-and `(,r1) c) r1) 287 | ((build-neg r1 c) (build-neg r1 c)) 288 | ((build-neg (build-neg r1 c) c) r1) 289 | ((negR-re (build-neg r2 c)) r2) 290 | ((re-nullable? r1) #f) 291 | ((re-nullable? rc) #f) 292 | ((re-nullable? (build-concat rr rr c)) #t) 293 | ((re-nullable? rr) #t) 294 | ((re-nullable? (build-repeat 0 1 rc c)) #t) 295 | ((re-nullable? (build-repeat 1 2 rc c)) #f) 296 | ((re-nullable? (build-repeat 1 2 (build-or (list e r1) c) c)) #t) 297 | ((re-nullable? ro) #t) 298 | ((re-nullable? (build-or `(,r1 ,r2) c)) #f) 299 | ((re-nullable? (build-and `(,r1 ,e) c)) #f) 300 | ((re-nullable? (build-and `(,rr ,e) c)) #t) 301 | ((re-nullable? (build-neg r1 c)) #t) 302 | ((re-nullable? (build-neg rr c)) #f)) 303 | 304 | (test-block ((c (make-cache)) 305 | (isc is:integer-set-contents) 306 | (r1 (->re #\1 c)) 307 | (r2 (->re #\2 c)) 308 | (r3-5 (->re '(char-range #\3 #\5) c)) 309 | (r4 (build-or `(,r1 ,r2) c)) 310 | (r5 (->re `(union ,r3-5 #\7) c)) 311 | (r6 (->re #\6 c))) 312 | ((flatten-res null orR? orR-res is:union c) null) 313 | ((isc (char-setR-chars (car (flatten-res `(,r1) orR? orR-res is:union c)))) 314 | (isc (is:make-range (char->integer #\1)))) 315 | ((isc (char-setR-chars (car (flatten-res `(,r4) orR? orR-res is:union c)))) 316 | (isc (is:make-range (char->integer #\1) (char->integer #\2)))) 317 | ((isc (char-setR-chars (car (flatten-res `(,r6 ,r5 ,r4 ,r3-5 ,r2 ,r1) 318 | orR? orR-res is:union c)))) 319 | (isc (is:make-range (char->integer #\1) (char->integer #\7)))) 320 | ((flatten-res `(,r1 ,r2) andR? andR-res (lambda (x y) 321 | (let-values (((i _ __) 322 | (is:split x y))) 323 | i)) 324 | c) 325 | (list z))) 326 | 327 | ;; ->re 328 | (test-block ((c (make-cache)) 329 | (isc is:integer-set-contents) 330 | (r (->re #\a c)) 331 | (rr (->re `(concatenation ,r ,r) c)) 332 | (rrr (->re `(concatenation ,r ,rr) c)) 333 | (rrr* (->re `(repetition 0 +inf.0 ,rrr) c))) 334 | ((isc (char-setR-chars r)) (isc (is:make-range (char->integer #\a)))) 335 | ((->re "" c) e) 336 | ((->re "asdf" c) (->re `(concatenation #\a #\s #\d #\f) c)) 337 | ((->re r c) r) 338 | ((->re `(repetition 0 +inf.0 ,r) c) (build-repeat 0 +inf.0 r c)) 339 | ((->re `(repetition 1 +inf.0 ,r) c) (build-repeat 1 +inf.0 r c)) 340 | ((->re `(repetition 0 1 ,r) c) (build-repeat 0 1 r c)) 341 | ((->re `(repetition 0 1 ,rrr*) c) rrr*) 342 | ((->re `(union (union (char-range #\a #\c) 343 | (char-complement (char-range #\000 #\110) 344 | (char-range #\112 ,(integer->char max-char-num)))) 345 | (union (repetition 0 +inf.0 #\2))) c) 346 | (build-or (list (build-char-set (is:union (is:make-range 73) 347 | (is:make-range 97 99)) 348 | c) 349 | (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) 350 | c)) 351 | ((->re `(union ,rr ,rrr) c) (build-or (list rr rrr) c)) 352 | ((->re `(union ,r) c) r) 353 | ((->re `(union) c) z) 354 | ((->re `(intersection (intersection #\111 355 | (char-complement (char-range #\000 #\110) 356 | (char-range #\112 ,(integer->char max-char-num)))) 357 | (intersection (repetition 0 +inf.0 #\2))) c) 358 | (build-and (list (build-char-set (is:make-range 73) c) 359 | (build-repeat 0 +inf.0 (build-char-set (is:make-range 50) c) c)) 360 | c)) 361 | ((->re `(intersection (intersection #\000 (char-complement (char-range #\000 #\110) 362 | (char-range #\112 ,(integer->char max-char-num)))) 363 | (intersection (repetition 0 +inf.0 #\2))) c) 364 | z) 365 | ((->re `(intersection ,rr ,rrr) c) (build-and (list rr rrr) c)) 366 | ((->re `(intersection ,r) c) r) 367 | ((->re `(intersection) c) (build-neg z c)) 368 | ((->re `(complement ,r) c) (build-neg r c)) 369 | ((->re `(concatenation) c) e) 370 | ((->re `(concatenation ,rrr*) c) rrr*) 371 | (rr (build-concat r r c)) 372 | ((->re `(concatenation ,r ,rr ,rrr) c) 373 | (build-concat r (build-concat rr rrr c) c)) 374 | ((isc (char-setR-chars (->re `(char-range #\1 #\1) c))) (isc (is:make-range 49))) 375 | ((isc (char-setR-chars (->re `(char-range #\1 #\9) c))) (isc (is:make-range 49 57))) 376 | ((isc (char-setR-chars (->re `(char-range "1" "1") c))) (isc (is:make-range 49))) 377 | ((isc (char-setR-chars (->re `(char-range "1" "9") c))) (isc (is:make-range 49 57))) 378 | ((->re `(char-range "9" "1") c) z) 379 | ((isc (char-setR-chars (->re `(char-complement) c))) 380 | (isc (char-setR-chars (->re `(char-range #\000 ,(integer->char max-char-num)) c)))) 381 | ((isc (char-setR-chars (->re `(char-complement #\001 (char-range #\002 ,(integer->char max-char-num))) c))) 382 | (isc (is:make-range 0))) 383 | ) 384 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/stx.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require "util.rkt" 4 | syntax/id-table) 5 | 6 | (provide parse) 7 | 8 | (define (bad-args stx num) 9 | (raise-syntax-error 10 | #f 11 | (format "incorrect number of arguments (should have ~a)" num) 12 | stx)) 13 | 14 | ;; char-range-arg: syntax-object syntax-object -> nat 15 | ;; If c contains is a character or length 1 string, returns the integer 16 | ;; for the character. Otherwise raises a syntax error. 17 | (define (char-range-arg stx containing-stx) 18 | (let ((c (syntax-e stx))) 19 | (cond 20 | ((char? c) (char->integer c)) 21 | ((and (string? c) (= (string-length c) 1)) 22 | (char->integer (string-ref c 0))) 23 | (else 24 | (raise-syntax-error 25 | #f 26 | "not a char or single-char string" 27 | containing-stx stx))))) 28 | (module+ test 29 | (check-equal? (char-range-arg #'#\1 #'here) (char->integer #\1)) 30 | (check-equal? (char-range-arg #'"1" #'here) (char->integer #\1))) 31 | 32 | (define orig-insp (variable-reference->module-declaration-inspector 33 | (#%variable-reference))) 34 | (define (disarm stx) 35 | (syntax-disarm stx orig-insp)) 36 | 37 | ;; parse : syntax-object (box (list-of syntax-object)) -> s-re (see re.rkt) 38 | ;; checks for errors and generates the plain s-exp form for s 39 | ;; Expands lex-abbrevs and applies lex-trans. 40 | (define (parse stx disappeared-uses) 41 | (let loop ([stx stx] 42 | [disappeared-uses disappeared-uses] 43 | ;; seen-lex-abbrevs: id-table 44 | [seen-lex-abbrevs (make-immutable-free-id-table)]) 45 | (let ([recur (lambda (s) 46 | (loop (syntax-rearm s stx) 47 | disappeared-uses 48 | seen-lex-abbrevs))] 49 | [recur/abbrev (lambda (s id) 50 | (loop (syntax-rearm s stx) 51 | disappeared-uses 52 | (free-id-table-set seen-lex-abbrevs id id)))]) 53 | (syntax-case (disarm stx) (repetition union intersection complement concatenation 54 | char-range char-complement) 55 | (_ 56 | (identifier? stx) 57 | (let ((expansion (syntax-local-value stx (lambda () #f)))) 58 | (unless (lex-abbrev? expansion) 59 | (raise-syntax-error 'regular-expression 60 | "undefined abbreviation" 61 | stx)) 62 | ;; Check for cycles. 63 | (when (free-id-table-ref seen-lex-abbrevs stx (lambda () #f)) 64 | (raise-syntax-error 'regular-expression 65 | "illegal lex-abbrev cycle detected" 66 | stx 67 | #f 68 | (list (free-id-table-ref seen-lex-abbrevs stx)))) 69 | (set-box! disappeared-uses (cons stx (unbox disappeared-uses))) 70 | (recur/abbrev ((lex-abbrev-get-abbrev expansion)) stx))) 71 | (_ 72 | (or (char? (syntax-e stx)) (string? (syntax-e stx))) 73 | (syntax-e stx)) 74 | ((repetition arg ...) 75 | (let ((arg-list (syntax->list (syntax (arg ...))))) 76 | (unless (= 3 (length arg-list)) 77 | (bad-args stx 2)) 78 | (let ((low (syntax-e (car arg-list))) 79 | (high (syntax-e (cadr arg-list))) 80 | (re (caddr arg-list))) 81 | (unless (and (number? low) (exact? low) (integer? low) (>= low 0)) 82 | (raise-syntax-error #f 83 | "not a non-negative exact integer" 84 | stx 85 | (car arg-list))) 86 | (unless (or (and (number? high) (exact? high) (integer? high) (>= high 0)) 87 | (eqv? high +inf.0)) 88 | (raise-syntax-error #f 89 | "not a non-negative exact integer or +inf.0" 90 | stx 91 | (cadr arg-list))) 92 | (unless (<= low high) 93 | (raise-syntax-error 94 | #f 95 | "the first argument is not less than or equal to the second argument" 96 | stx)) 97 | `(repetition ,low ,high ,(recur re))))) 98 | ((union re ...) 99 | `(union ,@(map recur (syntax->list (syntax (re ...)))))) 100 | ((intersection re ...) 101 | `(intersection ,@(map recur (syntax->list (syntax (re ...)))))) 102 | ((complement re ...) 103 | (let ((re-list (syntax->list (syntax (re ...))))) 104 | (unless (= 1 (length re-list)) 105 | (bad-args stx 1)) 106 | `(complement ,(recur (car re-list))))) 107 | ((concatenation re ...) 108 | `(concatenation ,@(map recur (syntax->list (syntax (re ...)))))) 109 | ((char-range arg ...) 110 | (let ((arg-list (syntax->list (syntax (arg ...))))) 111 | (unless (= 2 (length arg-list)) 112 | (bad-args stx 2)) 113 | (let ((i1 (char-range-arg (car arg-list) stx)) 114 | (i2 (char-range-arg (cadr arg-list) stx))) 115 | (if (<= i1 i2) 116 | `(char-range ,(integer->char i1) ,(integer->char i2)) 117 | (raise-syntax-error 118 | #f 119 | "the first argument does not precede or equal second argument" 120 | stx))))) 121 | ((char-complement arg ...) 122 | (let ((arg-list (syntax->list (syntax (arg ...))))) 123 | (unless (= 1 (length arg-list)) 124 | (bad-args stx 1)) 125 | (let ((parsed (recur (car arg-list)))) 126 | (unless (char-set? parsed) 127 | (raise-syntax-error #f 128 | "not a character set" 129 | stx 130 | (car arg-list))) 131 | `(char-complement ,parsed)))) 132 | ((op form ...) 133 | (identifier? (syntax op)) 134 | (let* ((o (syntax op)) 135 | (expansion (syntax-local-value o (lambda () #f)))) 136 | (set-box! disappeared-uses (cons o (unbox disappeared-uses))) 137 | (cond 138 | ((lex-trans? expansion) 139 | (recur ((lex-trans-f expansion) (disarm stx)))) 140 | (expansion 141 | (raise-syntax-error 'regular-expression 142 | "not a lex-trans" 143 | stx)) 144 | (else 145 | (raise-syntax-error 'regular-expression 146 | "undefined operator" 147 | stx))))) 148 | (_ 149 | (raise-syntax-error 150 | 'regular-expression 151 | "not a char, string, identifier, or (op args ...)" 152 | stx)))))) 153 | 154 | 155 | 156 | ;; char-set? : s-re -> bool 157 | ;; A char-set is an re that matches only strings of length 1. 158 | ;; char-set? is conservative. 159 | (define (char-set? s-re) 160 | (cond 161 | ((char? s-re) #t) 162 | ((string? s-re) (= (string-length s-re) 1)) 163 | ((list? s-re) 164 | (let ((op (car s-re))) 165 | (case op 166 | ((union intersection) (andmap char-set? (cdr s-re))) 167 | ((char-range char-complement) #t) 168 | ((repetition) 169 | (and (= 1 (cadr s-re) (caddr s-re)) (char-set? (cadddr s-re)))) 170 | ((concatenation) 171 | (and (= 2 (length s-re)) (char-set? (cadr s-re)))) 172 | (else #f)))) 173 | (else #f))) 174 | 175 | (module+ test 176 | (require rackunit)) 177 | (module+ test 178 | (check-equal? (char-set? #\a) #t) 179 | (check-equal? (char-set? "12") #f) 180 | (check-equal? (char-set? "1") #t) 181 | (check-equal? (char-set? '(repetition 1 2 #\1)) #f) 182 | (check-equal? (char-set? '(repetition 1 1 "12")) #f) 183 | (check-equal? (char-set? '(repetition 1 1 "1")) #t) 184 | (check-equal? (char-set? '(repetition 6 6 "1")) #f) 185 | (check-equal? (char-set? '(union "1" "2" "3")) #t) 186 | (check-equal? (char-set? '(union "1" "" "3")) #f) 187 | (check-equal? (char-set? '(intersection "1" "2" (union "3" "4"))) #t) 188 | (check-equal? (char-set? '(intersection "1" "")) #f) 189 | (check-equal? (char-set? '(complement "1")) #f) 190 | (check-equal? (char-set? '(concatenation "1" "2")) #f) 191 | (check-equal? (char-set? '(concatenation "" "2")) #f) 192 | (check-equal? (char-set? '(concatenation "1")) #t) 193 | (check-equal? (char-set? '(concatenation "12")) #f) 194 | (check-equal? (char-set? '(char-range #\1 #\2)) #t) 195 | (check-equal? (char-set? '(char-complement #\1)) #t)) 196 | 197 | ;; yikes... these test cases all have the wrong arity, now. 198 | ;; and by "now", I mean it's been broken since before we 199 | ;; moved to git. 200 | (module+ test 201 | (check-equal? (parse #'#\a null) #\a) 202 | (check-equal? (parse #'"1" null) "1") 203 | (check-equal? (parse #'(repetition 1 1 #\1) null) 204 | '(repetition 1 1 #\1)) 205 | (check-equal? (parse #'(repetition 0 +inf.0 #\1) null) '(repetition 0 +inf.0 #\1)) 206 | (check-equal? (parse #'(union #\1 (union "2") (union)) null) 207 | '(union #\1 (union "2") (union))) 208 | (check-equal? (parse #'(intersection #\1 (intersection "2") (intersection)) 209 | null) 210 | '(intersection #\1 (intersection "2") (intersection))) 211 | (check-equal? (parse #'(complement (union #\1 #\2)) 212 | null) 213 | '(complement (union #\1 #\2))) 214 | (check-equal? (parse #'(concatenation "1" "2" (concatenation)) null) 215 | '(concatenation "1" "2" (concatenation))) 216 | (check-equal? (parse #'(char-range "1" #\1) null) '(char-range #\1 #\1)) 217 | (check-equal? (parse #'(char-range #\1 "1") null) '(char-range #\1 #\1)) 218 | (check-equal? (parse #'(char-range "1" "3") null) '(char-range #\1 #\3)) 219 | (check-equal? (parse #'(char-complement (union "1" "2")) null) 220 | '(char-complement (union "1" "2"))) 221 | (check-equal? (parse #'(char-complement (repetition 1 1 "5")) null) 222 | '(char-complement (repetition 1 1 "5"))) 223 | (check-exn #rx"not a character set" 224 | (λ () (parse #'(char-complement 225 | (union "1" (repetition 2 2 "5"))) null)))) 226 | ; ) 227 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/token-syntax.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; The things needed at compile time to handle definition of tokens 4 | 5 | (provide make-terminals-def terminals-def-t terminals-def? 6 | make-e-terminals-def e-terminals-def-t e-terminals-def?) 7 | (define-struct terminals-def (t)) 8 | (define-struct e-terminals-def (t)) 9 | 10 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/token.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require (for-syntax racket/base 4 | "token-syntax.rkt")) 5 | 6 | ;; Defining tokens 7 | 8 | (provide define-tokens define-empty-tokens make-token token? 9 | (protect-out (rename-out [token-name real-token-name] 10 | [token-value real-token-value])) 11 | (rename-out [token-name* token-name] 12 | [token-value* token-value]) 13 | (struct-out position) 14 | (struct-out position-token)) 15 | 16 | 17 | ;; A token is either 18 | ;; - symbol 19 | ;; - (make-token symbol any) 20 | (define-struct token (name value) #:inspector (make-inspector)) 21 | 22 | ;; token-name*: token -> symbol 23 | (define (token-name* t) 24 | (cond 25 | ((symbol? t) t) 26 | ((token? t) (token-name t)) 27 | (else (raise-type-error 28 | 'token-name 29 | "symbol or struct:token" 30 | 0 31 | t)))) 32 | 33 | ;; token-value*: token -> any 34 | (define (token-value* t) 35 | (cond 36 | ((symbol? t) #f) 37 | ((token? t) (token-value t)) 38 | (else (raise-type-error 39 | 'token-value 40 | "symbol or struct:token" 41 | 0 42 | t)))) 43 | 44 | (define-for-syntax (make-ctor-name n) 45 | (datum->syntax n 46 | (string->symbol (format "token-~a" (syntax-e n))) 47 | n 48 | n)) 49 | 50 | (define-for-syntax (make-define-tokens empty?) 51 | (lambda (stx) 52 | (syntax-case stx () 53 | ((_ name (token ...)) 54 | (andmap identifier? (syntax->list (syntax (token ...)))) 55 | (with-syntax (((marked-token ...) 56 | (map values #;(make-syntax-introducer) 57 | (syntax->list (syntax (token ...)))))) 58 | (quasisyntax/loc stx 59 | (begin 60 | (define-syntax name 61 | #,(if empty? 62 | #'(make-e-terminals-def (quote-syntax (marked-token ...))) 63 | #'(make-terminals-def (quote-syntax (marked-token ...))))) 64 | #,@(map 65 | (lambda (n) 66 | (when (eq? (syntax-e n) 'error) 67 | (raise-syntax-error 68 | #f 69 | "Cannot define a token named error." 70 | stx)) 71 | (if empty? 72 | #`(define (#,(make-ctor-name n)) 73 | '#,n) 74 | #`(define (#,(make-ctor-name n) x) 75 | (make-token '#,n x)))) 76 | (syntax->list (syntax (token ...)))) 77 | #;(define marked-token #f) #;...)))) 78 | ((_ ...) 79 | (raise-syntax-error 80 | #f 81 | "must have the form (define-tokens name (identifier ...)) or (define-empty-tokens name (identifier ...))" 82 | stx))))) 83 | 84 | (define-syntax define-tokens (make-define-tokens #f)) 85 | (define-syntax define-empty-tokens (make-define-tokens #t)) 86 | 87 | (define-struct position (offset line col) 88 | #:mutable ; backward compatibility 89 | #:transparent) 90 | (define-struct position-token (token start-pos end-pos) 91 | #:mutable ; backward compatibility 92 | #:transparent) 93 | 94 | 95 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/unicode-chars.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require racket/promise 4 | "util.rkt") 5 | 6 | (provide (all-defined-out)) 7 | 8 | ;; mapped-chars : (listof (list nat nat bool)) 9 | (define mapped-chars (make-known-char-range-list)) 10 | 11 | ;; get-chars-for-x : (nat -> bool) (listof (list nat nat bool)) -> (listof (cons nat nat)) 12 | (define (get-chars-for char-x? mapped-chars) 13 | (cond 14 | ((null? mapped-chars) null) 15 | (else 16 | (let* ((range (car mapped-chars)) 17 | (low (car range)) 18 | (high (cadr range)) 19 | (x (char-x? low))) 20 | (cond 21 | ((caddr range) 22 | (if x 23 | (cons (cons low high) 24 | (get-chars-for char-x? (cdr mapped-chars))) 25 | (get-chars-for char-x? (cdr mapped-chars)))) 26 | (else 27 | (let loop ((range-start low) 28 | (i (car range)) 29 | (parity x)) 30 | (cond 31 | ((> i high) 32 | (if parity 33 | (cons (cons range-start high) (get-chars-for char-x? (cdr mapped-chars))) 34 | (get-chars-for char-x? (cdr mapped-chars)))) 35 | ((eq? parity (char-x? i)) 36 | (loop range-start (add1 i) parity)) 37 | (parity 38 | (cons (cons range-start (sub1 i)) (loop i (add1 i) #f))) 39 | (else 40 | (loop i (add1 i) #t)))))))))) 41 | 42 | (define (compute-ranges x?) 43 | (delay (get-chars-for (lambda (x) (x? (integer->char x))) mapped-chars))) 44 | 45 | (define alphabetic-ranges (compute-ranges char-alphabetic?)) ;; 325 46 | (define lower-case-ranges (compute-ranges char-lower-case?)) ;; 405 47 | (define upper-case-ranges (compute-ranges char-upper-case?)) ;; 380 48 | (define title-case-ranges (compute-ranges char-title-case?)) ;; 10 49 | (define numeric-ranges (compute-ranges char-numeric?)) ;; 47 50 | (define symbolic-ranges (compute-ranges char-symbolic?)) ;; 153 51 | (define punctuation-ranges (compute-ranges char-punctuation?)) ;; 86 52 | (define graphic-ranges (compute-ranges char-graphic?)) ;; 401 53 | (define whitespace-ranges (compute-ranges char-whitespace?)) ;; 10 54 | (define blank-ranges (compute-ranges char-blank?)) ;; 9 55 | #;(define hexadecimal-ranges (compute-ranges char-hexadecimal?)) 56 | (define iso-control-ranges (compute-ranges char-iso-control?)) ;; 2 57 | 58 | 59 | 60 | (module+ test 61 | (require rackunit) 62 | (check-equal? (get-chars-for odd? '()) '()) 63 | (check-equal? (get-chars-for odd? '((1 4 #f) (8 13 #f))) 64 | '((1 . 1) (3 . 3) (9 . 9) (11 . 11) (13 . 13))) 65 | (check-equal? (get-chars-for (lambda (x) 66 | (odd? (quotient x 10))) 67 | '((1 5 #t) (17 19 #t) (21 51 #f))) 68 | '((17 . 19) (30 . 39) (50 . 51)))) 69 | 70 | 71 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-lex/util.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (provide (all-defined-out)) 4 | 5 | (define max-char-num #x10FFFF) 6 | 7 | (define-struct lex-abbrev (get-abbrev)) 8 | (define-struct lex-trans (f)) 9 | 10 | (module+ test 11 | (require rackunit)) 12 | 13 | #;(define-syntax test-block 14 | (syntax-rules () 15 | ((_ defs (code right-ans) ...) 16 | (let* defs 17 | (let ((real-ans code)) 18 | (unless (equal? real-ans right-ans) 19 | (printf "Test failed: ~e gave ~e. Expected ~e\n" 20 | 'code real-ans 'right-ans))) ...)))) 21 | 22 | (define-syntax test-block 23 | (syntax-rules () 24 | ((_ x ...) (void)))) 25 | 26 | 27 | ;; A cache is (X ( -> Y) -> Y) 28 | ;; make-cache : -> cache 29 | ;; table map Xs to Ys. If key is mapped, its value is returned. 30 | ;; Otherwise, build is invoked and its result is placed in the table and 31 | ;; returned. 32 | ;; Xs are compared with equal? 33 | (define (make-cache) 34 | (let ((table (make-hash))) 35 | (lambda (key build) 36 | (hash-ref table key 37 | (lambda () 38 | (let ((new (build))) 39 | (hash-set! table key new) 40 | new)))))) 41 | 42 | (module+ test 43 | (define cache (make-cache)) 44 | (check-equal? (cache '(s 1 2) (lambda () 9)) 9) 45 | (check-equal? (cache '(s 2 1) (lambda () 8)) 8) 46 | (check-equal? (cache '(s 1 2) (lambda () 1)) 9) 47 | (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) 48 | (lambda () 22)) 22) 49 | (check-equal? (cache (cons 's (cons 0 (cons +inf.0 10))) 50 | (lambda () 1)) 22)) 51 | 52 | 53 | 54 | ;; make-counter : -> -> nat 55 | ;; makes a function that returns a higher number by 1, each time 56 | ;; it is called. 57 | (define (make-counter) 58 | (let ((counter 0)) 59 | (lambda () 60 | (begin0 61 | counter 62 | (set! counter (add1 counter)))))) 63 | 64 | (module+ test 65 | (define c (make-counter)) 66 | (define d (make-counter)) 67 | (check-equal? (c) 0) 68 | (check-equal? (d) 0) 69 | (check-equal? (c) 1) 70 | (check-equal? (d) 1) 71 | (check-equal? (c) 2)) 72 | 73 | 74 | ;; remove-dups : (list-of X) (X -> number) -> (list-of X) 75 | ;; removes the entries from l that have the same index as a 76 | ;; previous entry. l must be grouped by indexes. 77 | (define (remove-dups l index acc) 78 | (cond 79 | ((null? l) (reverse acc)) 80 | ((null? acc) (remove-dups (cdr l) index (cons (car l) acc))) 81 | ((= (index (car acc)) (index (car l))) 82 | (remove-dups (cdr l) index acc)) 83 | (else 84 | (remove-dups (cdr l) index (cons (car l) acc))))) 85 | 86 | 87 | (module+ test 88 | (check-equal? (remove-dups '((1 2) (2 2) (1 3) (1 4) 89 | (100 4) (0 5)) cadr null) 90 | '((1 2) (1 3) (1 4) (0 5))) 91 | (check-equal? (remove-dups null error null) null)) 92 | 93 | ;; do-simple-equiv : (list-of X) (X -> nat) -> (list-of X) 94 | ;; Sorts l according to index and removes the entries with duplicate 95 | ;; indexes. 96 | (define (do-simple-equiv l index) 97 | (let ((ordered (sort l (lambda (a b) (< (index a) (index b)))))) 98 | (remove-dups ordered index null))) 99 | 100 | (module+ test 101 | (check-equal? (do-simple-equiv '((2 2) (1 4) (1 2) 102 | (100 4) (1 3) (0 5)) 103 | cadr) 104 | '((2 2) (1 3) (1 4) (0 5))) 105 | (check-equal? (do-simple-equiv null error) null)) 106 | 107 | ;; replace : (list-of X) (X -> bool) (X -> (list-of X)) (list-of X) -> 108 | ;; (list-of X) 109 | ;; If (pred? r) for some r in l, splice (get r) in place of r in the resulting 110 | ;; list. 111 | (define (replace l pred? get acc) 112 | (cond 113 | ((null? l) acc) 114 | ((pred? (car l)) (replace (cdr l) pred? get (append (get (car l)) acc))) 115 | (else (replace (cdr l) pred? get (cons (car l) acc))))) 116 | 117 | 118 | (module+ test 119 | (check-equal? (replace null void (lambda () (list 1)) null) null) 120 | (check-equal? (replace '(1 2 3 4 3 5) 121 | (lambda (x) (= x 3)) 122 | (lambda (x) (list 1 2 3)) 123 | null) 124 | '(5 1 2 3 4 1 2 3 2 1))) 125 | 126 | 127 | 128 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/grammar.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; Constructs to create and access grammars, the internal 4 | ;; representation of the input to the parser generator. 5 | 6 | (require racket/class 7 | (except-in racket/list 8 | remove-duplicates) 9 | "yacc-helper.rkt" 10 | racket/contract) 11 | 12 | ;; Each production has a unique index 0 <= index <= number of productions 13 | (define-struct prod (lhs rhs [index #:mutable] prec action) #:inspector (make-inspector)) 14 | 15 | ;; The dot-pos field is the index of the element in the rhs 16 | ;; of prod that the dot immediately precedes. 17 | ;; Thus 0 <= dot-pos <= (vector-length rhs). 18 | (define-struct item (prod dot-pos) #:inspector (make-inspector)) 19 | 20 | ;; gram-sym = (union term? non-term?) 21 | ;; Each term has a unique index 0 <= index < number of terms 22 | ;; Each non-term has a unique index 0 <= index < number of non-terms 23 | (define-struct term (sym [index #:mutable] prec) #:inspector (make-inspector)) 24 | (define-struct non-term (sym [index #:mutable]) #:inspector (make-inspector)) 25 | 26 | ;; a precedence declaration. 27 | (define-struct prec (num assoc) #:inspector (make-inspector)) 28 | 29 | (provide/contract 30 | (make-item (prod? (or/c #f natural-number/c) . -> . item?)) 31 | (make-term (symbol? (or/c #f natural-number/c) (or/c prec? #f) . -> . term?)) 32 | (make-non-term (symbol? (or/c #f natural-number/c) . -> . non-term?)) 33 | (make-prec (natural-number/c (or/c 'left 'right 'nonassoc) . -> . prec?)) 34 | (make-prod (non-term? (vectorof (or/c non-term? term?)) 35 | (or/c #f natural-number/c) (or/c #f prec?) syntax? . -> . prod?))) 36 | 37 | (provide 38 | 39 | 40 | ;; Things that work on items 41 | start-item? item-prod item->string 42 | sym-at-dot move-dot-right itemstring 46 | non-term? term? non-termbit-vector term-index non-term-index 48 | 49 | ;; Things that work on precs 50 | prec-num prec-assoc 51 | 52 | grammar% 53 | 54 | ;; Things that work on productions 55 | prod-index prod-prec prod-rhs prod-lhs prod-action) 56 | 57 | 58 | ;;---------------------- LR items -------------------------- 59 | 60 | ;; item bool 61 | ;; Lexicographic comparison on two items. 62 | (define (item bool 72 | ;; The start production always has index 0 73 | (define (start-item? i) 74 | (= 0 (non-term-index (prod-lhs (item-prod i))))) 75 | 76 | 77 | ;; move-dot-right: LR-item -> LR-item | #f 78 | ;; moves the dot to the right in the item, unless it is at its 79 | ;; rightmost, then it returns false 80 | (define (move-dot-right i) 81 | (cond 82 | ((= (item-dot-pos i) (vector-length (prod-rhs (item-prod i)))) #f) 83 | (else (make-item (item-prod i) 84 | (add1 (item-dot-pos i)))))) 85 | 86 | ;; sym-at-dot: LR-item -> gram-sym | #f 87 | ;; returns the symbol after the dot in the item or #f if there is none 88 | (define (sym-at-dot i) 89 | (let ((dp (item-dot-pos i)) 90 | (rhs (prod-rhs (item-prod i)))) 91 | (cond 92 | ((= dp (vector-length rhs)) #f) 93 | (else (vector-ref rhs dp))))) 94 | 95 | 96 | ;; print-item: LR-item -> 97 | (define (item->string it) 98 | (let ((print-sym (lambda (i) 99 | (let ((gs (vector-ref (prod-rhs (item-prod it)) i))) 100 | (cond 101 | ((term? gs) (format "~a " (term-sym gs))) 102 | (else (format "~a " (non-term-sym gs)))))))) 103 | (string-append 104 | (format "~a -> " (non-term-sym (prod-lhs (item-prod it)))) 105 | (let loop ((i 0)) 106 | (cond 107 | ((= i (vector-length (prod-rhs (item-prod it)))) 108 | (if (= i (item-dot-pos it)) 109 | ". " 110 | "")) 111 | ((= i (item-dot-pos it)) 112 | (string-append ". " (print-sym i) (loop (add1 i)))) 113 | (else (string-append (print-sym i) (loop (add1 i))))))))) 114 | 115 | ;; --------------------- Grammar Symbols -------------------------- 116 | 117 | (define (non-termstring gs) 134 | (symbol->string (gram-sym-symbol gs))) 135 | 136 | ;; term-list->bit-vector: term list -> int 137 | ;; Creates a number where the nth bit is 1 if the term with index n is in 138 | ;; the list, and whose nth bit is 0 otherwise 139 | (define (term-list->bit-vector terms) 140 | (cond 141 | ((null? terms) 0) 142 | (else 143 | (bitwise-ior (arithmetic-shift 1 (term-index (car terms))) (term-list->bit-vector (cdr terms)))))) 144 | 145 | 146 | ;; ------------------------- Grammar ------------------------------ 147 | 148 | (define grammar% 149 | (class object% 150 | (super-instantiate ()) 151 | ;; prods: production list list 152 | ;; where there is one production list per non-term 153 | (init prods) 154 | ;; init-prods: production list 155 | ;; The productions parsing can start from 156 | ;; nullable-non-terms is indexed by the non-term-index and is true iff non-term is nullable 157 | (init-field init-prods terms non-terms end-terms) 158 | 159 | ;; list of all productions 160 | (define all-prods (apply append prods)) 161 | (define num-prods (length all-prods)) 162 | (define num-terms (length terms)) 163 | (define num-non-terms (length non-terms)) 164 | 165 | (let ((count 0)) 166 | (for-each 167 | (lambda (nt) 168 | (set-non-term-index! nt count) 169 | (set! count (add1 count))) 170 | non-terms)) 171 | 172 | (let ((count 0)) 173 | (for-each 174 | (lambda (t) 175 | (set-term-index! t count) 176 | (set! count (add1 count))) 177 | terms)) 178 | 179 | (let ((count 0)) 180 | (for-each 181 | (lambda (prod) 182 | (set-prod-index! prod count) 183 | (set! count (add1 count))) 184 | all-prods)) 185 | 186 | ;; indexed by the index of the non-term - contains the list of productions for that non-term 187 | (define nt->prods 188 | (let ((v (make-vector (length prods) #f))) 189 | (for-each (lambda (prods) 190 | (vector-set! v (non-term-index (prod-lhs (car prods))) prods)) 191 | prods) 192 | v)) 193 | 194 | (define nullable-non-terms 195 | (nullable all-prods num-non-terms)) 196 | 197 | (define/public (get-num-terms) num-terms) 198 | (define/public (get-num-non-terms) num-non-terms) 199 | 200 | (define/public (get-prods-for-non-term nt) 201 | (vector-ref nt->prods (non-term-index nt))) 202 | (define/public (get-prods) all-prods) 203 | (define/public (get-init-prods) init-prods) 204 | 205 | (define/public (get-terms) terms) 206 | (define/public (get-non-terms) non-terms) 207 | 208 | (define/public (get-num-prods) num-prods) 209 | (define/public (get-end-terms) end-terms) 210 | 211 | (define/public (nullable-non-term? nt) 212 | (vector-ref nullable-non-terms (non-term-index nt))) 213 | 214 | (define/public (nullable-after-dot? item) 215 | (let* ((rhs (prod-rhs (item-prod item))) 216 | (prod-length (vector-length rhs))) 217 | (let loop ((i (item-dot-pos item))) 218 | (cond 219 | ((< i prod-length) 220 | (if (and (non-term? (vector-ref rhs i)) (nullable-non-term? (vector-ref rhs i))) 221 | (loop (add1 i)) 222 | #f)) 223 | ((= i prod-length) #t))))) 224 | 225 | (define/public (nullable-non-term-thunk) 226 | (lambda (nt) 227 | (nullable-non-term? nt))) 228 | (define/public (nullable-after-dot?-thunk) 229 | (lambda (item) 230 | (nullable-after-dot? item))))) 231 | 232 | 233 | ;; nullable: production list * int -> non-term set 234 | ;; determines which non-terminals can derive epsilon 235 | (define (nullable prods num-nts) 236 | (letrec ((nullable (make-vector num-nts #f)) 237 | (added #f) 238 | 239 | ;; possible-nullable: producion list -> production list 240 | ;; Removes all productions that have a terminal 241 | (possible-nullable 242 | (lambda (prods) 243 | (filter (lambda (prod) 244 | (vector-andmap non-term? (prod-rhs prod))) 245 | prods))) 246 | 247 | ;; set-nullables: production list -> production list 248 | ;; makes one pass through the productions, adding the ones 249 | ;; known to be nullable now to nullable and returning a list 250 | ;; of productions that we don't know about yet. 251 | (set-nullables 252 | (lambda (prods) 253 | (cond 254 | ((null? prods) null) 255 | ((vector-ref nullable 256 | (gram-sym-index (prod-lhs (car prods)))) 257 | (set-nullables (cdr prods))) 258 | ((vector-andmap (lambda (nt) 259 | (vector-ref nullable (gram-sym-index nt))) 260 | (prod-rhs (car prods))) 261 | (vector-set! nullable 262 | (gram-sym-index (prod-lhs (car prods))) 263 | #t) 264 | (set! added #t) 265 | (set-nullables (cdr prods))) 266 | (else 267 | (cons (car prods) 268 | (set-nullables (cdr prods)))))))) 269 | 270 | (let loop ((P (possible-nullable prods))) 271 | (cond 272 | ((null? P) nullable) 273 | (else 274 | (set! added #f) 275 | (let ((new-P (set-nullables P))) 276 | (if added 277 | (loop new-P) 278 | nullable))))))) 279 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/graph.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (provide digraph) 4 | 5 | (define (zero-thunk) 0) 6 | 7 | ;; digraph: 8 | ;; ('a list) * ('a -> 'a list) * ('a -> 'b) * ('b * 'b -> 'b) * (-> 'b) 9 | ;; -> ('a -> 'b) 10 | ;; DeRemer and Pennello 1982 11 | ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} 12 | ;; We use a hash-table to represent the result function 'a -> 'b set, so 13 | ;; the values of type 'a must be comparable with eq?. 14 | (define (digraph nodes edges f- union fail) 15 | (letrec [ 16 | ;; Will map elements of 'a to 'b sets 17 | (results (make-hasheq)) 18 | (f (lambda (x) (hash-ref results x fail))) 19 | 20 | ;; Maps elements of 'a to integers. 21 | (N (make-hasheq)) 22 | (get-N (lambda (x) (hash-ref N x zero-thunk))) 23 | (set-N (lambda (x d) (hash-set! N x d))) 24 | 25 | (stack null) 26 | (push (lambda (x) 27 | (set! stack (cons x stack)))) 28 | (pop (lambda () 29 | (begin0 30 | (car stack) 31 | (set! stack (cdr stack))))) 32 | (depth (lambda () (length stack))) 33 | 34 | ;; traverse: 'a -> 35 | (traverse 36 | (lambda (x) 37 | (push x) 38 | (let ((d (depth))) 39 | (set-N x d) 40 | (hash-set! results x (f- x)) 41 | (for-each (lambda (y) 42 | (when (= 0 (get-N y)) 43 | (traverse y)) 44 | (hash-set! results 45 | x 46 | (union (f x) (f y))) 47 | (set-N x (min (get-N x) (get-N y)))) 48 | (edges x)) 49 | (when (= d (get-N x)) 50 | (let loop ((p (pop))) 51 | (set-N p +inf.0) 52 | (hash-set! results p (f x)) 53 | (when (not (eq? x p)) 54 | (loop (pop))))))))] 55 | (for-each (lambda (x) 56 | (when (= 0 (get-N x)) 57 | (traverse x))) 58 | nodes) 59 | f)) 60 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/input-file-parser.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; routines for parsing the input to the parser generator and producing a 4 | ;; grammar (See grammar.rkt) 5 | 6 | (require "yacc-helper.rkt" 7 | "../private-lex/token-syntax.rkt" 8 | "grammar.rkt" 9 | racket/class 10 | racket/contract) 11 | (require (for-template racket/base)) 12 | 13 | (define (is-a-grammar%? x) (is-a? x grammar%)) 14 | (provide/contract 15 | (parse-input ((listof identifier?) (listof identifier?) (listof identifier?) 16 | (or/c #f syntax?) syntax? any/c . -> . is-a-grammar%?)) 17 | (get-term-list ((listof identifier?) . -> . (listof identifier?)))) 18 | 19 | (define stx-for-original-property (read-syntax #f (open-input-string "original"))) 20 | 21 | ;; get-args: ??? -> (values (listof syntax) (or/c #f (cons integer? stx))) 22 | (define (get-args i rhs src-pos term-defs) 23 | (let ((empty-table (make-hasheq)) 24 | (biggest-pos #f)) 25 | (hash-set! empty-table 'error #t) 26 | (for-each (lambda (td) 27 | (let ((v (syntax-local-value td))) 28 | (when (e-terminals-def? v) 29 | (for-each (lambda (s) 30 | (hash-set! empty-table (syntax->datum s) #t)) 31 | (syntax->list (e-terminals-def-t v)))))) 32 | term-defs) 33 | (let ([args 34 | (let get-args ((i i) 35 | (rhs rhs)) 36 | (cond 37 | ((null? rhs) null) 38 | (else 39 | (let ((b (car rhs)) 40 | (name (if (hash-ref empty-table (syntax->datum (car rhs)) #f) 41 | (gensym) 42 | (string->symbol (format "$~a" i))))) 43 | (cond 44 | (src-pos 45 | (let ([start-pos-id 46 | (datum->syntax b (string->symbol (format "$~a-start-pos" i)) b stx-for-original-property)] 47 | [end-pos-id 48 | (datum->syntax b (string->symbol (format "$~a-end-pos" i)) b stx-for-original-property)]) 49 | (set! biggest-pos (cons start-pos-id end-pos-id)) 50 | `(,(datum->syntax b name b stx-for-original-property) 51 | ,start-pos-id 52 | ,end-pos-id 53 | ,@(get-args (add1 i) (cdr rhs))))) 54 | (else 55 | `(,(datum->syntax b name b stx-for-original-property) 56 | ,@(get-args (add1 i) (cdr rhs)))))))))]) 57 | (values args biggest-pos)))) 58 | 59 | ;; Given the list of terminal symbols and the precedence/associativity definitions, 60 | ;; builds terminal structures (See grammar.rkt) 61 | ;; build-terms: symbol list * symbol list list -> term list 62 | (define (build-terms term-list precs) 63 | (let ((counter 0) 64 | 65 | ;;(term-list (cons (gensym) term-list)) 66 | 67 | ;; Will map a terminal symbol to its precedence/associativity 68 | (prec-table (make-hasheq))) 69 | 70 | ;; Fill the prec table 71 | (for-each 72 | (lambda (p-decl) 73 | (begin0 74 | (let ((assoc (car p-decl))) 75 | (for-each 76 | (lambda (term-sym) 77 | (hash-set! prec-table term-sym (make-prec counter assoc))) 78 | (cdr p-decl))) 79 | (set! counter (add1 counter)))) 80 | precs) 81 | 82 | ;; Build the terminal structures 83 | (map 84 | (lambda (term-sym) 85 | (make-term term-sym 86 | #f 87 | (hash-ref prec-table term-sym #f))) 88 | term-list))) 89 | 90 | ;; Retrieves the terminal symbols from a terminals-def (See terminal-syntax.rkt) 91 | ;; get-terms-from-def: identifier? -> (listof identifier?) 92 | (define (get-terms-from-def term-syn) 93 | (let ((t (syntax-local-value term-syn (lambda () #f)))) 94 | (cond 95 | ((terminals-def? t) (syntax->list (terminals-def-t t))) 96 | ((e-terminals-def? t) (syntax->list (e-terminals-def-t t))) 97 | (else 98 | (raise-syntax-error 99 | 'parser-tokens 100 | "undefined token group" 101 | term-syn))))) 102 | 103 | (define (get-term-list term-group-names) 104 | (remove-duplicates 105 | (cons (datum->syntax #f 'error) 106 | (apply append 107 | (map get-terms-from-def term-group-names))))) 108 | 109 | (define (parse-input term-defs start ends prec-decls prods src-pos) 110 | (let* ((start-syms (map syntax-e start)) 111 | 112 | (list-of-terms (map syntax-e (get-term-list term-defs))) 113 | 114 | (end-terms 115 | (map 116 | (lambda (end) 117 | (unless (memq (syntax-e end) list-of-terms) 118 | (raise-syntax-error 119 | 'parser-end-tokens 120 | (format "End token ~a not defined as a token" 121 | (syntax-e end)) 122 | end)) 123 | (syntax-e end)) 124 | ends)) 125 | 126 | ;; Get the list of terminals out of input-terms 127 | 128 | (list-of-non-terms 129 | (syntax-case prods () 130 | (((non-term production ...) ...) 131 | (begin 132 | (for-each 133 | (lambda (nts) 134 | (when (memq (syntax->datum nts) list-of-terms) 135 | (raise-syntax-error 136 | 'parser-non-terminals 137 | (format "~a used as both token and non-terminal" 138 | (syntax->datum nts)) 139 | nts))) 140 | (syntax->list (syntax (non-term ...)))) 141 | 142 | (let ((dup (duplicate-list? (syntax->datum 143 | (syntax (non-term ...)))))) 144 | (when dup 145 | (raise-syntax-error 146 | 'parser-non-terminals 147 | (format "non-terminal ~a defined multiple times" 148 | dup) 149 | prods))) 150 | 151 | (syntax->datum (syntax (non-term ...))))) 152 | (_ 153 | (raise-syntax-error 154 | 'parser-grammar 155 | "Grammar must be of the form (grammar (non-terminal productions ...) ...)" 156 | prods)))) 157 | 158 | ;; Check the precedence declarations for errors and turn them into data 159 | (precs 160 | (syntax-case prec-decls () 161 | (((type term ...) ...) 162 | (let ((p-terms 163 | (syntax->datum (syntax (term ... ...))))) 164 | (cond 165 | ((duplicate-list? p-terms) => 166 | (lambda (d) 167 | (raise-syntax-error 168 | 'parser-precedences 169 | (format "duplicate precedence declaration for token ~a" 170 | d) 171 | prec-decls))) 172 | (else 173 | (for-each 174 | (lambda (a) 175 | (for-each 176 | (lambda (t) 177 | (when (not (memq (syntax->datum t) 178 | list-of-terms)) 179 | (raise-syntax-error 180 | 'parser-precedences 181 | (format 182 | "Precedence declared for non-token ~a" 183 | (syntax->datum t)) 184 | t))) 185 | (syntax->list a))) 186 | (syntax->list (syntax ((term ...) ...)))) 187 | (for-each 188 | (lambda (type) 189 | (when (not (memq (syntax->datum type) 190 | `(left right nonassoc))) 191 | (raise-syntax-error 192 | 'parser-precedences 193 | "Associativity must be left, right or nonassoc" 194 | type))) 195 | (syntax->list (syntax (type ...)))) 196 | (syntax->datum prec-decls))))) 197 | (#f null) 198 | (_ 199 | (raise-syntax-error 200 | 'parser-precedences 201 | "Precedence declaration must be of the form (precs (assoc term ...) ...) where assoc is left, right or nonassoc" 202 | prec-decls)))) 203 | 204 | (terms (build-terms list-of-terms precs)) 205 | 206 | (non-terms (map (lambda (non-term) (make-non-term non-term #f)) 207 | list-of-non-terms)) 208 | (term-table (make-hasheq)) 209 | (non-term-table (make-hasheq))) 210 | 211 | (for-each (lambda (t) 212 | (hash-set! term-table (gram-sym-symbol t) t)) 213 | terms) 214 | 215 | (for-each (lambda (nt) 216 | (hash-set! non-term-table (gram-sym-symbol nt) nt)) 217 | non-terms) 218 | 219 | (let* ( 220 | ;; parse-prod: syntax -> gram-sym vector 221 | (parse-prod 222 | (lambda (prod-so) 223 | (syntax-case prod-so () 224 | ((prod-rhs-sym ...) 225 | (andmap identifier? (syntax->list prod-so)) 226 | (begin 227 | (for-each (lambda (t) 228 | (when (memq (syntax->datum t) end-terms) 229 | (raise-syntax-error 230 | 'parser-production-rhs 231 | (format "~a is an end token and cannot be used in a production" 232 | (syntax->datum t)) 233 | t))) 234 | (syntax->list prod-so)) 235 | (list->vector 236 | (map (lambda (s) 237 | (hash-ref 238 | term-table 239 | (syntax->datum s) 240 | (lambda () 241 | (hash-ref 242 | non-term-table 243 | (syntax->datum s) 244 | (lambda () 245 | (raise-syntax-error 246 | 'parser-production-rhs 247 | (format 248 | "~a is not declared as a terminal or non-terminal" 249 | (syntax->datum s)) 250 | s)))))) 251 | (syntax->list prod-so))))) 252 | (_ 253 | (raise-syntax-error 254 | 'parser-production-rhs 255 | "production right-hand-side must have form (symbol ...)" 256 | prod-so))))) 257 | 258 | ;; parse-action: syntax * syntax -> syntax 259 | (parse-action 260 | (lambda (rhs act) 261 | (let-values ([(args biggest) (get-args 1 (syntax->list rhs) src-pos term-defs)]) 262 | (let ([act 263 | (if biggest 264 | (with-syntax ([$n-start-pos (datum->syntax (car biggest) '$n-start-pos)] 265 | [$n-end-pos (datum->syntax (cdr biggest) '$n-end-pos)]) 266 | #`(let ([$n-start-pos #,(car biggest)] 267 | [$n-end-pos #,(cdr biggest)]) 268 | #,act)) 269 | act)]) 270 | (quasisyntax/loc act 271 | (lambda #,args 272 | #,act)))))) 273 | 274 | ;; parse-prod+action: non-term * syntax -> production 275 | (parse-prod+action 276 | (lambda (nt prod-so) 277 | (syntax-case prod-so () 278 | ((prod-rhs action) 279 | (let ((p (parse-prod (syntax prod-rhs)))) 280 | (make-prod 281 | nt 282 | p 283 | #f 284 | (let loop ((i (sub1 (vector-length p)))) 285 | (if (>= i 0) 286 | (let ((gs (vector-ref p i))) 287 | (if (term? gs) 288 | (term-prec gs) 289 | (loop (sub1 i)))) 290 | #f)) 291 | (parse-action (syntax prod-rhs) (syntax action))))) 292 | ((prod-rhs (prec term) action) 293 | (identifier? (syntax term)) 294 | (let ((p (parse-prod (syntax prod-rhs)))) 295 | (make-prod 296 | nt 297 | p 298 | #f 299 | (term-prec 300 | (hash-ref 301 | term-table 302 | (syntax->datum (syntax term)) 303 | (lambda () 304 | (raise-syntax-error 305 | 'parser-production-rhs 306 | (format 307 | "unrecognized terminal ~a in precedence declaration" 308 | (syntax->datum (syntax term))) 309 | (syntax term))))) 310 | (parse-action (syntax prod-rhs) (syntax action))))) 311 | (_ 312 | (raise-syntax-error 313 | 'parser-production-rhs 314 | "production must have form [(symbol ...) expression] or [(symbol ...) (prec symbol) expression]" 315 | prod-so))))) 316 | 317 | ;; parse-prod-for-nt: syntax -> production list 318 | (parse-prods-for-nt 319 | (lambda (prods-so) 320 | (syntax-case prods-so () 321 | ((nt productions ...) 322 | (> (length (syntax->list (syntax (productions ...)))) 0) 323 | (let ((nt (hash-ref non-term-table 324 | (syntax->datum (syntax nt))))) 325 | (map (lambda (p) (parse-prod+action nt p)) 326 | (syntax->list (syntax (productions ...)))))) 327 | (_ 328 | (raise-syntax-error 329 | 'parser-productions 330 | "A production for a non-terminal must be (non-term right-hand-side ...) with at least 1 right hand side" 331 | prods-so)))))) 332 | 333 | (for-each 334 | (lambda (sstx ssym) 335 | (unless (memq ssym list-of-non-terms) 336 | (raise-syntax-error 337 | 'parser-start 338 | (format "Start symbol ~a not defined as a non-terminal" ssym) 339 | sstx))) 340 | start start-syms) 341 | 342 | (let* ((starts (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) 343 | (end-non-terms (map (lambda (x) (make-non-term (gensym) #f)) start-syms)) 344 | (parsed-prods (map parse-prods-for-nt (syntax->list prods))) 345 | (start-prods 346 | (map (lambda (start end-non-term) 347 | (list (make-prod start (vector end-non-term) #f #f 348 | (syntax (lambda (x) x))))) 349 | starts end-non-terms)) 350 | (prods 351 | `(,@start-prods 352 | ,@(map 353 | (lambda (end-nt start-sym) 354 | (map 355 | (lambda (end) 356 | (make-prod end-nt 357 | (vector 358 | (hash-ref non-term-table start-sym) 359 | (hash-ref term-table end)) 360 | #f 361 | #f 362 | (syntax (lambda (x) x)))) 363 | end-terms)) 364 | end-non-terms start-syms) 365 | ,@parsed-prods))) 366 | 367 | (make-object grammar% 368 | prods 369 | (map car start-prods) 370 | terms 371 | (append starts (append end-non-terms non-terms)) 372 | (map (lambda (term-name) 373 | (hash-ref term-table term-name)) 374 | end-terms)))))) 375 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/lalr.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; Compute LALR lookaheads from DeRemer and Pennello 1982 4 | 5 | (require "lr0.rkt" 6 | "grammar.rkt" 7 | racket/list 8 | racket/class) 9 | 10 | (provide compute-LA) 11 | 12 | ;; compute-DR: LR0-automaton * grammar -> (trans-key -> term set) 13 | ;; computes for each state, non-term transition pair, the terminals 14 | ;; which can transition out of the resulting state 15 | ;; output term set is represented in bit-vector form 16 | (define (compute-DR a g) 17 | (lambda (tk) 18 | (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) 19 | (term-list->bit-vector 20 | (filter 21 | (lambda (term) 22 | (send a run-automaton r term)) 23 | (send g get-terms)))))) 24 | 25 | ;; compute-reads: 26 | ;; LR0-automaton * grammar -> (trans-key -> trans-key list) 27 | (define (compute-reads a g) 28 | (let ((nullable-non-terms 29 | (filter (lambda (nt) (send g nullable-non-term? nt)) 30 | (send g get-non-terms)))) 31 | (lambda (tk) 32 | (let ((r (send a run-automaton (trans-key-st tk) (trans-key-gs tk)))) 33 | (map (lambda (x) (make-trans-key r x)) 34 | (filter (lambda (non-term) (send a run-automaton r non-term)) 35 | nullable-non-terms)))))) 36 | 37 | ;; compute-read: LR0-automaton * grammar -> (trans-key -> term set) 38 | ;; output term set is represented in bit-vector form 39 | (define (compute-read a g) 40 | (let* ((dr (compute-DR a g)) 41 | (reads (compute-reads a g))) 42 | (digraph-tk->terml (send a get-mapped-non-term-keys) 43 | reads 44 | dr 45 | (send a get-num-states)))) 46 | ;; returns the list of all k such that state k transitions to state start on the 47 | ;; transitions in rhs (in order) 48 | (define (run-lr0-backward a rhs dot-pos start num-states) 49 | (let loop ((states (list start)) 50 | (i (sub1 dot-pos))) 51 | (cond 52 | ((< i 0) states) 53 | (else (loop (send a run-automaton-back states (vector-ref rhs i)) 54 | (sub1 i)))))) 55 | 56 | ;; prod->items-for-include: grammar * prod * non-term -> lr0-item list 57 | ;; returns the list of all (B -> beta . nt gamma) such that prod = (B -> beta nt gamma) 58 | ;; and gamma =>* epsilon 59 | (define (prod->items-for-include g prod nt) 60 | (let* ((rhs (prod-rhs prod)) 61 | (rhs-l (vector-length rhs))) 62 | (append (if (and (> rhs-l 0) (eq? nt (vector-ref rhs (sub1 rhs-l)))) 63 | (list (make-item prod (sub1 rhs-l))) 64 | null) 65 | (let loop ((i (sub1 rhs-l))) 66 | (cond 67 | ((and (> i 0) 68 | (non-term? (vector-ref rhs i)) 69 | (send g nullable-non-term? (vector-ref rhs i))) 70 | (if (eq? nt (vector-ref rhs (sub1 i))) 71 | (cons (make-item prod (sub1 i)) 72 | (loop (sub1 i))) 73 | (loop (sub1 i)))) 74 | (else null)))))) 75 | 76 | ;; prod-list->items-for-include: grammar * prod list * non-term -> lr0-item list 77 | ;; return the list of all (B -> beta . nt gamma) such that (B -> beta nt gamma) in prod-list 78 | ;; and gamma =>* epsilon 79 | (define (prod-list->items-for-include g prod-list nt) 80 | (apply append (map (lambda (prod) (prod->items-for-include g prod nt)) prod-list))) 81 | 82 | ;; comput-includes: lr0-automaton * grammar -> (trans-key -> trans-key list) 83 | (define (compute-includes a g) 84 | (let ((num-states (send a get-num-states)) 85 | (items-for-input-nt (make-vector (send g get-num-non-terms) null))) 86 | (for-each 87 | (lambda (input-nt) 88 | (vector-set! items-for-input-nt (non-term-index input-nt) 89 | (prod-list->items-for-include g (send g get-prods) input-nt))) 90 | (send g get-non-terms)) 91 | (lambda (tk) 92 | (let* ((goal-state (trans-key-st tk)) 93 | (non-term (trans-key-gs tk)) 94 | (items (vector-ref items-for-input-nt (non-term-index non-term)))) 95 | (trans-key-list-remove-dups 96 | (apply append 97 | (map (lambda (item) 98 | (let* ((prod (item-prod item)) 99 | (rhs (prod-rhs prod)) 100 | (lhs (prod-lhs prod))) 101 | (map (lambda (state) 102 | (make-trans-key state lhs)) 103 | (run-lr0-backward a 104 | rhs 105 | (item-dot-pos item) 106 | goal-state 107 | num-states)))) 108 | items))))))) 109 | 110 | ;; compute-lookback: lr0-automaton * grammar -> (kernel * proc -> trans-key list) 111 | (define (compute-lookback a g) 112 | (let ((num-states (send a get-num-states))) 113 | (lambda (state prod) 114 | (map (lambda (k) (make-trans-key k (prod-lhs prod))) 115 | (run-lr0-backward a (prod-rhs prod) (vector-length (prod-rhs prod)) state num-states))))) 116 | 117 | ;; compute-follow: LR0-automaton * grammar -> (trans-key -> term set) 118 | ;; output term set is represented in bit-vector form 119 | (define (compute-follow a g includes) 120 | (let ((read (compute-read a g))) 121 | (digraph-tk->terml (send a get-mapped-non-term-keys) 122 | includes 123 | read 124 | (send a get-num-states)))) 125 | 126 | ;; compute-LA: LR0-automaton * grammar -> kernel * prod -> term set 127 | ;; output term set is represented in bit-vector form 128 | (define (compute-LA a g) 129 | (let* ((includes (compute-includes a g)) 130 | (lookback (compute-lookback a g)) 131 | (follow (compute-follow a g includes))) 132 | (lambda (k p) 133 | (let* ((l (lookback k p)) 134 | (f (map follow l))) 135 | (apply bitwise-ior (cons 0 f)))))) 136 | 137 | (define (print-DR dr a g) 138 | (print-input-st-sym dr "DR" a g print-output-terms)) 139 | (define (print-Read Read a g) 140 | (print-input-st-sym Read "Read" a g print-output-terms)) 141 | (define (print-includes i a g) 142 | (print-input-st-sym i "includes" a g print-output-st-nt)) 143 | (define (print-lookback l a g) 144 | (print-input-st-prod l "lookback" a g print-output-st-nt)) 145 | (define (print-follow f a g) 146 | (print-input-st-sym f "follow" a g print-output-terms)) 147 | (define (print-LA l a g) 148 | (print-input-st-prod l "LA" a g print-output-terms)) 149 | 150 | (define (print-input-st-sym f name a g print-output) 151 | (printf "~a:\n" name) 152 | (send a for-each-state 153 | (lambda (state) 154 | (for-each 155 | (lambda (non-term) 156 | (let ((res (f (make-trans-key state non-term)))) 157 | (when (not (null? res)) 158 | (printf "~a(~a, ~a) = ~a\n" 159 | name 160 | state 161 | (gram-sym-symbol non-term) 162 | (print-output res))))) 163 | (send g get-non-terms)))) 164 | (newline)) 165 | 166 | (define (print-input-st-prod f name a g print-output) 167 | (printf "~a:\n" name) 168 | (send a for-each-state 169 | (lambda (state) 170 | (for-each 171 | (lambda (non-term) 172 | (for-each 173 | (lambda (prod) 174 | (let ((res (f state prod))) 175 | (when (not (null? res)) 176 | (printf "~a(~a, ~a) = ~a\n" 177 | name 178 | (kernel-index state) 179 | (prod-index prod) 180 | (print-output res))))) 181 | (send g get-prods-for-non-term non-term))) 182 | (send g get-non-terms))))) 183 | 184 | (define (print-output-terms r) 185 | (map 186 | (lambda (p) 187 | (gram-sym-symbol p)) 188 | r)) 189 | 190 | (define (print-output-st-nt r) 191 | (map 192 | (lambda (p) 193 | (list 194 | (kernel-index (trans-key-st p)) 195 | (gram-sym-symbol (trans-key-gs p)))) 196 | r)) 197 | 198 | ;; init-tk-map : int -> (vectorof hashtable?) 199 | (define (init-tk-map n) 200 | (let ((v (make-vector n #f))) 201 | (let loop ((i (sub1 (vector-length v)))) 202 | (when (>= i 0) 203 | (vector-set! v i (make-hasheq)) 204 | (loop (sub1 i)))) 205 | v)) 206 | 207 | ;; lookup-tk-map : (vectorof (symbol? int hashtable)) -> trans-key? -> int 208 | (define (lookup-tk-map map) 209 | (lambda (tk) 210 | (let ((st (trans-key-st tk)) 211 | (gs (trans-key-gs tk))) 212 | (hash-ref (vector-ref map (kernel-index st)) 213 | (gram-sym-symbol gs) 214 | 0)))) 215 | 216 | ;; add-tk-map : (vectorof (symbol? int hashtable)) -> trans-key int -> 217 | (define (add-tk-map map) 218 | (lambda (tk v) 219 | (let ((st (trans-key-st tk)) 220 | (gs (trans-key-gs tk))) 221 | (hash-set! (vector-ref map (kernel-index st)) 222 | (gram-sym-symbol gs) 223 | v)))) 224 | 225 | ;; digraph-tk->terml: 226 | ;; (trans-key list) * (trans-key -> trans-key list) * (trans-key -> term list) * int * int * int 227 | ;; -> (trans-key -> term list) 228 | ;; DeRemer and Pennello 1982 229 | ;; Computes (f x) = (f- x) union Union{(f y) | y in (edges x)} 230 | ;; A specialization of digraph in the file graph.rkt 231 | (define (digraph-tk->terml nodes edges f- num-states) 232 | (letrec [ 233 | ;; Will map elements of trans-key to term sets represented as bit vectors 234 | (results (init-tk-map num-states)) 235 | 236 | ;; Maps elements of trans-keys to integers. 237 | (N (init-tk-map num-states)) 238 | 239 | (get-N (lookup-tk-map N)) 240 | (set-N (add-tk-map N)) 241 | (get-f (lookup-tk-map results)) 242 | (set-f (add-tk-map results)) 243 | 244 | (stack null) 245 | (push (lambda (x) 246 | (set! stack (cons x stack)))) 247 | (pop (lambda () 248 | (begin0 249 | (car stack) 250 | (set! stack (cdr stack))))) 251 | (depth (lambda () (length stack))) 252 | 253 | ;; traverse: 'a -> 254 | (traverse 255 | (lambda (x) 256 | (push x) 257 | (let ((d (depth))) 258 | (set-N x d) 259 | (set-f x (f- x)) 260 | (for-each (lambda (y) 261 | (when (= 0 (get-N y)) 262 | (traverse y)) 263 | (set-f x (bitwise-ior (get-f x) (get-f y))) 264 | (set-N x (min (get-N x) (get-N y)))) 265 | (edges x)) 266 | (when (= d (get-N x)) 267 | (let loop ((p (pop))) 268 | (set-N p +inf.0) 269 | (set-f p (get-f x)) 270 | (unless (equal? x p) 271 | (loop (pop))))))))] 272 | (for-each (lambda (x) 273 | (when (= 0 (get-N x)) 274 | (traverse x))) 275 | nodes) 276 | get-f)) 277 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/lr0.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; Handle the LR0 automaton 4 | 5 | (require "grammar.rkt" 6 | "graph.rkt" 7 | racket/list 8 | racket/class) 9 | 10 | (provide build-lr0-automaton lr0% 11 | (struct-out trans-key) trans-key-list-remove-dups 12 | kernel-items kernel-index) 13 | 14 | ;; kernel = (make-kernel (LR1-item list) index) 15 | ;; the list must be kept sorted according to item 45 | ;; (vectorof (symbol X hashtable)) 46 | (define (build-transition-table num-states assoc) 47 | (let ((transitions (make-vector num-states #f))) 48 | (let loop ((i (sub1 (vector-length transitions)))) 49 | (when (>= i 0) 50 | (vector-set! transitions i (make-hasheq)) 51 | (loop (sub1 i)))) 52 | (for-each 53 | (lambda (trans-key/kernel) 54 | (let ((tk (car trans-key/kernel))) 55 | (hash-set! (vector-ref transitions (kernel-index (trans-key-st tk))) 56 | (gram-sym-symbol (trans-key-gs tk)) 57 | (cdr trans-key/kernel)))) 58 | assoc) 59 | transitions)) 60 | 61 | ;; reverse-assoc : (listof (cons/c trans-key? kernel?)) -> 62 | ;; (listof (cons/c trans-key? (listof kernel?))) 63 | (define (reverse-assoc assoc) 64 | (let ((reverse-hash (make-hash)) 65 | (hash-table-add! 66 | (lambda (ht k v) 67 | (hash-set! ht k (cons v (hash-ref ht k null)))))) 68 | (for-each 69 | (lambda (trans-key/kernel) 70 | (let ((tk (car trans-key/kernel))) 71 | (hash-table-add! reverse-hash 72 | (make-trans-key (cdr trans-key/kernel) 73 | (trans-key-gs tk)) 74 | (trans-key-st tk)))) 75 | assoc) 76 | (hash-map reverse-hash cons))) 77 | 78 | 79 | ;; kernel-list-remove-duplicates 80 | ;; LR0-automaton = object of class lr0% 81 | (define lr0% 82 | (class object% 83 | (super-instantiate ()) 84 | ;; term-assoc : (listof (cons/c trans-key? kernel?)) 85 | ;; non-term-assoc : (listof (cons/c trans-key? kernel?)) 86 | ;; states : (vectorof kernel?) 87 | ;; epsilons : ??? 88 | (init-field term-assoc non-term-assoc states epsilons) 89 | 90 | (define transitions (build-transition-table (vector-length states) 91 | (append term-assoc non-term-assoc))) 92 | 93 | (define reverse-term-assoc (reverse-assoc term-assoc)) 94 | (define reverse-non-term-assoc (reverse-assoc non-term-assoc)) 95 | (define reverse-transitions 96 | (build-transition-table (vector-length states) 97 | (append reverse-term-assoc reverse-non-term-assoc))) 98 | 99 | (define mapped-non-terms (map car non-term-assoc)) 100 | 101 | (define/public (get-mapped-non-term-keys) 102 | mapped-non-terms) 103 | 104 | (define/public (get-num-states) 105 | (vector-length states)) 106 | 107 | (define/public (get-epsilon-trans) 108 | epsilons) 109 | 110 | (define/public (get-transitions) 111 | (append term-assoc non-term-assoc)) 112 | 113 | ;; for-each-state : (state ->) -> 114 | ;; Iteration over the states in an automaton 115 | (define/public (for-each-state f) 116 | (let ((num-states (vector-length states))) 117 | (let loop ((i 0)) 118 | (when (< i num-states) 119 | (begin 120 | (f (vector-ref states i)) 121 | (loop (add1 i))))))) 122 | 123 | ;; run-automaton: kernel? gram-sym? -> (union kernel #f) 124 | ;; returns the state reached from state k on input s, or #f when k 125 | ;; has no transition on s 126 | (define/public (run-automaton k s) 127 | (hash-ref (vector-ref transitions (kernel-index k)) 128 | (gram-sym-symbol s) 129 | #f)) 130 | 131 | ;; run-automaton-back : (listof kernel?) gram-sym? -> (listof kernel) 132 | ;; returns the list of states that can reach k by transitioning on s. 133 | (define/public (run-automaton-back k s) 134 | (apply append 135 | (map 136 | (lambda (k) 137 | (hash-ref (vector-ref reverse-transitions (kernel-index k)) 138 | (gram-sym-symbol s) 139 | null)) 140 | k))))) 141 | 142 | (define (union comp (eq? a b) 161 | (define (kernel->string k) 162 | (apply string-append 163 | `("{" ,@(map (lambda (i) (string-append (item->string i) ", ")) 164 | (kernel-items k)) 165 | "}"))) 166 | 167 | ;; build-LR0-automaton: grammar -> LR0-automaton 168 | ;; Constructs the kernels of the sets of LR(0) items of g 169 | (define (build-lr0-automaton grammar) 170 | ; (printf "LR(0) automaton:\n") 171 | (letrec ( 172 | (epsilons (make-hash)) 173 | (grammar-symbols (append (send grammar get-non-terms) 174 | (send grammar get-terms))) 175 | ;; first-non-term: non-term -> non-term list 176 | ;; given a non-terminal symbol C, return those non-terminal 177 | ;; symbols A s.t. C -> An for some string of terminals and 178 | ;; non-terminals n where -> means a rightmost derivation in many 179 | ;; steps. Assumes that each non-term can be reduced to a string 180 | ;; of terms. 181 | (first-non-term 182 | (digraph (send grammar get-non-terms) 183 | (lambda (nt) 184 | (filter non-term? 185 | (map (lambda (prod) 186 | (sym-at-dot (make-item prod 0))) 187 | (send grammar get-prods-for-non-term nt)))) 188 | (lambda (nt) (list nt)) 189 | (union non-term LR1-item list 193 | ;; Creates a set of items containing i s.t. if A -> n.Xm is in it, 194 | ;; X -> .o is in it too. 195 | (LR0-closure 196 | (lambda (i) 197 | (cond 198 | ((null? i) null) 199 | (else 200 | (let ((next-gsym (sym-at-dot (car i)))) 201 | (cond 202 | ((non-term? next-gsym) 203 | (cons (car i) 204 | (append 205 | (apply append 206 | (map (lambda (non-term) 207 | (map (lambda (x) 208 | (make-item x 0)) 209 | (send grammar 210 | get-prods-for-non-term 211 | non-term))) 212 | (first-non-term next-gsym))) 213 | (LR0-closure (cdr i))))) 214 | (else 215 | (cons (car i) (LR0-closure (cdr i)))))))))) 216 | 217 | 218 | ;; maps trans-keys to kernels 219 | (automaton-term null) 220 | (automaton-non-term null) 221 | 222 | ;; keeps the kernels we have seen, so we can have a unique 223 | ;; list for each kernel 224 | (kernels (make-hash)) 225 | 226 | (counter 0) 227 | 228 | ;; goto: LR1-item list -> LR1-item list list 229 | ;; creates new kernels by moving the dot in each item in the 230 | ;; LR0-closure of kernel to the right, and grouping them by 231 | ;; the term/non-term moved over. Returns the kernels not 232 | ;; yet seen, and places the trans-keys into automaton 233 | (goto 234 | (lambda (kernel) 235 | (let ( 236 | ;; maps a gram-syms to a list of items 237 | (table (make-hasheq)) 238 | 239 | ;; add-item!: 240 | ;; (symbol (listof item) hashtable) item? -> 241 | ;; adds i into the table grouped with the grammar 242 | ;; symbol following its dot 243 | (add-item! 244 | (lambda (table i) 245 | (let ((gs (sym-at-dot i))) 246 | (cond 247 | (gs 248 | (let ((already 249 | (hash-ref table 250 | (gram-sym-symbol gs) 251 | null))) 252 | (unless (member i already) 253 | (hash-set! table 254 | (gram-sym-symbol gs) 255 | (cons i already))))) 256 | ((= 0 (vector-length (prod-rhs (item-prod i)))) 257 | (let ((current (hash-ref epsilons 258 | kernel 259 | null))) 260 | (hash-set! epsilons 261 | kernel 262 | (cons i current))))))))) 263 | 264 | ;; Group the items of the LR0 closure of the kernel 265 | ;; by the character after the dot 266 | (for-each (lambda (item) 267 | (add-item! table item)) 268 | (LR0-closure (kernel-items kernel))) 269 | 270 | ;; each group is a new kernel, with the dot advanced. 271 | ;; sorts the items in a kernel so kernels can be compared 272 | ;; with equal? for using the table kernels to make sure 273 | ;; only one representitive of each kernel is created 274 | (filter 275 | (lambda (x) x) 276 | (map 277 | (lambda (i) 278 | (let* ((gs (car i)) 279 | (items (cadr i)) 280 | (new #f) 281 | (new-kernel (sort 282 | (filter (lambda (x) x) 283 | (map move-dot-right items)) 284 | item ~a on ~a\n" 308 | (kernel->string kernel) 309 | (kernel->string unique-kernel) 310 | (gram-sym-symbol gs)) 311 | (if new 312 | unique-kernel 313 | #f))) 314 | (let loop ((gsyms grammar-symbols)) 315 | (cond 316 | ((null? gsyms) null) 317 | (else 318 | (let ((items (hash-ref table 319 | (gram-sym-symbol (car gsyms)) 320 | null))) 321 | (cond 322 | ((null? items) (loop (cdr gsyms))) 323 | (else 324 | (cons (list (car gsyms) items) 325 | (loop (cdr gsyms)))))))))))))) 326 | 327 | (starts 328 | (map (lambda (init-prod) (list (make-item init-prod 0))) 329 | (send grammar get-init-prods))) 330 | (startk 331 | (map (lambda (start) 332 | (let ((k (make-kernel start counter))) 333 | (hash-set! kernels start k) 334 | (set! counter (add1 counter)) 335 | k)) 336 | starts)) 337 | (new-kernels (make-queue))) 338 | 339 | (let loop ((old-kernels startk) 340 | (seen-kernels null)) 341 | (cond 342 | ((and (empty-queue? new-kernels) (null? old-kernels)) 343 | (make-object lr0% 344 | automaton-term 345 | automaton-non-term 346 | (list->vector (reverse seen-kernels)) 347 | epsilons)) 348 | ((null? old-kernels) 349 | (loop (deq! new-kernels) seen-kernels)) 350 | (else 351 | (enq! new-kernels (goto (car old-kernels))) 352 | (loop (cdr old-kernels) (cons (car old-kernels) seen-kernels))))))) 353 | 354 | (define-struct q (f l) #:mutable #:inspector (make-inspector)) 355 | (define (empty-queue? q) 356 | (null? (q-f q))) 357 | (define (make-queue) 358 | (make-q null null)) 359 | (define (enq! q i) 360 | (if (empty-queue? q) 361 | (let ((i (mcons i null))) 362 | (set-q-l! q i) 363 | (set-q-f! q i)) 364 | (begin 365 | (set-mcdr! (q-l q) (mcons i null)) 366 | (set-q-l! q (mcdr (q-l q)))))) 367 | (define (deq! q) 368 | (begin0 369 | (mcar (q-f q)) 370 | (set-q-f! q (mcdr (q-f q))))) 371 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/parser-actions.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require "grammar.rkt") 3 | (provide (except-out (all-defined-out) make-reduce make-reduce*) 4 | (rename-out [make-reduce* make-reduce])) 5 | 6 | ;; An action is 7 | ;; - (make-shift int) 8 | ;; - (make-reduce prod runtime-action) 9 | ;; - (make-accept) 10 | ;; - (make-goto int) 11 | ;; - (no-action) 12 | ;; A reduce contains a runtime-reduce so that sharing of the reduces can 13 | ;; be easily transferred to sharing of runtime-reduces. 14 | 15 | (define-struct action () #:inspector (make-inspector)) 16 | (define-struct (shift action) (state) #:inspector (make-inspector)) 17 | (define-struct (reduce action) (prod runtime-reduce) #:inspector (make-inspector)) 18 | (define-struct (accept action) () #:inspector (make-inspector)) 19 | (define-struct (goto action) (state) #:inspector (make-inspector)) 20 | (define-struct (no-action action) () #:inspector (make-inspector)) 21 | 22 | (define (make-reduce* p) 23 | (make-reduce p 24 | (vector (prod-index p) 25 | (gram-sym-symbol (prod-lhs p)) 26 | (vector-length (prod-rhs p))))) 27 | 28 | ;; A runtime-action is 29 | ;; non-negative-int (shift) 30 | ;; (vector int symbol int) (reduce) 31 | ;; 'accept (accept) 32 | ;; negative-int (goto) 33 | ;; #f (no-action) 34 | 35 | (define (action->runtime-action a) 36 | (cond 37 | ((shift? a) (shift-state a)) 38 | ((reduce? a) (reduce-runtime-reduce a)) 39 | ((accept? a) 'accept) 40 | ((goto? a) (- (+ (goto-state a) 1))) 41 | ((no-action? a) #f))) 42 | 43 | (define (runtime-shift? x) (and (integer? x) (>= x 0))) 44 | (define runtime-reduce? vector?) 45 | (define (runtime-accept? x) (eq? x 'accept)) 46 | (define (runtime-goto? x) (and (integer? x) (< x 0))) 47 | 48 | (define runtime-shift-state values) 49 | (define (runtime-reduce-prod-num x) (vector-ref x 0)) 50 | (define (runtime-reduce-lhs x) (vector-ref x 1)) 51 | (define (runtime-reduce-rhs-length x) (vector-ref x 2)) 52 | (define (runtime-goto-state x) (- (+ x 1))) 53 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/parser-builder.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require "input-file-parser.rkt" 4 | "grammar.rkt" 5 | "table.rkt" 6 | racket/class 7 | racket/contract) 8 | (require (for-template racket/base)) 9 | 10 | (provide/contract 11 | (build-parser (-> string? any/c any/c any/c any/c 12 | (listof identifier?) 13 | (listof identifier?) 14 | (listof identifier?) 15 | (or/c syntax? #f) 16 | syntax? 17 | (values any/c any/c any/c any/c)))) 18 | 19 | ;; fix-check-syntax : (listof identifier?) (listof identifier?) (listof identifier?) 20 | ;; (union syntax? false/c) syntax?) -> syntax? 21 | (define (fix-check-syntax input-terms start ends assocs prods) 22 | (let* ((term-binders (get-term-list input-terms)) 23 | (get-term-binder 24 | (let ((t (make-hasheq))) 25 | (for-each 26 | (lambda (term) 27 | (hash-set! t (syntax-e term) term)) 28 | term-binders) 29 | (lambda (x) 30 | (let ((r (hash-ref t (syntax-e x) #f))) 31 | (if r 32 | (syntax-local-introduce (datum->syntax r (syntax-e x) x x)) 33 | x))))) 34 | (rhs-list 35 | (syntax-case prods () 36 | (((_ rhs ...) ...) 37 | (syntax->list (syntax (rhs ... ...))))))) 38 | (with-syntax (((tmp ...) (map syntax-local-introduce term-binders)) 39 | ((term-group ...) 40 | (map (lambda (tg) 41 | (syntax-property 42 | (datum->syntax tg #f) 43 | 'disappeared-use 44 | tg)) 45 | input-terms)) 46 | ((end ...) 47 | (map get-term-binder ends)) 48 | ((start ...) 49 | (map get-term-binder start)) 50 | ((bind ...) 51 | (syntax-case prods () 52 | (((bind _ ...) ...) 53 | (syntax->list (syntax (bind ...)))))) 54 | (((bound ...) ...) 55 | (map 56 | (lambda (rhs) 57 | (syntax-case rhs () 58 | (((bound ...) (_ pbound) __) 59 | (map get-term-binder 60 | (cons (syntax pbound) 61 | (syntax->list (syntax (bound ...)))))) 62 | (((bound ...) _) 63 | (map get-term-binder 64 | (syntax->list (syntax (bound ...))))))) 65 | rhs-list)) 66 | ((prec ...) 67 | (if assocs 68 | (map get-term-binder 69 | (syntax-case assocs () 70 | (((__ term ...) ...) 71 | (syntax->list (syntax (term ... ...)))))) 72 | null))) 73 | #`(when #f 74 | (let ((bind void) ... (tmp void) ...) 75 | (void bound ... ... term-group ... start ... end ... prec ...)))))) 76 | (require racket/list "parser-actions.rkt") 77 | (define (build-parser filename 78 | src-pos 79 | suppress 80 | expected-SR-conflicts 81 | expected-RR-conflicts 82 | input-terms 83 | start 84 | end 85 | assocs 86 | prods) 87 | (let* ((grammar (parse-input input-terms start end assocs prods src-pos)) 88 | (table (build-table grammar 89 | filename 90 | suppress 91 | expected-SR-conflicts 92 | expected-RR-conflicts)) 93 | (all-tokens (make-hasheq)) 94 | (actions-code 95 | `(vector ,@(map prod-action (send grammar get-prods))))) 96 | (for-each (lambda (term) 97 | (hash-set! all-tokens (gram-sym-symbol term) #t)) 98 | (send grammar get-terms)) 99 | #;(let ((num-states (vector-length table)) 100 | (num-gram-syms (+ (send grammar get-num-terms) 101 | (send grammar get-num-non-terms))) 102 | (num-ht-entries (apply + (map length (vector->list table)))) 103 | (num-reduces 104 | (let ((ht (make-hasheq))) 105 | (for-each 106 | (lambda (x) 107 | (when (reduce? x) 108 | (hash-set! ht x #t))) 109 | (map cdr (apply append (vector->list table)))) 110 | (length (hash-map ht void))))) 111 | (printf "~a states, ~a grammar symbols, ~a hash-table entries, ~a reduces\n" 112 | num-states num-gram-syms num-ht-entries num-reduces) 113 | (printf "~a -- ~aKB, previously ~aKB\n" 114 | (/ (+ 2 num-states 115 | (* 4 num-states) (* 2 1.5 num-ht-entries) 116 | (* 5 num-reduces)) 256.0) 117 | (/ (+ 2 num-states 118 | (* 4 num-states) (* 2 2.3 num-ht-entries) 119 | (* 5 num-reduces)) 256.0) 120 | (/ (+ 2 (* num-states num-gram-syms) (* 5 num-reduces)) 256.0))) 121 | (values table 122 | all-tokens 123 | actions-code 124 | (fix-check-syntax input-terms start end assocs prods)))) 125 | 126 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/table.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; Routine to build the LALR table 4 | 5 | (require "grammar.rkt" 6 | "lr0.rkt" 7 | "lalr.rkt" 8 | "parser-actions.rkt" 9 | racket/contract 10 | racket/list 11 | racket/class) 12 | 13 | (define (is-a-grammar%? x) (is-a? x grammar%)) 14 | (provide/contract 15 | (build-table (-> is-a-grammar%? string? any/c any/c any/c 16 | (vectorof (listof (cons/c (or/c term? non-term?) action?)))))) 17 | 18 | ;; A parse-table is (vectorof (listof (cons/c gram-sym? action))) 19 | ;; A grouped-parse-table is (vectorof (listof (cons/c gram-sym? (listof action)))) 20 | 21 | ;; make-parse-table : int -> parse-table 22 | (define (make-parse-table num-states) 23 | (make-vector num-states null)) 24 | 25 | ;; table-add!: parse-table nat symbol action -> 26 | (define (table-add! table state-index symbol val) 27 | (vector-set! table state-index (cons (cons symbol val) 28 | (vector-ref table state-index)))) 29 | 30 | ;; group-table : parse-table -> grouped-parse-table 31 | (define (group-table table) 32 | (list->vector 33 | (map 34 | (lambda (state-entry) 35 | (let ((ht (make-hash))) 36 | (for-each 37 | (lambda (gs/actions) 38 | (let ((group (hash-ref ht (car gs/actions) (lambda () null)))) 39 | (unless (member (cdr gs/actions) group) 40 | (hash-set! ht (car gs/actions) (cons (cdr gs/actions) group))))) 41 | state-entry) 42 | (hash-map ht cons))) 43 | (vector->list table)))) 44 | 45 | ;; table-map : (vectorof (listof (cons/c gram-sym? X))) (gram-sym? X -> Y) -> 46 | ;; (vectorof (listof (cons/c gram-sym? Y))) 47 | (define (table-map f table) 48 | (list->vector 49 | (map 50 | (lambda (state-entry) 51 | (map 52 | (lambda (gs/X) 53 | (cons (car gs/X) (f (car gs/X) (cdr gs/X)))) 54 | state-entry)) 55 | (vector->list table)))) 56 | 57 | 58 | (define (bit-vector-for-each f bv) 59 | (letrec ((for-each 60 | (lambda (bv number) 61 | (cond 62 | ((= 0 bv) (void)) 63 | ((= 1 (bitwise-and 1 bv)) 64 | (f number) 65 | (for-each (arithmetic-shift bv -1) (add1 number))) 66 | (else (for-each (arithmetic-shift bv -1) (add1 number))))))) 67 | (for-each bv 0))) 68 | 69 | 70 | ;; print-entry: symbol action output-port -> 71 | ;; prints the action a for lookahead sym to the given port 72 | (define (print-entry sym a port) 73 | (let ((s "\t~a\t\t\t\t\t~a\t~a\n")) 74 | (cond 75 | ((shift? a) 76 | (fprintf port s sym "shift" (shift-state a))) 77 | ((reduce? a) 78 | (fprintf port s sym "reduce" (prod-index (reduce-prod a)))) 79 | ((accept? a) 80 | (fprintf port s sym "accept" "")) 81 | ((goto? a) 82 | (fprintf port s sym "goto" (goto-state a)))))) 83 | 84 | 85 | ;; count: ('a -> bool) * 'a list -> num 86 | ;; counts the number of elements in list that satisfy pred 87 | (define (count pred list) 88 | (cond 89 | ((null? list) 0) 90 | ((pred (car list)) (+ 1 (count pred (cdr list)))) 91 | (else (count pred (cdr list))))) 92 | 93 | ;; display-parser: LR0-automaton grouped-parse-table (listof prod?) output-port -> 94 | ;; Prints out the parser given by table. 95 | (define (display-parser a grouped-table prods port) 96 | (let* ((SR-conflicts 0) 97 | (RR-conflicts 0)) 98 | (for-each 99 | (lambda (prod) 100 | (fprintf port 101 | "~a\t~a\t=\t~a\n" 102 | (prod-index prod) 103 | (gram-sym-symbol (prod-lhs prod)) 104 | (map gram-sym-symbol (vector->list (prod-rhs prod))))) 105 | prods) 106 | (send a for-each-state 107 | (lambda (state) 108 | (fprintf port "State ~a\n" (kernel-index state)) 109 | (for-each (lambda (item) 110 | (fprintf port "\t~a\n" (item->string item))) 111 | (kernel-items state)) 112 | (newline port) 113 | (for-each 114 | (lambda (gs/action) 115 | (let ((sym (gram-sym-symbol (car gs/action))) 116 | (act (cdr gs/action))) 117 | (cond 118 | ((null? act) (void)) 119 | ((null? (cdr act)) 120 | (print-entry sym (car act) port)) 121 | (else 122 | (fprintf port "begin conflict:\n") 123 | (when (> (count reduce? act) 1) 124 | (set! RR-conflicts (add1 RR-conflicts))) 125 | (when (> (count shift? act) 0) 126 | (set! SR-conflicts (add1 SR-conflicts))) 127 | (map (lambda (x) (print-entry sym x port)) act) 128 | (fprintf port "end conflict\n"))))) 129 | (vector-ref grouped-table (kernel-index state))) 130 | (newline port))) 131 | 132 | (when (> SR-conflicts 0) 133 | (fprintf port "~a shift/reduce conflict~a\n" 134 | SR-conflicts 135 | (if (= SR-conflicts 1) "" "s"))) 136 | (when (> RR-conflicts 0) 137 | (fprintf port "~a reduce/reduce conflict~a\n" 138 | RR-conflicts 139 | (if (= RR-conflicts 1) "" "s"))))) 140 | 141 | ;; resolve-conflict : (listof action?) -> action? bool bool 142 | (define (resolve-conflict actions) 143 | (cond 144 | ((null? actions) (values (make-no-action) #f #f)) 145 | ((null? (cdr actions)) 146 | (values (car actions) #f #f)) 147 | (else 148 | (let ((SR-conflict? (> (count shift? actions) 0)) 149 | (RR-conflict? (> (count reduce? actions) 1))) 150 | (let loop ((current-guess #f) 151 | (rest actions)) 152 | (cond 153 | ((null? rest) (values current-guess SR-conflict? RR-conflict?)) 154 | ((shift? (car rest)) (values (car rest) SR-conflict? RR-conflict?)) 155 | ((not current-guess) 156 | (loop (car rest) (cdr rest))) 157 | ((and (reduce? (car rest)) 158 | (< (prod-index (reduce-prod (car rest))) 159 | (prod-index (reduce-prod current-guess)))) 160 | (loop (car rest) (cdr rest))) 161 | ((accept? (car rest)) 162 | (eprintf "accept/reduce or accept/shift conflicts. Check the grammar for useless cycles of productions\n") 163 | (loop current-guess (cdr rest))) 164 | (else (loop current-guess (cdr rest))))))))) 165 | 166 | ;; resolve-conflicts : grouped-parse-table bool -> parse-table 167 | (define (resolve-conflicts grouped-table 168 | suppress 169 | expected-SR-conflicts 170 | expected-RR-conflicts) 171 | (let* ((SR-conflicts 0) 172 | (RR-conflicts 0) 173 | (table (table-map 174 | (lambda (gs actions) 175 | (let-values (((action SR? RR?) 176 | (resolve-conflict actions))) 177 | (when SR? 178 | (set! SR-conflicts (add1 SR-conflicts))) 179 | (when RR? 180 | (set! RR-conflicts (add1 RR-conflicts))) 181 | action)) 182 | grouped-table))) 183 | (unless suppress 184 | (when (if expected-SR-conflicts 185 | (not (= SR-conflicts expected-SR-conflicts)) 186 | (> SR-conflicts 0)) 187 | (eprintf "~a shift/reduce conflict~a~a\n" 188 | SR-conflicts 189 | (if (= SR-conflicts 1) "" "s") 190 | (if expected-SR-conflicts 191 | (format ", expected ~a" expected-SR-conflicts) 192 | ""))) 193 | (when (if expected-RR-conflicts 194 | (not (= RR-conflicts expected-RR-conflicts)) 195 | (> RR-conflicts 0)) 196 | (eprintf "~a reduce/reduce conflict~a~a\n" 197 | RR-conflicts 198 | (if (= RR-conflicts 1) "" "s") 199 | (if expected-RR-conflicts 200 | (format ", expected ~a" expected-RR-conflicts) 201 | "")))) 202 | table)) 203 | 204 | 205 | ;; resolve-sr-conflict : (listof action) (union int #f) -> (listof action) 206 | ;; Resolves a single shift-reduce conflict, if precedences are in place. 207 | (define (resolve-sr-conflict/prec actions shift-prec) 208 | (let* ((shift (if (shift? (car actions)) 209 | (car actions) 210 | (cadr actions))) 211 | (reduce (if (shift? (car actions)) 212 | (cadr actions) 213 | (car actions))) 214 | (reduce-prec (prod-prec (reduce-prod reduce)))) 215 | (cond 216 | ((and shift-prec reduce-prec) 217 | (cond 218 | ((< (prec-num shift-prec) (prec-num reduce-prec)) 219 | (list reduce)) 220 | ((> (prec-num shift-prec) (prec-num reduce-prec)) 221 | (list shift)) 222 | ((eq? 'left (prec-assoc shift-prec)) 223 | (list reduce)) 224 | ((eq? 'right (prec-assoc shift-prec)) 225 | (list shift)) 226 | (else null))) 227 | (else actions)))) 228 | 229 | 230 | ;; resolve-prec-conflicts : parse-table -> grouped-parse-table 231 | (define (resolve-prec-conflicts table) 232 | (table-map 233 | (lambda (gs actions) 234 | (cond 235 | ((and (term? gs) 236 | (= 2 (length actions)) 237 | (or (shift? (car actions)) 238 | (shift? (cadr actions)))) 239 | (resolve-sr-conflict/prec actions (term-prec gs))) 240 | (else actions))) 241 | (group-table table))) 242 | 243 | ;; build-table: grammar string bool #f|int #f|int -> parse-table 244 | (define (build-table g 245 | file 246 | suppress 247 | expected-SR-conflicts 248 | expected-RR-conflicts) 249 | (let* ((a (build-lr0-automaton g)) 250 | (term-vector (list->vector (send g get-terms))) 251 | (end-terms (send g get-end-terms)) 252 | (table (make-parse-table (send a get-num-states))) 253 | (get-lookahead (compute-LA a g)) 254 | (reduce-cache (make-hash))) 255 | 256 | (for-each 257 | (lambda (trans-key/state) 258 | (let ((from-state-index (kernel-index (trans-key-st (car trans-key/state)))) 259 | (gs (trans-key-gs (car trans-key/state))) 260 | (to-state (cdr trans-key/state))) 261 | (table-add! table from-state-index gs 262 | (cond 263 | ((non-term? gs) 264 | (make-goto (kernel-index to-state))) 265 | ((member gs end-terms) 266 | (make-accept)) 267 | (else 268 | (make-shift 269 | (kernel-index to-state))))))) 270 | (send a get-transitions)) 271 | 272 | (send a for-each-state 273 | (lambda (state) 274 | (for-each 275 | (lambda (item) 276 | (let ((item-prod (item-prod item))) 277 | (bit-vector-for-each 278 | (lambda (term-index) 279 | (unless (start-item? item) 280 | (let ((r (hash-ref reduce-cache item-prod 281 | (lambda () 282 | (let ((r (make-reduce item-prod))) 283 | (hash-set! reduce-cache item-prod r) 284 | r))))) 285 | (table-add! table 286 | (kernel-index state) 287 | (vector-ref term-vector term-index) 288 | r)))) 289 | (get-lookahead state item-prod)))) 290 | (append (hash-ref (send a get-epsilon-trans) state (lambda () null)) 291 | (filter (lambda (item) 292 | (not (move-dot-right item))) 293 | (kernel-items state)))))) 294 | 295 | (let ((grouped-table (resolve-prec-conflicts table))) 296 | (unless (string=? file "") 297 | (with-handlers [(exn:fail:filesystem? 298 | (lambda (e) 299 | (eprintf 300 | "Cannot write debug output to file \"~a\": ~a\n" 301 | file 302 | (exn-message e))))] 303 | (call-with-output-file file 304 | (lambda (port) 305 | (display-parser a grouped-table (send g get-prods) port)) 306 | #:exists 'truncate))) 307 | (resolve-conflicts grouped-table 308 | suppress 309 | expected-SR-conflicts 310 | expected-RR-conflicts)))) 311 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/private-yacc/yacc-helper.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require racket/list 4 | "../private-lex/token-syntax.rkt") 5 | 6 | ;; General helper routines 7 | 8 | (provide duplicate-list? remove-duplicates overlap? vector-andmap display-yacc) 9 | 10 | (define (vector-andmap f v) 11 | (let loop ((i 0)) 12 | (cond 13 | ((= i (vector-length v)) #t) 14 | (else (if (f (vector-ref v i)) 15 | (loop (add1 i)) 16 | #f))))) 17 | 18 | ;; duplicate-list?: symbol list -> #f | symbol 19 | ;; returns a symbol that exists twice in l, or false if no such symbol 20 | ;; exists 21 | (define (duplicate-list? l) 22 | (letrec ((t (make-hasheq)) 23 | (dl? (lambda (l) 24 | (cond 25 | ((null? l) #f) 26 | ((hash-ref t (car l) #f) => 27 | (lambda (x) x)) 28 | (else 29 | (hash-set! t (car l) (car l)) 30 | (dl? (cdr l))))))) 31 | (dl? l))) 32 | 33 | ;; remove-duplicates: syntax-object list -> syntax-object list 34 | ;; removes the duplicates from the lists 35 | (define (remove-duplicates sl) 36 | (let ((t (make-hasheq))) 37 | (letrec ((x 38 | (lambda (sl) 39 | (cond 40 | ((null? sl) sl) 41 | ((hash-ref t (syntax->datum (car sl)) #f) 42 | (x (cdr sl))) 43 | (else 44 | (hash-set! t (syntax->datum (car sl)) #t) 45 | (cons (car sl) (x (cdr sl)))))))) 46 | (x sl)))) 47 | 48 | ;; overlap?: symbol list * symbol list -> #f | symbol 49 | ;; Returns an symbol in l1 intersect l2, or #f is no such symbol exists 50 | (define (overlap? l1 l2) 51 | (let/ec ret 52 | (let ((t (make-hasheq))) 53 | (for-each (lambda (s1) 54 | (hash-set! t s1 s1)) 55 | l1) 56 | (for-each (lambda (s2) 57 | (cond 58 | ((hash-ref t s2 #f) => 59 | (lambda (o) (ret o))))) 60 | l2) 61 | #f))) 62 | 63 | 64 | (define (display-yacc grammar tokens start precs port) 65 | (let-syntax ((p (syntax-rules () 66 | ((_ args ...) (fprintf port args ...))))) 67 | (let* ((tokens (map syntax-local-value tokens)) 68 | (eterms (filter e-terminals-def? tokens)) 69 | (terms (filter terminals-def? tokens)) 70 | (term-table (make-hasheq)) 71 | (display-rhs 72 | (lambda (rhs) 73 | (for-each (lambda (sym) (p "~a " (hash-ref term-table sym sym))) 74 | (car rhs)) 75 | (when (= 3 (length rhs)) 76 | (p "%prec ~a" (cadadr rhs))) 77 | (p "\n")))) 78 | (for-each 79 | (lambda (t) 80 | (for-each 81 | (lambda (t) 82 | (hash-set! term-table t (format "'~a'" t))) 83 | (syntax->datum (e-terminals-def-t t)))) 84 | eterms) 85 | (for-each 86 | (lambda (t) 87 | (for-each 88 | (lambda (t) 89 | (p "%token ~a\n" t) 90 | (hash-set! term-table t (format "~a" t))) 91 | (syntax->datum (terminals-def-t t)))) 92 | terms) 93 | (when precs 94 | (for-each (lambda (prec) 95 | (p "%~a " (car prec)) 96 | (for-each (lambda (tok) 97 | (p " ~a" (hash-ref term-table tok))) 98 | (cdr prec)) 99 | (p "\n")) 100 | precs)) 101 | (p "%start ~a\n" start) 102 | (p "%%\n") 103 | 104 | (for-each (lambda (prod) 105 | (let ((nt (car prod))) 106 | (p "~a: " nt) 107 | (display-rhs (cadr prod)) 108 | (for-each (lambda (rhs) 109 | (p "| ") 110 | (display-rhs rhs)) 111 | (cddr prod)) 112 | (p ";\n"))) 113 | grammar) 114 | (p "%%\n")))) 115 | 116 | -------------------------------------------------------------------------------- /parser-tools-lib/parser-tools/yacc-to-scheme.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require parser-tools/lex 3 | (prefix-in : parser-tools/lex-sre) 4 | parser-tools/yacc 5 | syntax/readerr 6 | racket/list) 7 | (provide trans) 8 | 9 | (define match-double-string 10 | (lexer 11 | ((:+ (:~ #\" #\\)) (append (string->list lexeme) 12 | (match-double-string input-port))) 13 | ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-double-string input-port))) 14 | (#\" null))) 15 | 16 | (define match-single-string 17 | (lexer 18 | ((:+ (:~ #\' #\\)) (append (string->list lexeme) 19 | (match-single-string input-port))) 20 | ((:: #\\ any-char) (cons (string-ref lexeme 1) (match-single-string input-port))) 21 | (#\' null))) 22 | 23 | (define-lex-abbrevs 24 | (letter (:or (:/ "a" "z") (:/ "A" "Z"))) 25 | (digit (:/ "0" "9")) 26 | (initial (:or letter (char-set "!$%&*/<=>?^_~@"))) 27 | (subsequent (:or initial digit (char-set "+-.@"))) 28 | (comment (:: "/*" (complement (:: any-string "*/" any-string)) "*/"))) 29 | 30 | (define-empty-tokens x 31 | (EOF PIPE |:| SEMI |%%| %prec)) 32 | (define-tokens y 33 | (SYM STRING)) 34 | 35 | (define get-token-grammar 36 | (lexer-src-pos 37 | ("%%" '|%%|) 38 | (":" (string->symbol lexeme)) 39 | ("%prec" (string->symbol lexeme)) 40 | (#\| 'PIPE) 41 | ((:+ (:or #\newline #\tab " " comment (:: "{" (:* (:~ "}")) "}"))) 42 | (return-without-pos (get-token-grammar input-port))) 43 | (#\; 'SEMI) 44 | (#\' (token-STRING (string->symbol (list->string (match-single-string input-port))))) 45 | (#\" (token-STRING (string->symbol (list->string (match-double-string input-port))))) 46 | ((:: initial (:* subsequent)) (token-SYM (string->symbol lexeme))))) 47 | 48 | (define (parse-grammar enter-term enter-empty-term enter-non-term) 49 | (parser 50 | (tokens x y) 51 | (src-pos) 52 | (error (lambda (tok-ok tok-name tok-value start-pos end-pos) 53 | (raise-read-error 54 | (format "Error Parsing YACC grammar at token: ~a with value: ~a" tok-name tok-value) 55 | (file-path) 56 | (position-line start-pos) 57 | (position-col start-pos) 58 | (position-offset start-pos) 59 | (- (position-offset end-pos) (position-offset start-pos))))) 60 | 61 | (end |%%|) 62 | (start gram) 63 | (grammar 64 | (gram 65 | ((production) (list $1)) 66 | ((production gram) (cons $1 $2))) 67 | (production 68 | ((SYM |:| prods SEMI) 69 | (begin 70 | (enter-non-term $1) 71 | (cons $1 $3)))) 72 | (prods 73 | ((rhs) (list `(,$1 #f))) 74 | ((rhs prec) (list `(,$1 ,$2 #f))) 75 | ((rhs PIPE prods) (cons `(,$1 #f) $3)) 76 | ((rhs prec PIPE prods) (cons `(,$1 ,$2 #f) $4))) 77 | (prec 78 | ((%prec SYM) 79 | (begin 80 | (enter-term $2) 81 | (list 'prec $2))) 82 | ((%prec STRING) 83 | (begin 84 | (enter-empty-term $2) 85 | (list 'prec $2)))) 86 | (rhs 87 | (() null) 88 | ((SYM rhs) 89 | (begin 90 | (enter-term $1) 91 | (cons $1 $2))) 92 | ((STRING rhs) 93 | (begin 94 | (enter-empty-term $1) 95 | (cons $1 $2))))))) 96 | 97 | (define (symbolstring a) (symbol->string b))) 99 | 100 | (define (trans filename) 101 | (let* ((i (open-input-file filename)) 102 | (terms (make-hasheq)) 103 | (eterms (make-hasheq)) 104 | (nterms (make-hasheq)) 105 | (enter-term 106 | (lambda (s) 107 | (when (not (hash-ref nterms s (lambda () #f))) 108 | (hash-set! terms s #t)))) 109 | (enter-empty-term 110 | (lambda (s) 111 | (when (not (hash-ref nterms s (lambda () #f))) 112 | (hash-set! eterms s #t)))) 113 | (enter-non-term 114 | (lambda (s) 115 | (hash-remove! terms s) 116 | (hash-remove! eterms s) 117 | (hash-set! nterms s #t)))) 118 | (port-count-lines! i) 119 | (file-path filename) 120 | (regexp-match "%%" i) 121 | (begin0 122 | (let ((gram ((parse-grammar enter-term enter-empty-term enter-non-term) 123 | (lambda () 124 | (let ((t (get-token-grammar i))) 125 | t))))) 126 | `(begin 127 | (define-tokens t ,(sort (hash-map terms (lambda (k v) k)) symbol 18 | ;; (vectorof (symbol runtime-action hashtable)) 19 | (define-for-syntax (convert-parse-table table) 20 | (list->vector 21 | (map 22 | (lambda (state-entry) 23 | (let ((ht (make-hasheq))) 24 | (for-each 25 | (lambda (gs/action) 26 | (hash-set! ht 27 | (gram-sym-symbol (car gs/action)) 28 | (action->runtime-action (cdr gs/action)))) 29 | state-entry) 30 | ht)) 31 | (vector->list table)))) 32 | 33 | (define-syntax (parser stx) 34 | (syntax-case stx () 35 | ((_ args ...) 36 | (let ((arg-list (syntax->list (syntax (args ...)))) 37 | (src-pos #f) 38 | (debug #f) 39 | (error #f) 40 | (tokens #f) 41 | (start #f) 42 | (end #f) 43 | (precs #f) 44 | (suppress #f) 45 | (expected-SR-conflicts #f) 46 | (expected-RR-conflicts #f) 47 | (grammar #f) 48 | (yacc-output #f)) 49 | (for-each 50 | (lambda (arg) 51 | (syntax-case* arg (debug error tokens start end precs grammar 52 | suppress src-pos yacc-output 53 | expected-SR-conflicts 54 | expected-RR-conflicts) 55 | (lambda (a b) 56 | (eq? (syntax-e a) (syntax-e b))) 57 | ((debug filename) 58 | (cond 59 | ((not (string? (syntax-e (syntax filename)))) 60 | (raise-syntax-error 61 | #f 62 | "Debugging filename must be a string" 63 | stx 64 | (syntax filename))) 65 | (debug 66 | (raise-syntax-error #f "Multiple debug declarations" stx)) 67 | (else 68 | (set! debug (syntax-e (syntax filename)))))) 69 | ((suppress) 70 | (set! suppress #t)) 71 | ((expected-SR-conflicts n) 72 | (set! expected-SR-conflicts (syntax-e (syntax n)))) 73 | ((expected-RR-conflicts n) 74 | (set! expected-RR-conflicts (syntax-e (syntax n)))) 75 | ((src-pos) 76 | (set! src-pos #t)) 77 | ((error expression) 78 | (if error 79 | (raise-syntax-error #f "Multiple error declarations" stx) 80 | (set! error (syntax expression)))) 81 | ((tokens def ...) 82 | (begin 83 | (when tokens 84 | (raise-syntax-error #f "Multiple tokens declarations" stx)) 85 | (let ((defs (syntax->list (syntax (def ...))))) 86 | (for-each 87 | (lambda (d) 88 | (unless (identifier? d) 89 | (raise-syntax-error 90 | #f 91 | "Token-group name must be an identifier" 92 | stx 93 | d))) 94 | defs) 95 | (set! tokens defs)))) 96 | ((start symbol ...) 97 | (let ((symbols (syntax->list (syntax (symbol ...))))) 98 | (for-each 99 | (lambda (sym) 100 | (unless (identifier? sym) 101 | (raise-syntax-error #f 102 | "Start symbol must be a symbol" 103 | stx 104 | sym))) 105 | symbols) 106 | (when start 107 | (raise-syntax-error #f "Multiple start declarations" stx)) 108 | (when (null? symbols) 109 | (raise-syntax-error #f 110 | "Missing start symbol" 111 | stx 112 | arg)) 113 | (set! start symbols))) 114 | ((end symbols ...) 115 | (let ((symbols (syntax->list (syntax (symbols ...))))) 116 | (for-each 117 | (lambda (sym) 118 | (unless (identifier? sym) 119 | (raise-syntax-error #f 120 | "End token must be a symbol" 121 | stx 122 | sym))) 123 | symbols) 124 | (let ((d (duplicate-list? (map syntax-e symbols)))) 125 | (when d 126 | (raise-syntax-error 127 | #f 128 | (format "Duplicate end token definition for ~a" d) 129 | stx 130 | arg)) 131 | (when (null? symbols) 132 | (raise-syntax-error 133 | #f 134 | "end declaration must contain at least 1 token" 135 | stx 136 | arg)) 137 | (when end 138 | (raise-syntax-error #f "Multiple end declarations" stx)) 139 | (set! end symbols)))) 140 | ((precs decls ...) 141 | (if precs 142 | (raise-syntax-error #f "Multiple precs declarations" stx) 143 | (set! precs (syntax/loc arg (decls ...))))) 144 | ((grammar prods ...) 145 | (if grammar 146 | (raise-syntax-error #f "Multiple grammar declarations" stx) 147 | (set! grammar (syntax/loc arg (prods ...))))) 148 | ((yacc-output filename) 149 | (cond 150 | ((not (string? (syntax-e (syntax filename)))) 151 | (raise-syntax-error #f 152 | "Yacc-output filename must be a string" 153 | stx 154 | (syntax filename))) 155 | (yacc-output 156 | (raise-syntax-error #f "Multiple yacc-output declarations" stx)) 157 | (else 158 | (set! yacc-output (syntax-e (syntax filename)))))) 159 | (_ (raise-syntax-error #f "argument must match (debug filename), (error expression), (tokens def ...), (start non-term), (end tokens ...), (precs decls ...), or (grammar prods ...)" stx arg)))) 160 | (syntax->list (syntax (args ...)))) 161 | (unless tokens 162 | (raise-syntax-error #f "missing tokens declaration" stx)) 163 | (unless error 164 | (raise-syntax-error #f "missing error declaration" stx)) 165 | (unless grammar 166 | (raise-syntax-error #f "missing grammar declaration" stx)) 167 | (unless end 168 | (raise-syntax-error #f "missing end declaration" stx)) 169 | (unless start 170 | (raise-syntax-error #f "missing start declaration" stx)) 171 | (let-values (((table all-term-syms actions check-syntax-fix) 172 | (build-parser (if debug debug "") 173 | src-pos 174 | suppress 175 | expected-SR-conflicts 176 | expected-RR-conflicts 177 | tokens 178 | start 179 | end 180 | precs 181 | grammar))) 182 | (when (and yacc-output (not (string=? yacc-output ""))) 183 | (with-handlers [(exn:fail:filesystem? 184 | (lambda (e) 185 | (eprintf 186 | "Cannot write yacc-output to file \"~a\"\n" 187 | yacc-output)))] 188 | (call-with-output-file yacc-output 189 | (lambda (port) 190 | (display-yacc (syntax->datum grammar) 191 | tokens 192 | (map syntax->datum start) 193 | (if precs 194 | (syntax->datum precs) 195 | #f) 196 | port)) 197 | #:exists 'truncate))) 198 | (with-syntax ((check-syntax-fix check-syntax-fix) 199 | (err error) 200 | (ends end) 201 | (starts start) 202 | (debug debug) 203 | (table (convert-parse-table table)) 204 | (all-term-syms all-term-syms) 205 | (actions actions) 206 | (src-pos src-pos)) 207 | (syntax 208 | (begin 209 | check-syntax-fix 210 | (parser-body debug err (quote starts) (quote ends) table all-term-syms actions src-pos))))))) 211 | (_ 212 | (raise-syntax-error #f 213 | "parser must have the form (parser args ...)" 214 | stx)))) 215 | 216 | (define (reduce-stack stack num ret-vals src-pos) 217 | (cond 218 | ((> num 0) 219 | (let* ((top-frame (car stack)) 220 | (ret-vals 221 | (if src-pos 222 | (cons (stack-frame-value top-frame) 223 | (cons (stack-frame-start-pos top-frame) 224 | (cons (stack-frame-end-pos top-frame) 225 | ret-vals))) 226 | (cons (stack-frame-value top-frame) ret-vals)))) 227 | (reduce-stack (cdr stack) (sub1 num) ret-vals src-pos))) 228 | (else (values stack ret-vals)))) 229 | 230 | ;; extract-helper : (symbol or make-token) any any -> symbol any any any 231 | (define (extract-helper tok v1 v2) 232 | (cond 233 | ((symbol? tok) 234 | (values tok #f v1 v2)) 235 | ((token? tok) 236 | (values (real-token-name tok) (real-token-value tok) v1 v2)) 237 | (else (raise-argument-error 'parser 238 | "(or/c symbol? token?)" 239 | 0 240 | tok)))) 241 | 242 | ;; well-formed-position-token?: any -> boolean 243 | ;; Returns true if pt is a position token whose position-token-token 244 | ;; is itself a token or a symbol. 245 | ;; This is meant to help raise more precise error messages when 246 | ;; a tokenizer produces an erroneous position-token wrapped twice. 247 | ;; (as often happens when omitting return-without-pos). 248 | (define (well-formed-position-token? pt) 249 | (and (position-token? pt) 250 | (let ([t (position-token-token pt)]) 251 | (or (symbol? t) 252 | (token? t))))) 253 | 254 | ;; extract-src-pos : position-token -> symbol any any any 255 | (define (extract-src-pos ip) 256 | (cond 257 | ((well-formed-position-token? ip) 258 | (extract-helper (position-token-token ip) 259 | (position-token-start-pos ip) 260 | (position-token-end-pos ip))) 261 | (else 262 | (raise-argument-error 'parser 263 | "well-formed-position-token?" 264 | 0 265 | ip)))) 266 | 267 | ;; extract-no-src-pos : (symbol or make-token) -> symbol any any any 268 | (define (extract-no-src-pos ip) 269 | (extract-helper ip #f #f)) 270 | 271 | (define-struct stack-frame (state value start-pos end-pos) #:inspector (make-inspector)) 272 | 273 | (define (make-empty-stack i) (list (make-stack-frame i #f #f #f))) 274 | 275 | ;; The table is a vector that maps each state to a hash-table that maps a 276 | ;; terminal symbol to either an accept, shift, reduce, or goto structure. 277 | ; We encode the structures according to the runtime-action data definition in 278 | ;; parser-actions.rkt 279 | (define (parser-body debug? given-err starts ends table all-term-syms actions src-pos) 280 | (define (pre-err stack-excerpt . more) 281 | (define-values (req opts) (procedure-keywords given-err)) 282 | (if (or (not opts) (member '#:stack opts)) 283 | (apply given-err #:stack stack-excerpt more) 284 | (apply given-err more))) 285 | (define (err stack tok-ok? tok val start-pos end-pos) 286 | (define stack-excerpt 287 | (map (λ (sf) (cons (stack-frame-state sf) (stack-frame-value sf))) 288 | stack)) 289 | (if src-pos 290 | (pre-err stack-excerpt tok-ok? tok val start-pos end-pos) 291 | (pre-err stack-excerpt tok-ok? tok val))) 292 | (local ((define extract 293 | (if src-pos 294 | extract-src-pos 295 | extract-no-src-pos)) 296 | 297 | (define (fix-error stack tok val start-pos end-pos get-token) 298 | (when debug? (pretty-print stack)) 299 | (local ((define (remove-input tok val start-pos end-pos) 300 | (if (memq tok ends) 301 | (raise-read-error "parser: Cannot continue after error" 302 | #f #f #f #f #f) 303 | (let ((a (find-action stack tok val start-pos end-pos))) 304 | (cond 305 | ((runtime-shift? a) 306 | ;; (printf "shift:~a\n" (runtime-shift-state a)) 307 | (cons (make-stack-frame (runtime-shift-state a) 308 | val 309 | start-pos 310 | end-pos) 311 | stack)) 312 | (else 313 | ;; (printf "discard input:~a\n" tok) 314 | (let-values (((tok val start-pos end-pos) 315 | (extract (get-token)))) 316 | (remove-input tok val start-pos end-pos)))))))) 317 | (let remove-states () 318 | (let ((a (find-action stack 'error #f start-pos end-pos))) 319 | (cond 320 | ((runtime-shift? a) 321 | ;; (printf "shift:~a\n" (runtime-shift-state a)) 322 | (set! stack 323 | (cons 324 | (make-stack-frame (runtime-shift-state a) 325 | #f 326 | start-pos 327 | end-pos) 328 | stack)) 329 | (remove-input tok val start-pos end-pos)) 330 | (else 331 | ;; (printf "discard state:~a\n" (car stack)) 332 | (cond 333 | ((< (length stack) 2) 334 | (raise-read-error "parser: Cannot continue after error" 335 | #f #f #f #f #f)) 336 | (else 337 | (set! stack (cdr stack)) 338 | (remove-states))))))))) 339 | 340 | (define (find-action stack tok val start-pos end-pos) 341 | (unless (hash-ref all-term-syms 342 | tok 343 | #f) 344 | (err stack #f tok val start-pos end-pos) 345 | (raise-read-error (format "parser: got token of unknown type ~a" tok) 346 | #f #f #f #f #f)) 347 | (hash-ref (vector-ref table (stack-frame-state (car stack))) 348 | tok 349 | #f)) 350 | 351 | (define (make-parser start-number) 352 | (lambda (get-token) 353 | (unless (and (procedure? get-token) 354 | (procedure-arity-includes? get-token 0)) 355 | (error 'get-token "expected a nullary procedure, got ~e" get-token)) 356 | (let parsing-loop ((stack (make-empty-stack start-number)) 357 | (ip (get-token))) 358 | (let-values (((tok val start-pos end-pos) 359 | (extract ip))) 360 | (let ((action (find-action stack tok val start-pos end-pos))) 361 | (cond 362 | ((runtime-shift? action) 363 | ;; (printf "shift:~a\n" (runtime-shift-state action)) 364 | (parsing-loop (cons (make-stack-frame (runtime-shift-state action) 365 | val 366 | start-pos 367 | end-pos) 368 | stack) 369 | (get-token))) 370 | ((runtime-reduce? action) 371 | ;; (printf "reduce:~a\n" (runtime-reduce-prod-num action)) 372 | (let-values (((new-stack args) 373 | (reduce-stack stack 374 | (runtime-reduce-rhs-length action) 375 | null 376 | src-pos))) 377 | (let ((goto 378 | (runtime-goto-state 379 | (hash-ref 380 | (vector-ref table (stack-frame-state (car new-stack))) 381 | (runtime-reduce-lhs action))))) 382 | (parsing-loop 383 | (cons 384 | (if src-pos 385 | (make-stack-frame 386 | goto 387 | (apply (vector-ref actions (runtime-reduce-prod-num action)) args) 388 | (if (null? args) start-pos (cadr args)) 389 | (if (null? args) 390 | end-pos 391 | (list-ref args (- (* (runtime-reduce-rhs-length action) 3) 1)))) 392 | (make-stack-frame 393 | goto 394 | (apply (vector-ref actions (runtime-reduce-prod-num action)) args) 395 | #f 396 | #f)) 397 | new-stack) 398 | ip)))) 399 | ((runtime-accept? action) 400 | ;; (printf "accept\n") 401 | (stack-frame-value (car stack))) 402 | (else 403 | (err stack #t tok val start-pos end-pos) 404 | (parsing-loop (fix-error stack tok val start-pos end-pos get-token) 405 | (get-token)))))))))) 406 | (cond 407 | ((null? (cdr starts)) (make-parser 0)) 408 | (else 409 | (let loop ((l starts) 410 | (i 0)) 411 | (cond 412 | ((null? l) null) 413 | (else (cons (make-parser i) (loop (cdr l) (add1 i)))))))))) 414 | -------------------------------------------------------------------------------- /parser-tools/LICENSE.txt: -------------------------------------------------------------------------------- 1 | parser-tools 2 | Copyright (c) 2010-2014 PLT Design Inc. 3 | 4 | This package is distributed under the GNU Lesser General Public 5 | License (LGPL). This means that you can link this package into proprietary 6 | applications, provided you follow the rules stated in the LGPL. You 7 | can also modify this package; if you distribute a modified version, 8 | you must distribute it under the terms of the LGPL, which in 9 | particular means that you must release the source code for the 10 | modified software. See http://www.gnu.org/copyleft/lesser.html 11 | for more information. 12 | -------------------------------------------------------------------------------- /parser-tools/info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define collection 'multi) 4 | 5 | (define deps '("parser-tools-lib" 6 | "parser-tools-doc")) 7 | (define implies '("parser-tools-lib" 8 | "parser-tools-doc")) 9 | 10 | (define pkg-desc "Lex- and Yacc-style parsing tools") 11 | 12 | (define pkg-authors '(mflatt)) 13 | 14 | (define license 15 | '(Apache-2.0 OR MIT)) 16 | --------------------------------------------------------------------------------