├── src ├── package.lisp ├── util.lisp ├── tokenize.lisp └── parse.lisp ├── parse-js.asd ├── as.txt ├── LICENSE └── index.html /src/package.lisp: -------------------------------------------------------------------------------- 1 | (cl:defpackage #:parse-js 2 | (:use #:cl) 3 | (:export #:token-type #:token-value #:token-line #:token-char #:token-pos 4 | #:token-newline-before #:token-comments-before 5 | #:lex-js #:parse-js #:parse-js-string #:read-js-number 6 | #:js-parse-error #:js-parse-error-line #:js-parse-error-char 7 | #:*check-for-reserved-words* #:*ecma-version*)) 8 | -------------------------------------------------------------------------------- /parse-js.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem #:parse-js 2 | :description "JavaScript parser" 3 | :author "Marijn Haverbeke " 4 | :license "BSD" 5 | :components 6 | ((:module :src 7 | :components ((:file "package") 8 | (:file "util" :depends-on ("package")) 9 | (:file "tokenize" :depends-on ("util")) 10 | (:file "parse" :depends-on ("tokenize")))))) 11 | -------------------------------------------------------------------------------- /src/util.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:parse-js) 2 | 3 | (defmacro with-defs (&body body) 4 | (loop :for form :in body 5 | :if (and (eq (car form) 'def) (< (length form) 4)) 6 | :collect (cadr form) :into vars :and 7 | :if (caddr form) :collect `(setf ,(cadr form) ,(caddr form)) :into body :end 8 | :else :if (eq (car form) 'def) 9 | :collect (cdr form) :into funcs 10 | :else 11 | :collect form :into body 12 | :finally (return `(let ,vars (labels ,funcs ,@body))))) 13 | 14 | (defmacro defun/defs (name args &body body) 15 | `(defun ,name ,args (with-defs ,@body))) 16 | -------------------------------------------------------------------------------- /as.txt: -------------------------------------------------------------------------------- 1 | (:atom atom) 2 | (:num num) 3 | (:string str) 4 | (:name name) 5 | (:array elems) 6 | (:object properties) 7 | (:regexp expr flags) 8 | 9 | (:assign op place val) 10 | (:binary op lhs rhs) 11 | (:unary-postfix op place) 12 | (:unary-prefix op place) 13 | (:call func args) 14 | (:dot obj attr) 15 | (:sub obj attr) 16 | (:seq form1 result) 17 | (:conditional test then else) 18 | (:function name args stat*) 19 | (:new func args) 20 | 21 | (:toplevel stat*) 22 | (:block stat*) 23 | (:stat form) 24 | (:label name form) 25 | (:if test then else) 26 | (:with obj body) 27 | (:var bindings) 28 | (:defun name args stat*) 29 | (:return value) 30 | (:debugger) 31 | 32 | (:try body catch finally) 33 | (:throw expr) 34 | 35 | (:break label) 36 | (:continue label) 37 | (:while cond body) 38 | (:do cond body) 39 | (:for init cond step body) 40 | (:for-in init lhs obj body) 41 | (:switch val (case . body)*) 42 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) Marijn Haverbeke, marijnh@gmail.com 2 | 3 | This software is provided 'as-is', without any express or implied 4 | warranty. In no event will the authors be held liable for any 5 | damages arising from the use of this software. 6 | 7 | Permission is granted to anyone to use this software for any 8 | purpose, including commercial applications, and to alter it and 9 | redistribute it freely, subject to the following restrictions: 10 | 11 | 1. The origin of this software must not be misrepresented; you must 12 | not claim that you wrote the original software. If you use this 13 | software in a product, an acknowledgment in the product 14 | documentation would be appreciated but is not required. 15 | 16 | 2. Altered source versions must be plainly marked as such, and must 17 | not be misrepresented as being the original software. 18 | 19 | 3. This notice may not be removed or altered from any source 20 | distribution. 21 | -------------------------------------------------------------------------------- /index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | parse-js / A JavaScript parser 4 | 5 | 25 | 26 | 27 |

parse-js

28 | 29 |

parse-js is a Common Lisp package for parsing 30 | JavaScript — ECMAScript 32 | 3, to be more precise. It is released under a zlib-style licence. For any feedback, contact me: Marijn Haverbeke.

35 | 36 |

The library can be downloaded, 38 | checked out from the git repository, 40 | or installed with asdf-install.

42 | 43 |

News

44 | 45 |

07-02-2013: New release. More corner-case bugs fixed. 46 | Representation of for-in nodes changed to accomodate things 47 | like for (x.y in z). Array element expressions may 48 | now be nil, when parsing a literal like [1,,3].

49 | 50 |

03-01-2011: New release. Lots of conformance fixes, 51 | driven 52 | by CL-JavaScript 53 | and UglifyJS 54 | work. parse-js-string is deprecated now 55 | (parse-js accepts strings), and basic support for 56 | ECMAScript 5 has been added.

57 | 58 |

11-06-2010: Move from darcs to git for version 59 | control, update release tarball.

60 | 61 |

Reference

62 | 63 |

64 | function parse-js (input &key ecma-version strict-semicolons reserved-words) 65 |
→ syntax-tree 66 |

67 | 68 |

Reads a program from a string or a stream, and 69 | produces an abstract syntax tree, which is a nested structure 70 | consisting of lists starting with keywords. The exact format of 71 | this structure is not very well documented, but the file as.txt gives a basic description.

73 | 74 |

The keyword arguments can be used to influence the 75 | parsing mode. emca-version can be 3 or 76 | 5, and influences the standard that is followed. The 77 | default is 3. Support for version 5 is incomplete at this time. 78 | When strict-semicolons is true, the parser will 79 | complain about missing semicolons, even when they would have been 80 | inserted by 'automatic semicolon insertion' rules. Finally, if 81 | reserved-words is true, the parser will complain 82 | about 'future reserved words', such as class being 83 | used.

84 | 85 |

86 | class js-parse-error 87 |

88 | 89 |

The type of errors raised when invalid input is 90 | encountered. Inherits from simple-error, 92 | and has js-parse-error-line and 93 | js-parse-error-char accessors that can be used 94 | to read the location at which the error occurred.

95 | 96 |

97 | function lex-js (stream) 98 |
→ function 99 |

100 | 101 |

A JavaScript tokeniser. The function returned can 102 | be called repeatedly to read the next token object. See below for 103 | a description of these objects. When the end of the stream is 104 | reached, tokens with type :eof are returned.

105 | 106 |

107 | function token-type (token) 108 |
→ keyword 109 |

110 | 111 |

Reader for the type of token objects. Types are 112 | keywords (one of :num :punc :string :operator :name :atom 113 | :keyword :eof).

114 | 115 |

116 | function token-value (token) 117 |
→ value 118 |

119 | 120 |

Reader for the content of token objects. The type 121 | of this value depends on the type of the token ― it holds 122 | strings for names, for example, and numbers for number tokens.

123 | 124 |

125 | function token-line (token) 126 |
→ number 127 |

128 | 129 |

The line on which a token was read.

130 | 131 |

132 | function token-char (token) 133 |
→ number 134 |

135 | 136 |

The character at which a token starts.

137 | 138 | 139 | -------------------------------------------------------------------------------- /src/tokenize.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:parse-js) 2 | 3 | (defstruct token type value line char pos newline-before comments-before) 4 | (defun tokenp (token type value) 5 | (and (eq (token-type token) type) 6 | (eql (token-value token) value))) 7 | (defun token-type-p (token type) 8 | (eq (token-type token) type)) 9 | (defun token-id (token) 10 | (token-value token)) 11 | 12 | (defvar *line*) 13 | (defvar *char*) 14 | (defvar *position*) 15 | 16 | (define-condition js-parse-error (simple-error) 17 | ((line :initform *line* :reader js-parse-error-line) 18 | (char :initform *char* :reader js-parse-error-char))) 19 | (defmethod print-object ((err js-parse-error) stream) 20 | (call-next-method) 21 | (format stream " (line ~a, character ~a)" (js-parse-error-line err) (js-parse-error-char err))) 22 | (defun js-parse-error (control &rest args) 23 | (error 'js-parse-error :format-control control :format-arguments args)) 24 | 25 | (defparameter *operator-chars* "+-*&%=<>!?|~^") 26 | (defparameter *operators* 27 | (let ((ops (make-hash-table :test 'equal))) 28 | (dolist (op '(:in :instanceof :typeof :new :void :delete :++ :-- :+ :- :! :~ :& :|\|| :^ :* :/ :% 29 | :>> :<< :>>> :< :> :<= :>= :== :=== :!= :!== :? := :+= :-= :/= :*= :%= :>>= :<<= 30 | :>>>= :~= :%= :|\|=| :^= :&= :&& :|\|\||)) 31 | (setf (gethash (string-downcase (string op)) ops) op)) 32 | ops)) 33 | 34 | (defparameter *whitespace-chars* 35 | (concatenate '(vector character) (list #\space #\tab #.(code-char 11) #\page #\return #\newline 36 | (code-char #xa0) (code-char #x2028) (code-char #x2029)))) 37 | (defparameter *line-terminators* 38 | (concatenate '(vector character) (list #\newline #\return (code-char #x2028) (code-char #x2029)))) 39 | 40 | (defparameter *keywords* 41 | (let ((keywords (make-hash-table :test 'equal))) 42 | (dolist (word '(:break :case :catch :continue :debugger :default :delete :do :else :false 43 | :finally :for :function :if :in :instanceof :new :null :return :switch 44 | :throw :true :try :typeof :var :void :while :with)) 45 | (setf (gethash (string-downcase (string word)) keywords) word)) 46 | keywords)) 47 | (defparameter *keywords-before-expression* '(:return :new :delete :throw :else :case)) 48 | (defparameter *atom-keywords* '(:false :null :true :undefined)) 49 | (defparameter *reserved-words-ecma-3* 50 | (let ((words (make-hash-table :test 'equal))) 51 | (dolist (word '("abstract" "enum" "int" "short" "boolean" "export" "interface" "static" 52 | "byte" "extends" "long" "super" "char" "final" "native" "synchronized" 53 | "class" "float" "package" "throws" "const" "goto" "private" "transient" 54 | "debugger" "implements" "protected" "volatile" "double" "import" "public")) 55 | (setf (gethash word words) t)) 56 | words)) 57 | (defparameter *reserved-words-ecma-5* 58 | (let ((words (make-hash-table :test 'equal))) 59 | (dolist (word '("class" "enum" "extends" "super" "const" "export" "import")) 60 | (setf (gethash word words) t)) 61 | words)) 62 | (defparameter *check-for-reserved-words* nil) 63 | (defparameter *ecma-version* 3) 64 | 65 | (defun read-js-number (stream &key junk-allowed) 66 | (check-type stream stream) 67 | (flet ((peek-1 () (peek-char nil stream nil nil)) 68 | (next-1 () (read-char stream nil nil))) 69 | (read-js-number-1 #'peek-1 #'next-1 :junk-allowed junk-allowed))) 70 | 71 | (defun read-js-number-1 (peek next &key junk-allowed) 72 | (labels ((digits (radix) 73 | (with-output-to-string (out) 74 | (loop :for ch := (funcall peek) :while (and ch (digit-char-p ch radix)) :do 75 | (write-char (funcall next) out))))) 76 | (let ((minus (case (funcall peek) (#\+ (funcall next) nil) (#\- (funcall next) t))) 77 | (body (digits 10)) 78 | (*read-default-float-format* 'double-float)) 79 | (flet ((ret (x) 80 | (return-from read-js-number-1 81 | (and x (or junk-allowed (eq (funcall peek) nil)) (if minus (if (eq x :infinity) :-infinity (- x)) x))))) 82 | (cond ((and (equal body "0") (find (funcall peek) "xX") (funcall next)) 83 | (ret (parse-integer (digits 16) :junk-allowed t :radix 16))) 84 | ((find (funcall peek) ".eE") 85 | (let ((base (if (string= body "") 0 (parse-integer body))) 86 | (expt 0) (expt-neg nil)) 87 | (if (and (eql (funcall peek) #\.) (funcall next)) 88 | (let ((digs (digits 10))) 89 | (if (string= digs "") 90 | (when (string= body "") (ret nil)) 91 | (loop (handler-case 92 | (return (incf base (/ (parse-integer digs) (expt 10d0 (length digs))))) 93 | (floating-point-overflow () (setf digs (subseq digs 0 (1- (length digs))))))))) 94 | (when (equal body "") (ret nil))) 95 | (when (and (find (funcall peek) "eE") (funcall next)) 96 | (setf expt-neg (and (find (funcall peek) "+-") (eql (funcall next) #\-))) 97 | (let ((digs (digits 10))) 98 | (when (equal digs "") (ret nil)) 99 | (setf expt (parse-integer digs)))) 100 | (handler-case (ret (* base (expt 10d0 (if expt-neg (- expt) expt)))) 101 | (floating-point-overflow () (ret :infinity)) 102 | (floating-point-underflow () (ret 0d0))))) 103 | ((equal body "") (ret nil)) 104 | ((and (char= (char body 0) #\0) 105 | (loop :for i :from 1 :below (length body) :do 106 | (unless (digit-char-p (char body i)) (return nil)) 107 | :finally (return t))) 108 | (ret (parse-integer body :radix 8))) 109 | ((equal body "") (ret nil)) 110 | (t (ret (parse-integer body)))))))) 111 | 112 | (defun/defs lex-js (stream &key include-comments) 113 | (def expression-allowed t) 114 | (def newline-before nil) 115 | (def line 1) 116 | (def char 0) 117 | (def position 0) 118 | (def comments-before nil) 119 | 120 | (def start-token () 121 | (setf *line* line 122 | *char* char 123 | *position* position)) 124 | (def token (type value) 125 | (setf expression-allowed 126 | (or (and (eq type :operator) 127 | (not (member value '("++" "--") :test #'string=))) 128 | (and (eq type :keyword) 129 | (member value *keywords-before-expression*)) 130 | (and (eq type :punc) 131 | (find value "[{(,.;:")))) 132 | (prog1 (make-token :type type :value value :line *line* :char *char* :pos *position* 133 | :newline-before newline-before 134 | :comments-before (reverse comments-before)) 135 | (setf newline-before nil) 136 | (setf comments-before nil))) 137 | 138 | (def peek () 139 | (peek-char nil stream nil)) 140 | (def next (&optional eof-error in-string) 141 | (let ((ch (read-char stream eof-error))) 142 | (when ch 143 | (incf position) 144 | (if (find ch *line-terminators*) 145 | (progn 146 | (setf line (1+ line) char 0) 147 | (unless in-string (setf newline-before t))) 148 | (incf char))) 149 | ch)) 150 | 151 | (def skip-whitespace () 152 | (loop :for ch := (peek) 153 | :while (and ch (find ch *whitespace-chars*)) 154 | :do (next))) 155 | (def read-while (pred) 156 | (with-output-to-string (*standard-output*) 157 | (loop :for ch := (peek) 158 | :while (and ch (funcall pred ch)) 159 | :do (princ (next))))) 160 | 161 | (def read-num (&optional start) 162 | (let ((num (or (read-js-number-1 (lambda () (if start start (peek))) 163 | (lambda () (if start (prog1 start (setf start nil)) (next))) 164 | :junk-allowed t) 165 | (js-parse-error "Invalid syntax.")))) 166 | (token :num num))) 167 | 168 | (def handle-dot () 169 | (next) 170 | (if (digit-char-p (peek)) 171 | (read-num #\.) 172 | (token :punc #\.))) 173 | 174 | (def hex-bytes (n char) 175 | (loop :with num := 0 176 | :for pos :from (1- n) :downto 0 177 | :do (let ((digit (digit-char-p (next t) 16))) 178 | (if digit 179 | (incf num (* digit (expt 16 pos))) 180 | (js-parse-error "Invalid \\~a escape pattern." char))) 181 | :finally (return num))) 182 | (def read-escaped-char (&optional in-string) 183 | (let ((ch (next t in-string))) 184 | (case ch 185 | (#\n #\newline) (#\r #\return) (#\t #\tab) 186 | (#\b #\backspace) (#\v #.(code-char 11)) (#\f #\page) (#\0 #\null) 187 | (#\x (code-char (hex-bytes 2 #\x))) 188 | (#\u (code-char (hex-bytes 4 #\u))) 189 | (#\newline nil) 190 | (t (let ((num (digit-char-p ch 8))) 191 | (if num 192 | (loop :for nx := (digit-char-p (peek) 8) :do 193 | (when (or (not nx) (>= num 32)) (return (code-char num))) 194 | (next) 195 | (setf num (+ nx (* num 8)))) 196 | ch)))))) 197 | (def read-string () 198 | (let ((quote (next))) 199 | (handler-case 200 | (token :string 201 | (with-output-to-string (*standard-output*) 202 | (loop (let ((ch (next t))) 203 | (cond ((eql ch #\\) (let ((ch (read-escaped-char t))) (when ch (write-char ch)))) 204 | ((find ch *line-terminators*) (js-parse-error "Line terminator inside of string.")) 205 | ((eql ch quote) (return)) 206 | (t (write-char ch))))))) 207 | (end-of-file () (js-parse-error "Unterminated string constant."))))) 208 | 209 | (def add-comment (type c) 210 | (when include-comments 211 | ;; doing this instead of calling (token) as we don't want 212 | ;; to put comments-before into a comment token 213 | (push (make-token :type type 214 | :value c 215 | :line *line* 216 | :char *char* 217 | :pos *position* 218 | :newline-before newline-before) 219 | comments-before))) 220 | 221 | (def read-line-comment () 222 | (next) 223 | (if include-comments 224 | (add-comment :comment1 225 | (with-output-to-string (out) 226 | (loop :for ch := (next) 227 | :until (or (find ch *line-terminators*) (not ch)) 228 | :do (write-char ch out)))) 229 | (loop :for ch := (next) 230 | :until (or (find ch *line-terminators*) (not ch))))) 231 | 232 | (def read-multiline-comment () 233 | (next) 234 | (if include-comments 235 | (add-comment :comment2 236 | (with-output-to-string (out) 237 | (loop :with star := nil 238 | :for ch := (or (next) (js-parse-error "Unterminated comment.")) 239 | :until (and star (eql ch #\/)) 240 | :do 241 | (setf star (eql ch #\*)) 242 | (write-char ch out)))) 243 | (loop :with star := nil 244 | :for ch := (or (next) (js-parse-error "Unterminated comment.")) 245 | :until (and star (eql ch #\/)) 246 | :do (setf star (eql ch #\*))))) 247 | 248 | (def read-regexp () 249 | (handler-case 250 | (token :regexp 251 | (cons 252 | (with-output-to-string (*standard-output*) 253 | (loop :with backslash := nil :with inset := nil 254 | :for ch := (next t) :until (and (not backslash) (not inset) (eql ch #\/)) :do 255 | (unless backslash 256 | (when (eql ch #\[) (setf inset t)) 257 | (when (and inset (not backslash) (eql ch #\])) (setf inset nil))) 258 | (setf backslash (and (eql ch #\\) (not backslash))) 259 | ;; Handle \u sequences, since CL-PPCRE does not understand them. 260 | (if (and backslash (eql (peek) #\u)) 261 | (let* ((code (progn 262 | (setf backslash nil) 263 | (next) 264 | (hex-bytes 4 #\u))) 265 | (ch (code-char code))) 266 | ;; on CCL, parsing /\uFFFF/ fails because (code-char #xFFFF) returns NIL. 267 | ;; so when NIL, we better use the original sequence. 268 | (if ch 269 | (write-char ch) 270 | (format t "\\u~4,'0X" code))) 271 | (write-char ch)))) 272 | (read-while #'identifier-char-p))) 273 | (end-of-file () (js-parse-error "Unterminated regular expression.")))) 274 | 275 | (def read-operator (&optional start) 276 | (labels ((grow (str) 277 | (let ((bigger (concatenate 'string str (string (peek))))) 278 | (if (gethash bigger *operators*) 279 | (progn (next) (grow bigger)) 280 | (token :operator (gethash str *operators*)))))) 281 | (grow (or start (string (next)))))) 282 | 283 | (def handle-slash () 284 | (next) 285 | (case (peek) 286 | (#\/ (read-line-comment) 287 | (next-token)) 288 | (#\* (read-multiline-comment) 289 | (next-token)) 290 | (t (if expression-allowed 291 | (read-regexp) 292 | (read-operator "/"))))) 293 | 294 | (def identifier-char-p (ch) (or (and (alphanumericp ch) (not (find ch *whitespace-chars*))) (eql ch #\$) (eql ch #\_))) 295 | (def read-word () 296 | (let* ((unicode-escape nil) 297 | (word (with-output-to-string (*standard-output*) 298 | (loop :for ch := (peek) :do 299 | (cond ((eql ch #\\) 300 | (next) 301 | (unless (eql (next) #\u) (js-parse-error "Unrecognized escape in identifier.")) 302 | (write-char (code-char (hex-bytes 4 #\u))) 303 | (setf unicode-escape t)) 304 | ((and ch (identifier-char-p ch)) (write-char (next))) 305 | (t (return)))))) 306 | (keyword (and (not unicode-escape) (gethash word *keywords*)))) 307 | (cond ((and *check-for-reserved-words* (not unicode-escape) 308 | (gethash word (ecase *ecma-version* (3 *reserved-words-ecma-3*) (5 *reserved-words-ecma-5*)))) 309 | (js-parse-error "'~a' is a reserved word." word)) 310 | ((not keyword) (token :name word)) 311 | ((gethash word *operators*) (token :operator keyword)) 312 | ((member keyword *atom-keywords*) (token :atom keyword)) 313 | (t (token :keyword keyword))))) 314 | 315 | (def next-token (&optional force-regexp) 316 | (if force-regexp 317 | (read-regexp) 318 | (progn 319 | (skip-whitespace) 320 | (start-token) 321 | (let ((next (peek))) 322 | (cond ((not next) (token :eof "EOF")) 323 | ((digit-char-p next) (read-num)) 324 | ((find next "'\"") (read-string)) 325 | ((eql next #\.) (handle-dot)) 326 | ((find next "[]{}(),;:") (token :punc (next))) 327 | ((eql next #\/) (handle-slash)) 328 | ((find next *operator-chars*) (read-operator)) 329 | ((or (identifier-char-p next) (eql next #\\)) (read-word)) 330 | (t (js-parse-error "Unexpected character '~a'." next))))))) 331 | 332 | #'next-token) 333 | -------------------------------------------------------------------------------- /src/parse.lisp: -------------------------------------------------------------------------------- 1 | (in-package #:parse-js) 2 | 3 | (defparameter *unary-prefix* '(:typeof :void :delete :-- :++ :! :~ :- :+)) 4 | (defparameter *unary-postfix* '(:-- :++)) 5 | (defparameter *assignment* 6 | (let ((assign (make-hash-table))) 7 | (dolist (op '(:+= :-= :/= :*= :%= :>>= :<<= :>>>= :|\|=| :^= :&=)) 8 | (setf (gethash op assign) (intern (subseq (string op) 0 (1- (length (string op)))) :keyword))) 9 | (setf (gethash := assign) t) 10 | assign)) 11 | 12 | (defparameter *precedence* 13 | (let ((precs (make-hash-table))) 14 | (loop :for ops :in '((:|\|\||) (:&&) (:|\||) (:^) (:&) (:== :=== :!= :!==) 15 | (:< :> :<= :>= :in :instanceof) (:>> :<< :>>>) (:+ :-) (:* :/ :%)) 16 | :for n :from 1 17 | :do (dolist (op ops) (setf (gethash op precs) n))) 18 | precs)) 19 | 20 | (defparameter *in-function* nil) 21 | (defparameter *label-scope* nil) 22 | (defmacro with-label-scope (type label &body body) 23 | `(let ((*label-scope* (cons (cons ,type ,label) *label-scope*))) ,@body)) 24 | 25 | (defun parse-js (input &key strict-semicolons (ecma-version 3) reserved-words) 26 | (check-type input (or string stream)) 27 | (check-type ecma-version (member 3 5)) 28 | (let ((*ecma-version* ecma-version) 29 | (*check-for-reserved-words* reserved-words) 30 | (*line* 0) 31 | (*char* 0) 32 | (*position* 0)) 33 | (if (stringp input) 34 | (with-input-from-string (in input) (parse-js* in strict-semicolons)) 35 | (parse-js* input strict-semicolons)))) 36 | 37 | (defun/defs parse-js* (stream &optional strict-semicolons) 38 | (def input (if (functionp stream) stream (lex-js stream))) 39 | (def token (funcall input)) 40 | (def peeked nil) 41 | 42 | (def peek () 43 | (or peeked (setf peeked (funcall input)))) 44 | (def next () 45 | (if peeked 46 | (setf token peeked peeked nil) 47 | (setf token (funcall input))) 48 | token) 49 | (def skip (n) 50 | (dotimes (i n) (next))) 51 | 52 | (def token-error (token control &rest args) 53 | (let ((*line* (token-line token)) (*char* (token-char token))) 54 | (apply #'js-parse-error control args))) 55 | (def error* (control &rest args) 56 | (apply #'token-error token control args)) 57 | (def unexpected (token) 58 | (token-error token "Unexpected token '~a'." (token-id token))) 59 | 60 | (def expect-token (type val) 61 | (if (tokenp token type val) 62 | (next) 63 | (error* "Unexpected token '~a', expected '~a'." (token-id token) val))) 64 | (def expect (punc) 65 | (expect-token :punc punc)) 66 | (def expect-key (keyword) 67 | (expect-token :keyword keyword)) 68 | (def can-insert-semicolon () 69 | (and (not strict-semicolons) 70 | (or (token-newline-before token) 71 | (token-type-p token :eof) 72 | (tokenp token :punc #\})))) 73 | (def semicolonp () (tokenp token :punc #\;)) 74 | (def semicolon () 75 | (cond ((semicolonp) (next)) 76 | ((not (can-insert-semicolon)) (unexpected token)))) 77 | 78 | (def as (type &rest args) 79 | (cons type args)) 80 | 81 | (def parenthesised () 82 | (expect #\() (prog1 (expression) (expect #\)))) 83 | 84 | (def statement (&optional label) 85 | ;; if expecting a statement and found a slash as operator, 86 | ;; it must be a literal regexp. 87 | (when (and (eq (token-type token) :operator) 88 | (eq (token-value token) :/)) 89 | (setf peeked nil 90 | token (funcall input t))) 91 | (case (token-type token) 92 | ((:num :string :regexp :operator :atom) (simple-statement)) 93 | (:name (if (tokenp (peek) :punc #\:) 94 | (let ((label (prog1 (token-value token) (skip 2)))) 95 | (as :label label (with-label-scope :label label (statement label)))) 96 | (simple-statement))) 97 | (:punc (case (token-value token) 98 | (#\{ (next) (block*)) 99 | ((#\[ #\() (simple-statement)) 100 | (#\; (next) (as :block ())) 101 | (t (unexpected token)))) 102 | (:keyword 103 | (case (prog1 (token-value token) (next)) 104 | (:break (break/cont :break)) 105 | (:continue (break/cont :continue)) 106 | (:debugger (semicolon) (as :debugger)) 107 | (:do (let ((body (with-label-scope :loop label (statement)))) 108 | (expect-key :while) 109 | (prog1 (as :do (parenthesised) body) 110 | (semicolon)))) 111 | (:for (for* label)) 112 | (:function (function* t)) 113 | (:if (if*)) 114 | (:return (unless *in-function* (error* "'return' outside of function.")) 115 | (as :return 116 | (cond ((semicolonp) (next) nil) 117 | ((can-insert-semicolon) nil) 118 | (t (prog1 (expression) (semicolon)))))) 119 | (:switch (let ((val (parenthesised)) 120 | (cases nil)) 121 | (with-label-scope :switch label 122 | (expect #\{) 123 | (loop :until (tokenp token :punc #\}) :do 124 | (case (token-value token) 125 | (:case (next) 126 | (push (cons (prog1 (expression) (expect #\:)) nil) cases)) 127 | (:default (next) (expect #\:) (push (cons nil nil) cases)) 128 | (t (unless cases (unexpected token)) 129 | (push (statement) (cdr (car cases)))))) 130 | (next) 131 | (as :switch val (loop :for case :in (nreverse cases) :collect 132 | (cons (car case) (nreverse (cdr case)))))))) 133 | (:throw (let ((ex (expression))) (semicolon) (as :throw ex))) 134 | (:try (try*)) 135 | (:var (prog1 (var*) (semicolon))) 136 | (:while (as :while (parenthesised) (with-label-scope :loop label (statement)))) 137 | (:with (as :with (parenthesised) (statement))) 138 | (t (unexpected token)))) 139 | (t (unexpected token)))) 140 | 141 | (def simple-statement () 142 | (let ((exp (expression))) 143 | (semicolon) 144 | (as :stat exp))) 145 | 146 | (def break/cont (type) 147 | (as type (cond ((or (and (semicolonp) (next)) (can-insert-semicolon)) 148 | (unless (loop :for (ltype) :in *label-scope* :do 149 | (when (or (eq ltype :loop) (and (eq type :break) (eq ltype :switch))) 150 | (return t))) 151 | (error* "'~a' not inside a loop or switch." type)) 152 | nil) 153 | ((token-type-p token :name) 154 | (let ((name (token-value token))) 155 | (ecase type 156 | (:break (unless (some (lambda (lb) (equal (cdr lb) name)) *label-scope*) 157 | (error* "Labeled 'break' without matching labeled statement."))) 158 | (:continue (unless (find (cons :loop name) *label-scope* :test #'equal) 159 | (error* "Labeled 'continue' without matching labeled loop.")))) 160 | (next) (semicolon) 161 | name))))) 162 | 163 | (def block* () 164 | (prog1 (as :block (loop :until (tokenp token :punc #\}) 165 | :collect (statement))) 166 | (next))) 167 | 168 | (def for-in (label init lhs) 169 | (let ((obj (progn (next) (expression)))) 170 | (expect #\)) 171 | (as :for-in init lhs obj (with-label-scope :loop label (statement))))) 172 | 173 | (def regular-for (label init) 174 | (expect #\;) 175 | (let ((test (prog1 (unless (semicolonp) (expression)) (expect #\;))) 176 | (step (if (tokenp token :punc #\)) nil (expression)))) 177 | (expect #\)) 178 | (as :for init test step (with-label-scope :loop label (statement))))) 179 | 180 | (def for* (label) 181 | (expect #\() 182 | (cond ((semicolonp) (regular-for label nil)) 183 | ((tokenp token :keyword :var) 184 | (let* ((var (progn (next) (var* t))) 185 | (defs (second var))) 186 | (if (and (not (cdr defs)) (tokenp token :operator :in)) 187 | (for-in label var (as :name (caar defs))) 188 | (regular-for label var)))) 189 | (t (let ((init (expression t t))) 190 | (if (tokenp token :operator :in) 191 | (for-in label nil init) 192 | (regular-for label init)))))) 193 | 194 | (def function* (statement) 195 | (with-defs 196 | (def name (and (token-type-p token :name) 197 | (prog1 (token-value token) (next)))) 198 | (when (and statement (not name)) (unexpected token)) 199 | (expect #\() 200 | (def argnames (loop :for first := t :then nil 201 | :until (tokenp token :punc #\)) 202 | :unless first :do (expect #\,) 203 | :unless (token-type-p token :name) :do (unexpected token) 204 | :collect (prog1 (token-value token) (next)))) 205 | (next) 206 | (expect #\{) 207 | (def body (let ((*in-function* t) (*label-scope* ())) 208 | (loop :until (tokenp token :punc #\}) :collect (statement)))) 209 | (next) 210 | (as (if statement :defun :function) name argnames body))) 211 | 212 | (def if* () 213 | (let ((condition (parenthesised)) 214 | (body (statement)) 215 | else) 216 | (when (tokenp token :keyword :else) 217 | (next) 218 | (setf else (statement))) 219 | (as :if condition body else))) 220 | 221 | (def ensure-block () 222 | (expect #\{) 223 | (block*)) 224 | 225 | (def try* () 226 | (let ((body (ensure-block)) catch finally) 227 | (when (tokenp token :keyword :catch) 228 | (next) (expect #\() 229 | (unless (token-type-p token :name) (error* "Name expected.")) 230 | (let ((name (token-value token))) 231 | (next) (expect #\)) 232 | (setf catch (cons name (ensure-block))))) 233 | (when (tokenp token :keyword :finally) 234 | (next) 235 | (setf finally (ensure-block))) 236 | (as :try body catch finally))) 237 | 238 | (def vardefs (no-in) 239 | (unless (token-type-p token :name) (unexpected token)) 240 | (let ((name (token-value token)) val) 241 | (next) 242 | (when (tokenp token :operator :=) 243 | (next) (setf val (expression nil no-in))) 244 | (if (tokenp token :punc #\,) 245 | (progn (next) (cons (cons name val) (vardefs no-in))) 246 | (list (cons name val))))) 247 | 248 | (def var* (&optional no-in) 249 | (as :var (vardefs no-in))) 250 | 251 | (def new* () 252 | (let ((newexp (expr-atom nil))) 253 | (let ((args nil)) 254 | (when (tokenp token :punc #\() 255 | (next) (setf args (expr-list #\)))) 256 | (subscripts (as :new newexp args) t)))) 257 | 258 | (def expr-atom (allow-calls) 259 | (cond ((tokenp token :operator :new) (next) (new*)) 260 | ((token-type-p token :punc) 261 | (case (token-value token) 262 | (#\( (next) (subscripts (prog1 (expression) (expect #\))) allow-calls)) 263 | (#\[ (next) (subscripts (array*) allow-calls)) 264 | (#\{ (next) (subscripts (object*) allow-calls)) 265 | (t (unexpected token)))) 266 | ((tokenp token :keyword :function) 267 | (next) 268 | (subscripts (function* nil) allow-calls)) 269 | ((member (token-type token) '(:atom :num :string :regexp :name)) 270 | (let ((atom (if (eq (token-type token) :regexp) 271 | (as :regexp (car (token-value token)) (cdr (token-value token))) 272 | (as (token-type token) (token-value token))))) 273 | (subscripts (prog1 atom (next)) allow-calls))) 274 | (t (unexpected token)))) 275 | 276 | (def expr-list (closing &optional allow-trailing-comma allow-empty) 277 | (let ((elts ())) 278 | (loop :for first := t :then nil :until (tokenp token :punc closing) :do 279 | (unless first (expect #\,)) 280 | (when (and allow-trailing-comma (tokenp token :punc closing)) (return)) 281 | (push (unless (and allow-empty (tokenp token :punc #\,)) (expression nil)) elts)) 282 | (next) 283 | (nreverse elts))) 284 | 285 | (def array* () 286 | (as :array (expr-list #\] t t))) 287 | 288 | (def object* () 289 | (as :object (loop :for first := t :then nil 290 | :until (tokenp token :punc #\}) 291 | :unless first :do (expect #\,) 292 | :until (tokenp token :punc #\}) :collect 293 | (let ((name (as-property-name))) 294 | (cond ((tokenp token :punc #\:) 295 | (next) (cons name (expression nil))) 296 | ((and (eql *ecma-version* 5) (or (equal name "get") (equal name "set"))) 297 | (let ((name1 (as-property-name)) 298 | (body (progn (unless (tokenp token :punc #\() (unexpected token)) 299 | (function* nil)))) 300 | (list* name1 (if (equal name "get") :get :set) body))) 301 | (t (unexpected token)))) 302 | :finally (next)))) 303 | 304 | (def as-property-name () 305 | (if (member (token-type token) '(:num :string)) 306 | (prog1 (token-value token) (next)) 307 | (as-name))) 308 | 309 | (def as-name () 310 | (case (token-type token) 311 | (:name (prog1 (token-value token) (next))) 312 | ((:operator :keyword :atom) (prog1 (string-downcase (symbol-name (token-value token))) (next))) 313 | (t (unexpected token)))) 314 | 315 | (def subscripts (expr allow-calls) 316 | (cond ((tokenp token :punc #\.) 317 | (next) 318 | (subscripts (as :dot expr (as-name)) allow-calls)) 319 | ((tokenp token :punc #\[) 320 | (next) 321 | (let ((sub (expression))) 322 | (expect #\]) 323 | (subscripts (as :sub expr sub) allow-calls))) 324 | ((and (tokenp token :punc #\() allow-calls) 325 | (next) 326 | (let ((args (expr-list #\)))) 327 | (subscripts (as :call expr args) t))) 328 | (t expr))) 329 | 330 | (def maybe-unary (allow-calls) 331 | (if (and (token-type-p token :operator) (member (token-value token) *unary-prefix*)) 332 | (as :unary-prefix (prog1 (token-value token) (next)) (maybe-unary allow-calls)) 333 | (let ((val (expr-atom allow-calls))) 334 | (loop :while (and (token-type-p token :operator) 335 | (member (token-value token) *unary-postfix*) 336 | (not (token-newline-before token))) :do 337 | (setf val (as :unary-postfix (token-value token) val)) 338 | (next)) 339 | val))) 340 | 341 | (def expr-op (left min-prec no-in) 342 | (let* ((op (and (token-type-p token :operator) (or (not no-in) (not (eq (token-value token) :in))) 343 | (token-value token))) 344 | (prec (and op (gethash op *precedence*)))) 345 | (if (and prec (> prec min-prec)) 346 | (let ((right (progn (next) (expr-op (maybe-unary t) prec no-in)))) 347 | (expr-op (as :binary op left right) min-prec no-in)) 348 | left))) 349 | 350 | (def expr-ops (no-in) 351 | (expr-op (maybe-unary t) 0 no-in)) 352 | 353 | (def maybe-conditional (no-in) 354 | (let ((expr (expr-ops no-in))) 355 | (if (tokenp token :operator :?) 356 | (let ((yes (progn (next) (expression nil)))) 357 | (expect #\:) 358 | (as :conditional expr yes (expression nil no-in))) 359 | expr))) 360 | 361 | (def maybe-assign (no-in) 362 | (let ((left (maybe-conditional no-in))) 363 | (if (and (token-type-p token :operator) (gethash (token-value token) *assignment*)) 364 | (as :assign (gethash (token-value token) *assignment*) left (progn (next) (maybe-assign no-in))) 365 | left))) 366 | 367 | (def expression (&optional (commas t) (no-in nil)) 368 | (let ((expr (maybe-assign no-in))) 369 | (if (and commas (tokenp token :punc #\,)) 370 | (as :seq expr (progn (next) (expression))) 371 | expr))) 372 | 373 | (as :toplevel (loop :until (token-type-p token :eof) 374 | :collect (statement)))) 375 | 376 | (defun parse-js-string (&rest args) 377 | (apply 'parse-js args)) 378 | --------------------------------------------------------------------------------