├── LICENSE ├── README.md ├── _c_ast.cfg ├── c-parse.lisp ├── cl-c-parse.asd ├── filesystem.lisp ├── lex-txt.lisp ├── lex.lisp ├── lex.txt ├── package.lisp ├── preprocessor.lisp ├── rip ├── ANSI C grammar (Lex).html ├── ANSI C grammar (Yacc).html ├── Frequently Asked Questions on the ANSI C grammar.html └── README ├── test.lisp ├── test ├── GNU_compiler_builtins.h ├── hash.c └── lines.txt ├── yacc.lisp └── yacc.txt /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Giorgio Masching III 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ## Exact C11 Parser in Common Lisp (No Preprocessor) (Also Includes a YACC and Lex clone) 2 | 3 | ## Usage 4 | 5 | 1. Set up the system. 6 | This generates esrap-liquid packrat parser rules for the lexer, ripped from the [exact lex specification](http://www.quut.com/c/ANSI-C-grammar-y.html), 7 | and yacc rules ripped from the [exact yacc speicification](http://www.quut.com/c/ANSI-C-grammar-l-2011.html). See yacc.txt and lex.txt. 8 | 9 | ``` 10 | (c-parse::start-up) 11 | => ...WARNING: 2 Shift/Reduce, 0 Reduce/Reduce conflicts 12 | ``` 13 | The warning is a result of the C grammar having an ambiguity with regards to YACC, but this is expected. 14 | 15 | 2. Parse your C code 16 | ``` 17 | (c-parse::c-parse c-parse::*c-test-file*) 18 | => [very long CST tree], ("Hash" "Node" "Entry" "ReturnCode") 19 | ``` 20 | 21 | ``` 22 | (c-parse::c-parse 23 | "int main() 24 | { 25 | // printf() displays the string inside quotation 26 | printf(\"Hello, World!\"); 27 | return 0; 28 | }") 29 | => ((:EXTERNAL_DECLARATION 0 30 | (:FUNCTION_DEFINITION 1 31 | (:DECLARATION_SPECIFIERS 3 (:TYPE_SPECIFIER 3 "int"<0,3>)) 32 | (:DECLARATOR 1 33 | (:DIRECT_DECLARATOR 12 (:DIRECT_DECLARATOR 0 "main"<4,8>) "("<8,9> 34 | ")"<9,10>)) 35 | (:COMPOUND_STATEMENT 1 "{"<11,12> 36 | (:BLOCK_ITEM_LIST 1 37 | (:BLOCK_ITEM_LIST 0 38 | (:BLOCK_ITEM 1 39 | (:STATEMENT 2 40 | (:EXPRESSION_STATEMENT 1 41 | (:EXPRESSION 0 42 | (:ASSIGNMENT_EXPRESSION 0 43 | (:CONDITIONAL_EXPRESSION 0 44 | (:LOGICAL_OR_EXPRESSION 0 45 | (:LOGICAL_AND_EXPRESSION 0 46 | (:INCLUSIVE_OR_EXPRESSION 0 47 | (:EXCLUSIVE_OR_EXPRESSION 0 48 | (:AND_EXPRESSION 0 49 | (:EQUALITY_EXPRESSION 0 50 | (:RELATIONAL_EXPRESSION 0 51 | (:SHIFT_EXPRESSION 0 52 | (:ADDITIVE_EXPRESSION 0 53 | (:MULTIPLICATIVE_EXPRESSION 0 54 | (:CAST_EXPRESSION 0 55 | (:UNARY_EXPRESSION 0 56 | (:POSTFIX_EXPRESSION 3 57 | (:POSTFIX_EXPRESSION 0 58 | (:PRIMARY_EXPRESSION 0 "printf"<68,74>)) 59 | "("<74,75> 60 | (:ARGUMENT_EXPRESSION_LIST 0 61 | (:ASSIGNMENT_EXPRESSION 0 62 | (:CONDITIONAL_EXPRESSION 0 63 | (:LOGICAL_OR_EXPRESSION 0 64 | (:LOGICAL_AND_EXPRESSION 0 65 | (:INCLUSIVE_OR_EXPRESSION 0 66 | (:EXCLUSIVE_OR_EXPRESSION 0 67 | (:AND_EXPRESSION 0 68 | (:EQUALITY_EXPRESSION 0 69 | (:RELATIONAL_EXPRESSION 0 70 | (:SHIFT_EXPRESSION 0 71 | (:ADDITIVE_EXPRESSION 0 72 | (:MULTIPLICATIVE_EXPRESSION 0 73 | (:CAST_EXPRESSION 0 74 | (:UNARY_EXPRESSION 0 75 | (:POSTFIX_EXPRESSION 0 76 | (:PRIMARY_EXPRESSION 2 77 | (:STRING 0 78 | "\"Hello, World!\""<75,90>)))))))))))))))))) 79 | ")"<90,91>)))))))))))))))) 80 | ";"<91,92>)))) 81 | (:BLOCK_ITEM 1 82 | (:STATEMENT 5 83 | (:JUMP_STATEMENT 4 "return"<96,102> 84 | (:EXPRESSION 0 85 | (:ASSIGNMENT_EXPRESSION 0 86 | (:CONDITIONAL_EXPRESSION 0 87 | (:LOGICAL_OR_EXPRESSION 0 88 | (:LOGICAL_AND_EXPRESSION 0 89 | (:INCLUSIVE_OR_EXPRESSION 0 90 | (:EXCLUSIVE_OR_EXPRESSION 0 91 | (:AND_EXPRESSION 0 92 | (:EQUALITY_EXPRESSION 0 93 | (:RELATIONAL_EXPRESSION 0 94 | (:SHIFT_EXPRESSION 0 95 | (:ADDITIVE_EXPRESSION 0 96 | (:MULTIPLICATIVE_EXPRESSION 0 97 | (:CAST_EXPRESSION 0 98 | (:UNARY_EXPRESSION 0 99 | (:POSTFIX_EXPRESSION 0 100 | (:PRIMARY_EXPRESSION 1 101 | (:CONSTANT 0 "0"<103,104>)))))))))))))))))) 102 | ";"<104,105>)))) 103 | "}"<106,107>)))), NIL 104 | ``` 105 | 106 | 3. Print your CST 107 | 108 | ``` 109 | (c-parse::print-csts *) 110 | => 111 | int 112 | main ( ) 113 | { 114 | 115 | 116 | printf ( "Hello, World!" ) ; 117 | return 0 ; } 118 | 119 | ``` 120 | 121 | ## Other Features 122 | 123 | This project was part of a larger effor to try and port emacs from C to another language. 124 | 125 | So it has features for: 126 | - Caching the results of the parsing in a "copy" of the filesystem 127 | - Checking the size of the generated files in a directory, 128 | - Parsing the C AST specification format for pycparser, and creating structure definitions 129 | 130 | However these are unpolished. 131 | -------------------------------------------------------------------------------- /_c_ast.cfg: -------------------------------------------------------------------------------- 1 | #----------------------------------------------------------------- 2 | # pycparser: _c_ast.cfg 3 | # 4 | # Defines the AST Node classes used in pycparser. 5 | # 6 | # Each entry is a Node sub-class name, listing the attributes 7 | # and child nodes of the class: 8 | # * - a child node 9 | # ** - a sequence of child nodes 10 | # - an attribute 11 | # 12 | # Eli Bendersky [https://eli.thegreenplace.net/] 13 | # License: BSD 14 | #----------------------------------------------------------------- 15 | 16 | # ArrayDecl is a nested declaration of an array with the given type. 17 | # dim: the dimension (for example, constant 42) 18 | # dim_quals: list of dimension qualifiers, to support C99's allowing 'const' 19 | # and 'static' within the array dimension in function declarations. 20 | ArrayDecl: [type*, dim*, dim_quals] 21 | 22 | ArrayRef: [name*, subscript*] 23 | 24 | # op: =, +=, /= etc. 25 | # 26 | Assignment: [op, lvalue*, rvalue*] 27 | 28 | BinaryOp: [op, left*, right*] 29 | 30 | Break: [] 31 | 32 | Case: [expr*, stmts**] 33 | 34 | Cast: [to_type*, expr*] 35 | 36 | # Compound statement in C99 is a list of block items (declarations or 37 | # statements). 38 | # 39 | Compound: [block_items**] 40 | 41 | # Compound literal (anonymous aggregate) for C99. 42 | # (type-name) {initializer_list} 43 | # type: the typename 44 | # init: InitList for the initializer list 45 | # 46 | CompoundLiteral: [type*, init*] 47 | 48 | # type: int, char, float, etc. see CLexer for constant token types 49 | # 50 | Constant: [type, value] 51 | 52 | Continue: [] 53 | 54 | # name: the variable being declared 55 | # quals: list of qualifiers (const, volatile) 56 | # funcspec: list function specifiers (i.e. inline in C99) 57 | # storage: list of storage specifiers (extern, register, etc.) 58 | # type: declaration type (probably nested with all the modifiers) 59 | # init: initialization value, or None 60 | # bitsize: bit field size, or None 61 | # 62 | Decl: [name, quals, storage, funcspec, type*, init*, bitsize*] 63 | 64 | DeclList: [decls**] 65 | 66 | Default: [stmts**] 67 | 68 | DoWhile: [cond*, stmt*] 69 | 70 | # Represents the ellipsis (...) parameter in a function 71 | # declaration 72 | # 73 | EllipsisParam: [] 74 | 75 | # An empty statement (a semicolon ';' on its own) 76 | # 77 | EmptyStatement: [] 78 | 79 | # Enumeration type specifier 80 | # name: an optional ID 81 | # values: an EnumeratorList 82 | # 83 | Enum: [name, values*] 84 | 85 | # A name/value pair for enumeration values 86 | # 87 | Enumerator: [name, value*] 88 | 89 | # A list of enumerators 90 | # 91 | EnumeratorList: [enumerators**] 92 | 93 | # A list of expressions separated by the comma operator. 94 | # 95 | ExprList: [exprs**] 96 | 97 | # This is the top of the AST, representing a single C file (a 98 | # translation unit in K&R jargon). It contains a list of 99 | # "external-declaration"s, which is either declarations (Decl), 100 | # Typedef or function definitions (FuncDef). 101 | # 102 | FileAST: [ext**] 103 | 104 | # for (init; cond; next) stmt 105 | # 106 | For: [init*, cond*, next*, stmt*] 107 | 108 | # name: Id 109 | # args: ExprList 110 | # 111 | FuncCall: [name*, args*] 112 | 113 | # type (args) 114 | # 115 | FuncDecl: [args*, type*] 116 | 117 | # Function definition: a declarator for the function name and 118 | # a body, which is a compound statement. 119 | # There's an optional list of parameter declarations for old 120 | # K&R-style definitions 121 | # 122 | FuncDef: [decl*, param_decls**, body*] 123 | 124 | Goto: [name] 125 | 126 | ID: [name] 127 | 128 | # Holder for types that are a simple identifier (e.g. the built 129 | # ins void, char etc. and typedef-defined types) 130 | # 131 | IdentifierType: [names] 132 | 133 | If: [cond*, iftrue*, iffalse*] 134 | 135 | # An initialization list used for compound literals. 136 | # 137 | InitList: [exprs**] 138 | 139 | Label: [name, stmt*] 140 | 141 | # A named initializer for C99. 142 | # The name of a NamedInitializer is a sequence of Nodes, because 143 | # names can be hierarchical and contain constant expressions. 144 | # 145 | NamedInitializer: [name**, expr*] 146 | 147 | # a list of comma separated function parameter declarations 148 | # 149 | ParamList: [params**] 150 | 151 | PtrDecl: [quals, type*] 152 | 153 | Return: [expr*] 154 | 155 | # name: struct tag name 156 | # decls: declaration of members 157 | # 158 | Struct: [name, decls**] 159 | 160 | # type: . or -> 161 | # name.field or name->field 162 | # 163 | StructRef: [name*, type, field*] 164 | 165 | Switch: [cond*, stmt*] 166 | 167 | # cond ? iftrue : iffalse 168 | # 169 | TernaryOp: [cond*, iftrue*, iffalse*] 170 | 171 | # A base type declaration 172 | # 173 | TypeDecl: [declname, quals, type*] 174 | 175 | # A typedef declaration. 176 | # Very similar to Decl, but without some attributes 177 | # 178 | Typedef: [name, quals, storage, type*] 179 | 180 | Typename: [name, quals, type*] 181 | 182 | UnaryOp: [op, expr*] 183 | 184 | # name: union tag name 185 | # decls: declaration of members 186 | # 187 | Union: [name, decls**] 188 | 189 | While: [cond*, stmt*] 190 | 191 | Pragma: [string] 192 | -------------------------------------------------------------------------------- /c-parse.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | 3 | (defparameter *path* (asdf:system-source-directory :cl-c-parse)) 4 | ;;generated via grepping .h and .c files for "#include <" 5 | (defun whitespace-string (str) 6 | "return t if its all spaces or empty" 7 | (dotimes (i (length str)) 8 | (unless (char= #\Space (aref str i)) 9 | (return-from whitespace-string nil))) 10 | t) 11 | (defun file-lines-no-whitespace-lines (string) 12 | (remove-if #'whitespace-string 13 | (split-sequence:split-sequence #\Newline string))) 14 | (defun print-list (&optional (data *lex-txt2*)) 15 | (dolist (item data) 16 | (print item))) 17 | (defun princ-list (&optional (data *lex-txt2*)) 18 | (dolist (item data) 19 | (terpri) 20 | (princ item))) 21 | 22 | ;;both the lex and yacc file are separated into 3 sections by two "%%" 23 | ;;for use with *lex-txt2* and *yacc-txt2* 24 | (defun %%-positions (data) 25 | (let ((first-end (position "%%" data :test 'string=))) 26 | (values first-end 27 | (position "%%" data :test 'string= :start (+ 1 first-end))))) 28 | 29 | (define-esrap-env c-parse) 30 | (define-c-parse-rule lex-yacc-token-char () 31 | (|| #\_ 32 | (character-ranges 33 | (#\a #\z) 34 | (#\A #\Z)))) 35 | (define-c-parse-rule lex-yacc-token () 36 | (postimes lex-yacc-token-char)) 37 | 38 | (defun stringify (seq) 39 | "coerce sequence into a string" 40 | (coerce seq 'string)) 41 | (define-c-parse-rule lex-token-string () 42 | ;;happens to be same for yacc. FIXME:: proper names for things? 43 | (stringify (v lex-yacc-token))) 44 | 45 | (defmacro parse-with-garbage (rule text &rest rest &key &allow-other-keys) 46 | `(c-parse-parse ,rule ,text :junk-allowed t ,@rest)) 47 | 48 | (defun stringy (tree) 49 | ;;turn a tree of nil's and characters produced by esrap-liquid into a 50 | ;;string 51 | (with-output-to-string (stream) 52 | (labels ((rec (node) 53 | (when node 54 | (if (atom node) 55 | (princ node stream) 56 | (progn (rec (car node)) 57 | (rec (cdr node))))))) 58 | (rec tree)))) 59 | 60 | (defun concatenate-string (&rest rest) 61 | (%concatenate-string rest)) 62 | (defun %concatenate-string (rest) 63 | (apply 'concatenate 'string rest)) 64 | 65 | ;;;yacc and lex comments are the same? 66 | (define-c-parse-rule lex-yacc-multiline-comment () 67 | (progn-v 68 | "/*" 69 | lex-comment-end)) 70 | (define-c-parse-rule lex-comment-end-token () 71 | (progn (v #\*) 72 | (v #\/))) 73 | (define-c-parse-rule lex-comment-end () 74 | (prog1 (postimes 75 | (progn (! lex-comment-end-token) 76 | (v character)) 77 | ) 78 | (v lex-comment-end-token)) 79 | nil 80 | ) 81 | 82 | ;;;;for default testing purposes 83 | (defparameter *emacs-src-root-path* "/home/imac/install/src/emacs-mirror/emacs-master/") 84 | (defun emacsify-path (&optional (path "src/lisp.h")) 85 | (merge-pathnames path *emacs-src-root-path*)) 86 | 87 | (defparameter *testpath* 88 | #+nil 89 | (emacsify-path 90 | (merge-pathnames 91 | ;"lisp.h" 92 | "syntax.h" 93 | ;"keymap.h" 94 | "src/")) 95 | #+nil 96 | "/home/imac/install/src/pycparser-master/examples/c_files/funky.c" 97 | #+nil 98 | "/home/imac/install/src/pycparser-master/examples/c_files/hash.c" 99 | (merge-pathnames "test/hash.c" *path*)) 100 | 101 | ;;FIXME:: where to put test files? 102 | (defparameter *c-test-file* 103 | (alexandria:read-file-into-string 104 | *testpath*)) 105 | 106 | (defmacro while (condition &body body) 107 | `(do () ((not,condition)) 108 | ,@body)) 109 | 110 | (defun symbol= (sym1 sym2) 111 | (eq sym1 sym2)) 112 | 113 | (defgeneric equalp? (a b)) 114 | (defmethod equalp? ((a number) (b number)) 115 | (= a b)) 116 | (defmethod equalp? ((a cons) (b cons)) 117 | (and (equalp? (car a) (car b)) 118 | (equalp? (cdr a) (cdr b)))) 119 | (defmethod equalp? ((a t) (b t)) 120 | nil) 121 | (defmethod equalp? ((a character) (b character)) 122 | (char= a b)) 123 | (defmethod equalp? ((a string) (b string)) 124 | (string= a b)) 125 | (defmethod equalp? ((a symbol) (b symbol)) 126 | (symbol= a b)) 127 | 128 | (defparameter *include-directories* 129 | (mapcar 'emacsify-path 130 | '("src/" 131 | "lib/"))) 132 | 133 | (defparameter *c-file-type-strings* 134 | '("c" "h")) 135 | ;;;FIXME::put in the filesystem file 136 | (defun c-filetype-p (path) 137 | (let ((type (pathname-type path))) 138 | (find type *c-file-type-strings* :test 'string=))) 139 | 140 | (defun map-c-files-in-directory (&key (fun 'print) 141 | (path (first *include-directories*))) 142 | (let* ((files (uiop:directory-files path)) 143 | (only-c-files (remove-if-not 'c-filetype-p files))) 144 | (mapc fun only-c-files))) 145 | 146 | (defun get-c-files (&optional (path *emacs-src-root-path*)) 147 | (let ((acc nil)) 148 | (uiop:collect-sub*directories 149 | path 150 | (constantly t) 151 | (constantly t) 152 | (lambda (subdir) 153 | (map-c-files-in-directory :path subdir 154 | :fun 155 | (lambda (file) 156 | (push file acc))))) 157 | (return-from get-c-files (nreverse acc)))) 158 | 159 | (defun total-emacs-c-bytes (&optional (path *emacs-src-root-path*)) 160 | (total-file-bytes (get-c-files path))) 161 | 162 | (defun total-file-bytes (files-list) 163 | (reduce '+ 164 | (mapcar (lambda (path) 165 | (osicat-posix:stat-size 166 | (osicat-posix:stat path))) 167 | files-list))) 168 | 169 | (defun find-just-before (item list fun &rest rest &key &allow-other-keys) 170 | (let ((position (apply 'position-if 171 | (lambda (x) 172 | (funcall fun x item)) 173 | list rest))) 174 | (nthcdr position list))) 175 | 176 | (defun round-off (x size) 177 | (* size (round (/ x size)))) 178 | 179 | (defun get-bytesize (bytes) 180 | (let* ((exponent (1- (log bytes 1024))) 181 | (data (first (find-just-before 182 | exponent 183 | '( 184 | (0 "Bytes") 185 | (1 "Kilobytes") 186 | (2 "Megabytes") 187 | (3 "Gigabytes") 188 | (4 "Terabytes") 189 | ;;FIXME::add more sizes 190 | ) 191 | '> 192 | :key 'first)))) 193 | (destructuring-bind (exponent name) data 194 | (list (round-off (utility:floatify (/ bytes (expt 1024 exponent))) 195 | 0.25) 196 | name)))) 197 | 198 | (defun how-big-is-emacs-c-code? (&optional (path *emacs-src-root-path*)) 199 | (destructuring-bind (num name) (get-bytesize (total-emacs-c-bytes path)) 200 | (format t "~%~a ~a~%" num name)) 201 | (values)) 202 | 203 | (defun emacs-c-source () 204 | (apply 'nconc 205 | (mapcar 'get-c-files *include-directories*))) 206 | 207 | ;#+nil ;;non-parallel way 208 | (defun cached-emacs-c-files () 209 | (mapcar 'ensure-cached-token-intervals 210 | (emacs-c-source))) 211 | ;;FIXME:: lazy load lparallel kernel with deflazy? 212 | #+nil 213 | (setf lparallel:*kernel* (lparallel:make-kernel 4)) 214 | #+nil 215 | (defun submit-emacs-jobs () 216 | (lparallel:pmapc 'ensure-cached-token-intervals 217 | (emacs-c-source))) 218 | -------------------------------------------------------------------------------- /cl-c-parse.asd: -------------------------------------------------------------------------------- 1 | (asdf:defsystem #:cl-c-parse 2 | :author "terminal625" 3 | :license "MIT" 4 | :description "processed c parser" 5 | :depends-on (#:alexandria 6 | #:split-sequence 7 | #:uiop 8 | #:esrap-liquid 9 | #:utility 10 | #:yacc 11 | #:uncommon-lisp 12 | #:trivia 13 | #:fiveam 14 | #:deflazy 15 | #:osicat ;;for seeing size of files 16 | ;;#:lparallel ;;for using multiple cores to lex 17 | ) 18 | :serial t 19 | :components 20 | ((:file "package") 21 | (:file "c-parse") 22 | (:file "lex") 23 | (:file "lex-txt") 24 | (:file "yacc") 25 | (:file "filesystem") 26 | (:file "preprocessor") 27 | ;;(:file "test") 28 | )) 29 | -------------------------------------------------------------------------------- /filesystem.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | 3 | ;;;;Make a subdirectory that mimics the real directories 4 | 5 | (defparameter *cache* (merge-pathnames "shadowroot/" *path*)) 6 | (defun re-root-real-path (path &optional (base *cache*)) 7 | "change the root of path to base, making sure that therer can be no outside reference. 8 | only works if path actually exists." 9 | (let ((truename (uiop:truename* path))) 10 | (unless truename 11 | (error "does not exist ~s" path)) 12 | (unless (uiop:absolute-pathname-p path) 13 | (error "not absolute:~s" path)) 14 | 15 | ;;FIXME::hack? 16 | (let ((base-directory (pathname-directory base)) 17 | (path-directory (pathname-directory truename))) 18 | (make-pathname :directory 19 | (append base-directory (rest path-directory)) 20 | :name (pathname-name truename) 21 | :type (pathname-type truename))))) 22 | 23 | (defun reroot (path &key (suffix "") 24 | (prefix "") 25 | (create nil)) 26 | (let ((reroot (re-root-real-path path))) 27 | (ensure-directories-exist reroot) 28 | ;;if its a file, touch it 29 | (let ((new (add-file-suffix suffix (add-file-prefix prefix reroot)))) 30 | (when create 31 | (unless (uiop:directory-pathname-p new) 32 | (touch-file new))) 33 | new))) 34 | 35 | (defparameter *touch-test-path* (merge-pathnames "touch.txt" *path*)) 36 | (defun touch-file (&optional (path *touch-test-path*)) 37 | (with-open-file (stream path :if-does-not-exist :create))) 38 | 39 | ;;be able to make a derived filename 40 | #+nil 41 | (defun pathname-name-and-type (&optional (path *touch-test-path*)) 42 | (let ((name (pathname-name path)) 43 | (type (pathname-type path))) 44 | (if (or name type) 45 | (concatenate-string 46 | name 47 | (if type 48 | "." 49 | nil) 50 | type)))) 51 | (defun get-directory (&optional (path *testpath*)) 52 | (make-pathname :directory (pathname-directory path))) 53 | (defun add-file-extension (extension-fun &optional (path *testpath*)) 54 | (let ((dir (get-directory path))) 55 | (merge-pathnames 56 | (make-pathname :name 57 | (funcall extension-fun (pathname-name path)) 58 | :type (pathname-type path)) 59 | dir))) 60 | 61 | ;;(ADD-FILE-SUFFIX "~") lisp.h -> ~lisp.h 62 | (defun add-file-suffix (suffix &optional (path *testpath*)) 63 | (add-file-extension (lambda (x) 64 | (concatenate-string x suffix)) 65 | path)) 66 | ;;(ADD-FILE-SUFFIX ".directive") lisp.h -> lisp.h.directive 67 | (defun add-file-prefix (prefix &optional (path *testpath*)) 68 | (add-file-extension (lambda (x) 69 | (concatenate-string prefix x)) 70 | path)) 71 | -------------------------------------------------------------------------------- /lex-txt.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | ;;;;Process the lex.txt file 3 | (defparameter *lex-txt-path* 4 | (merge-pathnames "lex.txt" *path*)) 5 | (deflazy *lex-txt* () 6 | (alexandria:read-file-into-string *lex-txt-path*)) 7 | (deflazy *lex-txt2* (*lex-txt*) 8 | (file-lines-no-whitespace-lines 9 | *lex-txt*)) 10 | ;;https://docs.oracle.com/cd/E19504-01/802-5880/lex-6/index.html 11 | ;;The mandatory rules section opens with the delimiter %%. 12 | ;;If a routines section follows, another %% delimiter ends the rules section. 13 | ;;The %% delimiters must be entered at the beginning of a line, that is, without leading blanks. 14 | ;;If there is no second delimiter, the rules section is presumed to continue to the end of the program. 15 | 16 | ;;divide the lex.txt into terminals and patterns. 17 | ;;ignore the c code for check_type and comment, instead hand-coding those 18 | ;;http://dinosaur.compilertools.net/lex/index.html <- detailed explanation of lex file format 19 | (deflazy *lex-definitions-lines* ((lex *lex-txt2*)) 20 | (let (;;this is where c code starts and definitions 21 | (first-end (position "%{" lex :test 'string=)) 22 | ;;skip over the variables at the beginning for the lex program 23 | (start (position-if (lambda (str) 24 | (not (char= (aref str 0) 25 | #\%))) 26 | lex))) 27 | ;;terminals, called definitions 28 | (subseq lex start first-end))) 29 | (deflazy *lex-rules-lines* ((lex *lex-txt2*)) 30 | (multiple-value-bind (first-end second-end) (%%-positions lex) 31 | ;;patterns, called rules 32 | (subseq lex (+ 1 first-end) second-end))) 33 | ;;;; 34 | 35 | (define-c-parse-rule lex-line-def () 36 | (cap :def-name (v lex-token-string)) 37 | (v whitespace) 38 | ;;(cap :rule (v lex-rule-start)) 39 | (list 40 | (recap :def-name) 41 | (stringify (postimes character)))) 42 | (defun spec-lex-rule-rule (spec) 43 | (second spec)) 44 | (defun spec-lex-rule-name (spec) 45 | (first spec)) 46 | 47 | (define-c-parse-rule lex-line-rule () 48 | (prog1-v lex-rule-start 49 | whitespace)) 50 | 51 | ;;*defs* is a list of ("name" "rule") 52 | (defun split-lex-line-def (&optional (item "NS [a-zA-Z_]")) 53 | (destructuring-bind (name rule-string) (parse-with-garbage 'lex-line-def item) 54 | (list name (parse-with-garbage 'lex-rule-start rule-string)))) 55 | 56 | (defun split-lex-line-rule (&optional (string "asd[a-zA-Z_]fasd {return; /* */}")) 57 | (multiple-value-bind (form end) 58 | (parse-with-garbage 'lex-line-rule string) 59 | ;;;FIXME::assumes that } terminates the line, which for this file does 60 | (let ((last-bracket (position #\} string :from-end t))) 61 | (list form 62 | (subseq string (1+ end) 63 | last-bracket))))) 64 | 65 | ;;(string-a-prefix-b-p "a" "ab") -> T 66 | ;;(string-a-prefix-b-p "ac" "ab") -> 67 | (defun string-a-prefix-b-p (a b) 68 | "test whether string a is a prefix of b" 69 | (when (> (length a) 70 | (length b)) 71 | ;;(error "a is longer than b") 72 | (return-from string-a-prefix-b-p nil) 73 | ) 74 | (dotimes (index (length a)) 75 | (unless (char= (aref a index) 76 | (aref b index)) 77 | (return-from string-a-prefix-b-p nil))) 78 | t) 79 | 80 | (deflazy *processed-definitions* (*lex-definitions-lines*) 81 | (mapcar 'split-lex-line-def 82 | *lex-definitions-lines*)) 83 | (defun pipeline (&optional (def "hello [90]")) 84 | (compile-to-esrap-liquid (split-lex-line-def def))) 85 | (defun compile-to-esrap-liquid (item) 86 | (destructuring-bind (name rule) item 87 | (let ((form `(define-c-parse-rule ,(find-lex-symbol name) () 88 | ,(lex-rule-dump rule)))) 89 | form))) 90 | (defun load-processed-definitions () 91 | `(progn 92 | ,@(mapcar 93 | 'compile-to-esrap-liquid 94 | (getfnc '*processed-definitions*)))) 95 | (deflazy *processed-rules* (*lex-rules-lines*) 96 | (mapcar 'split-lex-line-rule 97 | *lex-rules-lines*)) 98 | (defparameter *syms* nil) 99 | (defun bar () 100 | (let* ((processed-rules (getfnc '*processed-rules*)) 101 | (iota (alexandria:iota (list-length processed-rules))) 102 | (syms (mapcar 'sym-name iota))) 103 | (setf *syms* syms) 104 | `(progn 105 | ,@(mapcar (lambda (name x) 106 | (let ((what-fun (parse-lex-def (second x)))) 107 | (utility:with-gensyms (parse-result) 108 | `(define-c-parse-rule ,name ,() 109 | (let ((,parse-result ,(lex-rule-dump (first x)))) 110 | (list 111 | ,parse-result 112 | ,what-fun 113 | ,(flet ((convert-to-token (x) 114 | (yacc-symbol x))) 115 | (case what-fun 116 | (:comment 117 | `(progn (v lex-comment-end) 118 | ,(convert-to-token nil))) 119 | (:check-type ;;:check-type 120 | `(cond 121 | #+nil 122 | (nil ;;FIXME::actually check for enums 123 | (quote ,(convert-to-token "ENUMERATION_CONSTANT"))) 124 | (t 125 | (quote 126 | ,(convert-to-token "IDENTIFIER")))) 127 | ;;FIXME::detect typedefs and enums 128 | ) 129 | (otherwise `(quote ,(convert-to-token what-fun))))))))))) 130 | syms 131 | processed-rules) 132 | (define-c-parse-rule lexer-foo () 133 | ;;why? it was taking around 13 to 20 seconds to compile 134 | ;;most-full-parse 135 | (v reimplemented-most-full-parse *syms*))))) 136 | 137 | (in-package :esrap-liquid) 138 | ;;change sort ->stable-sort nreverse 139 | (defmacro esrap-liquid::most-full-parse2 (clauses) 140 | (once-only (clauses) 141 | (with-gensyms (g!-result g!-the-length g!-successful-parses 142 | ;;g!-parse-errors 143 | b!-max-length 144 | b!-max-result 145 | b!-max-cap-stash 146 | b!-list-iterator) 147 | `(tracing-level 148 | (if-debug "MOST-FULL-PARSE") 149 | (multiple-value-bind (,g!-result ,g!-the-length) 150 | (let (;;,g!-parse-errors 151 | ,b!-max-result 152 | ,g!-successful-parses 153 | ,b!-max-cap-stash 154 | (,b!-max-length 0)) 155 | (dolist (,b!-list-iterator ,clauses) 156 | (the-position-boundary 157 | (print-iter-state) 158 | (with-saved-iter-state (the-iter) 159 | (with-fresh-cap-stash 160 | (handler-case (descend-with-rule ,b!-list-iterator) 161 | (internal-esrap-error (e) 162 | (declare (ignorable e)) 163 | (restore-iter-state) 164 | ;;(push e ,g!-parse-errors) 165 | ) 166 | (:no-error (res) 167 | (restore-iter-state) 168 | (when (> the-length ,b!-max-length) 169 | (setf ,b!-max-length the-length) 170 | (setf ,g!-successful-parses t) 171 | (setf ,b!-max-result res) 172 | (setf ,b!-max-cap-stash *cap-stash*)) 173 | #+nil 174 | (push (list res the-length *cap-stash*) 175 | ,g!-successful-parses))))))) 176 | (if ,g!-successful-parses 177 | (multiple-value-bind (res length stash) (values ,b!-max-result 178 | ,b!-max-length 179 | ,b!-max-cap-stash) 180 | ,(propagate-cap-stash-upwards '*cap-stash* 'stash nil) 181 | (fast-forward the-iter length) 182 | (values res length)) 183 | (progn (if-debug "|| before failing P ~a L ~a" the-position the-length) 184 | (fail-parse "MOST-FULL-PARSE failed.")))) 185 | (if-debug "MOST-FULL-PARSE aftermath ~a ~a" the-length ,g!-the-length) 186 | (incf the-length ,g!-the-length) 187 | ,g!-result))))) 188 | (in-package :c-parse) 189 | 190 | (define-c-parse-rule reimplemented-most-full-parse (syms) 191 | (esrap-liquid::most-full-parse2 syms)) 192 | 193 | (defun sym-name (x) 194 | (find-lex-symbol (format nil "LEX-GENERATED~a" x))) 195 | 196 | (defun eval-lexer () 197 | (print "loading defs:") 198 | (eval (load-processed-definitions)) 199 | (print "loading rules:") 200 | (eval (bar))) 201 | 202 | ;;;; 203 | (defun parse-lex-def (text) 204 | (parse-with-garbage 'ad-hoc-lex-read-file text)) 205 | 206 | ;;;FIXME:: fragile hack that picks out two irregular cases? 207 | ;;;or is this how to do it? 208 | (define-c-parse-rule ad-hoc-lex-read-file () 209 | (|| (progn (? whitespace) 210 | (v "comment();") 211 | :comment) 212 | (progn 213 | (? whitespace) 214 | (v "return check_type();") 215 | :check-type) 216 | lex-read-return)) 217 | 218 | (define-c-parse-rule lex-read-return () 219 | (? whitespace) 220 | (v "return") 221 | (? whitespace) 222 | (cap :thing (|| lex-read-char 223 | lex-read-token 224 | lex-token-string)) 225 | (? whitespace) 226 | (v #\;) 227 | (recap :thing)) 228 | 229 | (define-c-parse-rule lex-read-char () 230 | (progm #\' 231 | character 232 | #\')) 233 | (define-c-parse-rule lex-read-token () 234 | (progm #\( 235 | lex-token-string 236 | #\))) 237 | -------------------------------------------------------------------------------- /lex.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | ;;;;implementation of the lex lexer 3 | ;;http://dinosaur.compilertools.net/lex/index.html 4 | ;;" \ [ ] ^ - ? . * + | ( ) $ / { } % < > ;;operators that need to be escaped 5 | ;;Another use of the quoting mechanism is to get a blank into an expression; 6 | ;;normally, as explained above, blanks or tabs end a rule. 7 | ;;Any blank character not contained within [] (see below) must be quoted. 8 | ;;Several normal C escapes with \ are recognized: \n is newline, \t is tab, and \b is backspace. 9 | ;;To enter \ itself, use \\. Since newline is illegal in an expression, \n must be used; 10 | ;;it is not required to escape tab and backspace. Every character but blank, tab, newline and the list above is always a text character. 11 | 12 | ;;\ - and ^ ;;special characters for [] 13 | 14 | ;;x the character "x" 15 | ;;"x" an "x", even if x is an operator. 16 | ;;\x an "x", even if x is an operator. 17 | ;;[xy] the character x or y. 18 | ;;[x-z] the characters x, y or z. 19 | ;;[^x] any character but x. 20 | ;;. any character but newline. 21 | ;;^x an x at the beginning of a line. ;;ignore 22 | ;;x an x when Lex is in start condition y. ;;ignore 23 | ;;x$ an x at the end of a line. ;;ignore 24 | ;;x? an optional x. 25 | ;;x* 0,1,2, ... instances of x. 26 | ;;x+ 1,2,3, ... instances of x. 27 | ;;x|y an x or a y. 28 | ;;(x) an x. 29 | ;;x/y an x but only if followed by y. ;;ignore 30 | ;;{xx} the translation of xx from the definitions section. 31 | ;;x{m,n} m through n occurrences of x 32 | 33 | ;;| repeats the lex rule to the next listed rule 34 | (utility:eval-always 35 | (defparameter *lex-special-chars* 36 | '((#\t #\tab) 37 | (#\n #\Newline) 38 | (#\b #\backspace) 39 | (#\v #\vt) 40 | (#\f #\formfeed) ;;FIXME - see below 41 | (#\r #\return) ;;FIXME -> what chars are allowed? 42 | ))) 43 | 44 | (defun escaped-char-to-char (char) 45 | ;;Several normal C escapes with \ are recognized: \n is newline, \t is tab, and \b is backspace. 46 | (utility:etouq 47 | `(case char 48 | ,@*lex-special-chars* 49 | (otherwise char)))) 50 | 51 | (define-c-parse-rule lex-number () 52 | (read-from-string (stringify 53 | (postimes 54 | (character-ranges 55 | (#\0 #\9)))))) 56 | 57 | (define-c-parse-rule lex-char-or-escaped-char () 58 | (|| lex-char 59 | (progn (v #\\) 60 | (let ((char (v character))) 61 | (escaped-char-to-char char))))) 62 | (utility:eval-always 63 | ;;FIXME:misnomer. not a regular expression 64 | (defparameter *lex-regex-operators* 65 | (coerce 66 | "\"\\[]^-?.*+|()$/{}%<>" 67 | 'list))) 68 | 69 | (flet ((escape (escaped-char char) 70 | (if escaped-char 71 | (format nil "\\~A" escaped-char) 72 | (string char)))) 73 | ;;;;different contexts have different escape seqences 74 | (defun char-to-escaped-char-string (char) 75 | ;;used in string rule 76 | (let ((escaped-char 77 | (utility:etouq 78 | `(case char 79 | ,@(mapcar 'reverse *lex-special-chars*) 80 | (otherwise nil))))) 81 | (escape escaped-char char))) 82 | (defun char-to-escaped-char (char) 83 | ;;used as expression 84 | (let ((escaped-char 85 | (utility:etouq 86 | `(case char 87 | ,@(mapcar 'reverse *lex-special-chars*) 88 | (,*lex-regex-operators* char) 89 | (otherwise nil))))) 90 | (escape escaped-char char))) 91 | (defun char-to-escaped-char-character-class (char) 92 | ;;used in character class 93 | (let ((escaped-char 94 | (utility::etouq 95 | `(case char 96 | (#\\ #\\) 97 | (#\] #\]) 98 | (#\? #\?) ;;FIXME::characters added here on a case by case basis? 99 | ,@(mapcar 'reverse *lex-special-chars*) 100 | (otherwise nil))))) 101 | (escape escaped-char char)))) 102 | 103 | (define-c-parse-rule lex-char () 104 | ;;" \ [ ] ^ - ? . * + | ( ) $ / { } % < > ;;operators that need to be escaped 105 | (! (utility:etouq `(|| ,@*lex-regex-operators*))) 106 | (v character)) 107 | 108 | (define-c-parse-rule lex-string () 109 | (progm #\" 110 | (stringify (utility:etouq 111 | `(times (|| lex-char-or-escaped-char 112 | (|| ,@(set-difference *lex-regex-operators* 113 | '(#\" #\\))))))) 114 | #\")) 115 | 116 | (progn 117 | (struct-to-clos:struct->class 118 | (defstruct lex-character-range 119 | start 120 | end)) 121 | (defun print-lex-character-range (stream object) 122 | (format stream "~a-~a" 123 | (char-to-escaped-char-character-class (lex-character-range-start object)) 124 | (char-to-escaped-char-character-class (lex-character-range-end object)))) 125 | (set-pprint-dispatch 'lex-character-range 'print-lex-character-range)) 126 | 127 | (define-c-parse-rule lex-character-range () 128 | ;;http://dinosaur.compilertools.net/lex/index.html 129 | ;;The - character indicates ranges. 130 | (cap :start (v lex-char-or-escaped-char)) 131 | (v #\-) 132 | (cap :end (v lex-char-or-escaped-char)) 133 | (make-lex-character-range 134 | :start (recap :start) 135 | :end (recap :end))) 136 | 137 | (defmacro with-write-parens ((stream) &body body) 138 | `(prog2 139 | (write-char #\( ,stream) 140 | (progn ,@body) 141 | (write-char #\) ,stream))) 142 | 143 | (progn 144 | (struct-to-clos:struct->class 145 | (defstruct lex-character-class 146 | negated-p 147 | (chars nil))) 148 | (defun print-lex-character-class (stream object) 149 | (;;with-write-parens (stream) 150 | progn 151 | (write-char #\[ stream) 152 | (when (lex-character-class-negated-p object) 153 | (write-char #\^ stream)) 154 | (dolist (item (lex-character-class-chars object)) 155 | (etypecase item 156 | (character 157 | (write-string (char-to-escaped-char-character-class item) 158 | stream)) 159 | (lex-character-range 160 | (print-lex-character-range stream item)))) 161 | (write-char #\] stream))) 162 | (set-pprint-dispatch 'lex-character-class 'print-lex-character-class)) 163 | (defun set-character-class-char (obj &rest data) 164 | (setf (lex-character-class-chars obj) data)) 165 | 166 | (define-c-parse-rule lex-rule-character-class () 167 | ;;http://dinosaur.compilertools.net/lex/index.html 168 | ;;In character classes, the ^ operator must appear as the first character after the left bracket; 169 | ;;it indicates that the resulting string is to be complemented with respect to the computer character set. Thus 170 | (v #\[) 171 | (cap :negation (? #\^)) 172 | (cap :chars 173 | ;;FIXME::what characters are allowed where? 174 | (utility:etouq 175 | `(times (|| lex-character-range 176 | lex-char-or-escaped-char 177 | ,@(set-difference *lex-regex-operators* 178 | '(#\])))))) 179 | (v #\]) 180 | (make-lex-character-class 181 | :negated-p (recap :negation) 182 | :chars (recap :chars))) 183 | (defparameter *print-raw* nil 184 | "toggle printing lex-sequence as a dot or a string. ") 185 | (progn 186 | (defparameter *lex-rule-repeat-infinity* :infinity 187 | "signify that the rule should repeat forever") 188 | (struct-to-clos:struct->class 189 | (defstruct lex-rule-repeat 190 | rule 191 | min 192 | (max *lex-rule-repeat-infinity*))) 193 | (defun print-lex-rule-repeat (stream object) 194 | (;;with-write-parens (stream) 195 | progn 196 | (write (lex-rule-repeat-rule object) :stream stream) 197 | (let ((min (lex-rule-repeat-min object)) 198 | (max (lex-rule-repeat-max object))) 199 | (flet ((single-char (x) 200 | (write-char x stream))) 201 | (cond ((and 202 | (not *print-raw*) 203 | (eql min 0) 204 | (eql max 1)) 205 | (single-char #\?)) 206 | ((and 207 | (not *print-raw*) 208 | (eql min 0) 209 | (eql max *lex-rule-repeat-infinity*)) 210 | (single-char #\*)) 211 | ((and 212 | (not *print-raw*) 213 | (eql min 1) 214 | (eql max *lex-rule-repeat-infinity*)) 215 | (single-char #\+)) 216 | (t 217 | (format stream "{~a,~a}" min max))))))) 218 | (set-pprint-dispatch 'lex-rule-repeat 'print-lex-rule-repeat)) 219 | 220 | (define-c-parse-rule lex-rule-? (rule) 221 | (v #\?) 222 | (make-lex-rule-repeat 223 | :rule rule 224 | :min 0 225 | :max 1)) 226 | (define-c-parse-rule lex-rule-* (rule) 227 | (v #\*) 228 | (make-lex-rule-repeat 229 | :rule rule 230 | :min 0 231 | :max *lex-rule-repeat-infinity*)) 232 | (define-c-parse-rule lex-rule-+ (rule) 233 | (v #\+) 234 | (make-lex-rule-repeat 235 | :rule rule 236 | :min 1 237 | :max *lex-rule-repeat-infinity*)) 238 | 239 | (progn 240 | (struct-to-clos:struct->class 241 | (defstruct lex-rule-reference 242 | string)) 243 | (defun print-lex-rule-reference (stream object) 244 | ;;FIXME::what characters can tokens consist of? 245 | (;;with-write-parens (stream) 246 | progn 247 | (format stream "{~a}" 248 | (lex-rule-reference-string object)))) 249 | (set-pprint-dispatch 'lex-rule-reference 'print-lex-rule-reference)) 250 | 251 | (define-c-parse-rule lex-rule-definition () 252 | (make-lex-rule-reference 253 | :string 254 | (progm #\{ 255 | lex-token-string 256 | #\}))) 257 | (define-c-parse-rule lex-rule-occurences (rule) 258 | (v #\{) 259 | (cap :min (v lex-number)) 260 | (v #\,) 261 | (cap :max (v lex-number)) 262 | (v #\}) 263 | (make-lex-rule-repeat 264 | :rule rule 265 | :min (recap :min) 266 | :max (recap :max))) 267 | 268 | (define-c-parse-rule white-char () 269 | (|| #\Newline #\Space #\tab)) 270 | (define-c-parse-rule whitespace () 271 | (postimes white-char)) 272 | (progn 273 | ;;FIXME::lex-rule, which handles sequences, is becoming dumping ground for 274 | ;;irregular lex syntax like strings and the dot ->. 275 | (struct-to-clos:struct->class 276 | (defstruct lex-rule 277 | data 278 | ;;dot 279 | (print-as-dot nil) 280 | ;;characters 281 | (with-parens nil) 282 | ;;strings and chars 283 | (string-print-as-char-p nil) 284 | string-data 285 | (string-p nil))) 286 | (defun print-lex-rule (stream object) 287 | ;;FIXME::what characters can tokens consist of? 288 | (flet ((print-stuff () 289 | (dolist (item (lex-rule-data object)) 290 | (format stream "~a" item)))) 291 | (cond (;;for the . operator 292 | (and (not *print-raw*) 293 | (lex-rule-print-as-dot object)) 294 | ;;FIXME::dots are converted into lex-rule sequences. 295 | ;;have separate special object for shortening? 296 | (write-char #\. stream)) 297 | (;; for strings and characters 298 | (and (not *print-raw*) 299 | (lex-rule-string-p object)) 300 | (let ((str (lex-rule-string-data object))) 301 | (cond ((and (= 1 (length str)) 302 | (lex-rule-string-print-as-char-p object)) 303 | (write-string (char-to-escaped-char (aref str 0)) 304 | stream)) 305 | (t 306 | (write-char #\" stream) 307 | (let ((str str)) 308 | (dotimes (index (length str)) 309 | (write-string (char-to-escaped-char-string (aref str index)) 310 | stream))) 311 | (write-char #\" stream))))) 312 | (;;if this was read with parentheses? 313 | t 314 | (if (lex-rule-with-parens object) 315 | (with-write-parens (stream) 316 | (print-stuff)) 317 | (print-stuff)))))) 318 | (set-pprint-dispatch 'lex-rule 'print-lex-rule)) 319 | (define-c-parse-rule lex-rule-parentheses () 320 | (let ((lex-rule-sequence 321 | (progm #\( 322 | lex-rule-sequence 323 | #\)))) 324 | (setf (lex-rule-with-parens lex-rule-sequence) t) 325 | lex-rule-sequence)) 326 | 327 | 328 | (define-c-parse-rule lex-rule-all-but-newline-rule () 329 | (v #\.) 330 | (make-lex-rule 331 | :print-as-dot t 332 | :data 333 | (list 334 | (match-one-char 335 | #\Newline 336 | (make-lex-character-class 337 | :negated-p t))))) 338 | 339 | (defun match-one-char (char &optional (character-class-rule 340 | (make-lex-character-class))) 341 | "create a sequence rule that matches one character" 342 | (set-character-class-char 343 | character-class-rule 344 | char) 345 | (make-lex-rule-repeat 346 | :rule character-class-rule 347 | :min 1 348 | :max 1)) 349 | ;;the string object covers both strings and individual characters 350 | (defun match-string (string &optional (print-as-char nil)) 351 | (make-lex-rule 352 | :string-data string 353 | :string-p t 354 | :string-print-as-char-p print-as-char 355 | :data 356 | (map 'list 357 | (lambda (char) 358 | (match-one-char char)) 359 | string))) 360 | (define-c-parse-rule lex-rule-string () 361 | (match-string (v lex-string))) 362 | (define-c-parse-rule lex-rule-char () 363 | (match-string (string (v lex-char-or-escaped-char)) t)) 364 | 365 | (progn 366 | (struct-to-clos:struct->class 367 | (defstruct lex-rule-or)) 368 | (defun print-lex-rule-or (stream object) 369 | (declare (ignorable object)) 370 | (format stream "|")) 371 | (set-pprint-dispatch 'lex-rule-or 'print-lex-rule-or)) 372 | (defparameter *bar-token* (make-lex-rule-or)) 373 | (define-c-parse-rule lex-rule-vertical-bar () 374 | (v #\|) 375 | *bar-token*) 376 | 377 | (define-c-parse-rule lex-atom (&optional (toplevel nil)) 378 | (when toplevel 379 | (! whitespace)) 380 | (let ((rule 381 | (|| 382 | lex-rule-char 383 | lex-rule-character-class 384 | lex-rule-string 385 | lex-rule-all-but-newline-rule 386 | lex-rule-parentheses 387 | lex-rule-definition))) 388 | ;;; 389 | (block out 390 | (loop 391 | (setf rule 392 | (|| (v lex-rule-? rule) 393 | (v lex-rule-* rule) 394 | (v lex-rule-+ rule) 395 | (v lex-rule-occurences rule) 396 | (return-from out rule))))))) 397 | 398 | (define-c-parse-rule lex-rule-sequence (&optional (toplevel nil)) 399 | (make-lex-rule 400 | :data 401 | (prog1 (list* (? (v lex-atom toplevel)) 402 | (times 403 | (|| 404 | lex-rule-vertical-bar 405 | (v lex-atom toplevel))))))) 406 | 407 | (define-c-parse-rule lex-rule-start () 408 | (v lex-rule-sequence t)) 409 | 410 | ;;character classes 411 | ;;strings <- can be replaced by a special lex-rule with all character-classes of length 1 412 | ;;numerical repetition 413 | ;;references 414 | ;;sequencing 415 | ;;options 416 | ;;all-but-newline <- not necessary? a character class? 417 | 418 | ;;"foo" -> ([f]{1,1}[o]{1,1}[o]{1,1}) 419 | ;;. -> ([^\n]{1,1}) 420 | 421 | ;;lex-rule-sequence sequencing -> concatenate + list-v? 422 | ;;lex-rule-or option -> || 423 | ;;lex-rule-repeat repeat -> times 424 | ;;lex-character-class -> [! with character] characters, || character-ranges 425 | ;;references -> references to other rules 426 | 427 | ;;;FIXME:: nasty hacks to dump esrap-liquid prettily 428 | (defparameter *v-wrap-necessary* t) 429 | (defmacro with-v-wrap-off (&body body) 430 | `(let ((*v-wrap-necessary* nil)) 431 | ,@body)) 432 | (defmacro with-v-wrap-on (&body body) 433 | `(let ((*v-wrap-necessary* t)) 434 | ,@body)) 435 | (defun lex-rule-dump-wrap (arg) 436 | (with-v-wrap-off 437 | (lex-rule-dump arg))) 438 | (defgeneric lex-rule-dump (node)) 439 | ;;sequencing 440 | (defun divide-by-token (list token) 441 | ;;(divide-by-token '(1 2 3 4 5 3234 234 3 4) 3) -> ((1 2) (4 5 3234 234) (4)) 442 | (let ((list-list ()) 443 | (current-list)) 444 | (flet ((save-current-list () 445 | (push (nreverse current-list) list-list))) 446 | (dolist (item list) 447 | (if (eql token item) 448 | (progn (save-current-list) 449 | (setf current-list nil)) 450 | (push item current-list))) 451 | (save-current-list)) 452 | (nreverse list-list))) 453 | (defmethod lex-rule-dump ((node lex-rule)) 454 | ;;deletable optimization, exits prematurely 455 | ;;#+nil 456 | (flet ((exit (n) 457 | (return-from lex-rule-dump n))) 458 | (let ((str (lex-rule-string-data node))) 459 | (when (lex-rule-string-p node) 460 | (let ((data 461 | (if (lex-rule-string-print-as-char-p node) 462 | (aref str 0) 463 | str))) 464 | (exit (if *v-wrap-necessary* 465 | `(v ,data) 466 | data)))))) 467 | 468 | ;;each lex rule's data is a list of sub atoms and bars denoting choice. 469 | ;;divide by token divides the list of sub atoms by the bars 470 | (let ((undumped (divide-by-token (lex-rule-data node) *bar-token*))) 471 | (flet ((do-it () 472 | (flet ((sub-or (list) 473 | 474 | (let ((items 475 | (mapcar 'lex-rule-dump list))) 476 | ;;optimization, deletable 477 | (when (and ;;(not *v-wrap-necessary*) 478 | (= 1 (length items))) 479 | (return-from sub-or (first items))) 480 | `(list-v ,@items)))) 481 | (let ((answer 482 | (mapcar #'sub-or undumped))) 483 | (case (length answer) 484 | (1 (first answer)) 485 | (otherwise `(|| ,@answer))))))) 486 | (case (length undumped) 487 | (1 (with-v-wrap-on (do-it))) 488 | (otherwise (with-v-wrap-off (do-it))))))) 489 | (defmethod lex-rule-dump ((node lex-rule-repeat)) 490 | (let ((min (lex-rule-repeat-min node)) 491 | (max (lex-rule-repeat-max node)) 492 | (subexpr (lex-rule-dump-wrap (lex-rule-repeat-rule node)))) 493 | ;;optimization 494 | (flet ((end (n) 495 | (return-from lex-rule-dump n))) 496 | (cond 497 | ((eql min max) 498 | (case (utility:any min max) 499 | (0 (end nil)) 500 | (1 (end subexpr)) ;;repeat exactly one time, repetition uneccessary 501 | )) 502 | ((and (eql min 0) 503 | (eql max 1)) 504 | (end `(? ,subexpr))) 505 | ((and (eql min 1) 506 | (eql max *lex-rule-repeat-infinity*)) 507 | (end `(postimes ,subexpr))))) 508 | 509 | `(times ,subexpr 510 | ,@(unless (zerop min) 511 | `(:from ,min)) 512 | ,@(if (eql max *lex-rule-repeat-infinity*) 513 | nil 514 | `(:upto ,max))))) 515 | (defmethod lex-rule-dump ((node lex-character-class)) 516 | (let ((chars (lex-character-class-chars node))) 517 | (let ((char-rules (remove-if-not 'characterp chars)) 518 | (range-rules 519 | (mapcar (lambda (range) 520 | `(,(lex-character-range-start range) 521 | ,(lex-character-range-end range))) 522 | (remove-if-not 'lex-character-range-p chars)))) 523 | (let ((rules (append range-rules char-rules))) 524 | (let ((rules-form `(character-ranges ,@rules))) 525 | (cond ((lex-character-class-negated-p node) 526 | `(progn 527 | (! ,rules-form) 528 | (v character))) 529 | (t 530 | ;;optimization 531 | (when (and (= 1 (length char-rules)) 532 | (zerop (length range-rules))) 533 | (let ((char (first char-rules))) 534 | (return-from lex-rule-dump 535 | (if *v-wrap-necessary* 536 | `(v ,char) 537 | char)))) 538 | rules-form))))))) 539 | (defparameter *some-symbols* (make-package "LEX-C-PARSE-SYMBOLS")) 540 | (defun find-lex-symbol (string) 541 | (intern string *some-symbols*)) 542 | (defmethod lex-rule-dump ((node lex-rule-reference)) 543 | `(v ,(find-lex-symbol (lex-rule-reference-string node)))) 544 | -------------------------------------------------------------------------------- /lex.txt: -------------------------------------------------------------------------------- 1 | %e 1019 2 | %p 2807 3 | %n 371 4 | %k 284 5 | %a 1213 6 | %o 1117 7 | 8 | O [0-7] 9 | D [0-9] 10 | NZ [1-9] 11 | L [a-zA-Z_] 12 | A [a-zA-Z_0-9] 13 | H [a-fA-F0-9] 14 | HP (0[xX]) 15 | E ([Ee][+-]?{D}+) 16 | P ([Pp][+-]?{D}+) 17 | FS (f|F|l|L) 18 | IS (((u|U)(l|L|ll|LL)?)|((l|L|ll|LL)(u|U)?)) 19 | CP (u|U|L) 20 | SP (u8|u|U|L) 21 | ES (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+)) 22 | WS [ \t\v\n\f] 23 | 24 | %{ 25 | #include 26 | #include "y.tab.h" 27 | 28 | extern void yyerror(const char *); /* prints grammar violation message */ 29 | 30 | extern int sym_type(const char *); /* returns type from symbol table */ 31 | 32 | #define sym_type(identifier) IDENTIFIER /* with no symbol table, fake it */ 33 | 34 | static void comment(void); 35 | static int check_type(void); 36 | %} 37 | 38 | %% 39 | "/*" { comment(); } 40 | "//".* { /* consume //-comment */ } 41 | 42 | "auto" { return(AUTO); } 43 | "break" { return(BREAK); } 44 | "case" { return(CASE); } 45 | "char" { return(CHAR); } 46 | "const" { return(CONST); } 47 | "continue" { return(CONTINUE); } 48 | "default" { return(DEFAULT); } 49 | "do" { return(DO); } 50 | "double" { return(DOUBLE); } 51 | "else" { return(ELSE); } 52 | "enum" { return(ENUM); } 53 | "extern" { return(EXTERN); } 54 | "float" { return(FLOAT); } 55 | "for" { return(FOR); } 56 | "goto" { return(GOTO); } 57 | "if" { return(IF); } 58 | "inline" { return(INLINE); } 59 | "int" { return(INT); } 60 | "long" { return(LONG); } 61 | "register" { return(REGISTER); } 62 | "restrict" { return(RESTRICT); } 63 | "return" { return(RETURN); } 64 | "short" { return(SHORT); } 65 | "signed" { return(SIGNED); } 66 | "sizeof" { return(SIZEOF); } 67 | "static" { return(STATIC); } 68 | "struct" { return(STRUCT); } 69 | "switch" { return(SWITCH); } 70 | "typedef" { return(TYPEDEF); } 71 | "union" { return(UNION); } 72 | "unsigned" { return(UNSIGNED); } 73 | "void" { return(VOID); } 74 | "volatile" { return(VOLATILE); } 75 | "while" { return(WHILE); } 76 | "_Alignas" { return ALIGNAS; } 77 | "_Alignof" { return ALIGNOF; } 78 | "_Atomic" { return ATOMIC; } 79 | "_Bool" { return BOOL; } 80 | "_Complex" { return COMPLEX; } 81 | "_Generic" { return GENERIC; } 82 | "_Imaginary" { return IMAGINARY; } 83 | "_Noreturn" { return NORETURN; } 84 | "_Static_assert" { return STATIC_ASSERT; } 85 | "_Thread_local" { return THREAD_LOCAL; } 86 | "__func__" { return FUNC_NAME; } 87 | 88 | {L}{A}* { return check_type(); } 89 | 90 | {HP}{H}+{IS}? { return I_CONSTANT; } 91 | {NZ}{D}*{IS}? { return I_CONSTANT; } 92 | "0"{O}*{IS}? { return I_CONSTANT; } 93 | {CP}?"'"([^'\\\n]|{ES})+"'" { return I_CONSTANT; } 94 | 95 | {D}+{E}{FS}? { return F_CONSTANT; } 96 | {D}*"."{D}+{E}?{FS}? { return F_CONSTANT; } 97 | {D}+"."{E}?{FS}? { return F_CONSTANT; } 98 | {HP}{H}+{P}{FS}? { return F_CONSTANT; } 99 | {HP}{H}*"."{H}+{P}{FS}? { return F_CONSTANT; } 100 | {HP}{H}+"."{P}{FS}? { return F_CONSTANT; } 101 | 102 | ({SP}?\"([^"\\\n]|{ES})*\"{WS}*)+ { return STRING_LITERAL; } 103 | 104 | "..." { return ELLIPSIS; } 105 | ">>=" { return RIGHT_ASSIGN; } 106 | "<<=" { return LEFT_ASSIGN; } 107 | "+=" { return ADD_ASSIGN; } 108 | "-=" { return SUB_ASSIGN; } 109 | "*=" { return MUL_ASSIGN; } 110 | "/=" { return DIV_ASSIGN; } 111 | "%=" { return MOD_ASSIGN; } 112 | "&=" { return AND_ASSIGN; } 113 | "^=" { return XOR_ASSIGN; } 114 | "|=" { return OR_ASSIGN; } 115 | ">>" { return RIGHT_OP; } 116 | "<<" { return LEFT_OP; } 117 | "++" { return INC_OP; } 118 | "--" { return DEC_OP; } 119 | "->" { return PTR_OP; } 120 | "&&" { return AND_OP; } 121 | "||" { return OR_OP; } 122 | "<=" { return LE_OP; } 123 | ">=" { return GE_OP; } 124 | "==" { return EQ_OP; } 125 | "!=" { return NE_OP; } 126 | ";" { return ';'; } 127 | ("{"|"<%") { return '{'; } 128 | ("}"|"%>") { return '}'; } 129 | "," { return ','; } 130 | ":" { return ':'; } 131 | "=" { return '='; } 132 | "(" { return '('; } 133 | ")" { return ')'; } 134 | ("["|"<:") { return '['; } 135 | ("]"|":>") { return ']'; } 136 | "." { return '.'; } 137 | "&" { return '&'; } 138 | "!" { return '!'; } 139 | "~" { return '~'; } 140 | "-" { return '-'; } 141 | "+" { return '+'; } 142 | "*" { return '*'; } 143 | "/" { return '/'; } 144 | "%" { return '%'; } 145 | "<" { return '<'; } 146 | ">" { return '>'; } 147 | "^" { return '^'; } 148 | "|" { return '|'; } 149 | "?" { return '?'; } 150 | 151 | {WS}+ { /* whitespace separates tokens */ } 152 | . { /* discard bad characters */ } 153 | 154 | %% 155 | 156 | int yywrap(void) /* called at end of input */ 157 | { 158 | return 1; /* terminate now */ 159 | } 160 | 161 | static void comment(void) 162 | { 163 | int c; 164 | 165 | while ((c = input()) != 0) 166 | if (c == '*') 167 | { 168 | while ((c = input()) == '*') 169 | ; 170 | 171 | if (c == '/') 172 | return; 173 | 174 | if (c == 0) 175 | break; 176 | } 177 | yyerror("unterminated comment"); 178 | } 179 | 180 | static int check_type(void) 181 | { 182 | switch (sym_type(yytext)) 183 | { 184 | case TYPEDEF_NAME: /* previously defined */ 185 | return TYPEDEF_NAME; 186 | case ENUMERATION_CONSTANT: /* previously defined */ 187 | return ENUMERATION_CONSTANT; 188 | default: /* includes undefined */ 189 | return IDENTIFIER; 190 | } 191 | } 192 | 193 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | (defpackage :c-parse 2 | (:use :cl :esrap-liquid :yacc :trivia :deflazy)) 3 | (in-package :c-parse) 4 | -------------------------------------------------------------------------------- /preprocessor.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | 3 | ;;be loud when doing things 4 | (defparameter *verbose* 2) 5 | ;;different verbosity levels? 6 | 7 | (defun verbose-enough (n) 8 | (and (numberp *verbose*) 9 | (> *verbose* n))) 10 | (defun path-for-original (path) 11 | (reroot path :suffix "_original__")) 12 | 13 | (defun cache-those-originals (path) 14 | "save a copy of a file in the shadowroot, returning the path to the shadowroot file" 15 | (let ((new-path (path-for-original path))) 16 | (when *verbose* 17 | (format t "caching original files:~%for ~a ~%at ~a~%" path new-path)) 18 | (uiop:copy-file 19 | path 20 | new-path) 21 | new-path)) 22 | ;;;;C preprocessor 23 | ;;;;ignore trigraphs 24 | ;;;;non-portably use newlines to iindicate line breaks, not mac or windows 25 | ;;;;lines which end in backslash are joined 26 | ;;LF -> unix, CR LF ->DOS,VMS, CR ->classic mac os 27 | ;;just support unix? uiop:read-file-lines 28 | 29 | (defun line-attach-p (line) 30 | (let ((len (length line))) 31 | (unless (zerop len) 32 | (char= #\\ (uiop:last-char line))))) 33 | (defun all-but-last-char-string (str) 34 | (let ((len (length str))) 35 | (if (zerop len) 36 | "" 37 | (subseq str 0 (1- len))))) 38 | (defun attach (lines end) 39 | (%concatenate-string 40 | (nconc (mapcar 'all-but-last-char-string lines) 41 | (list end)))) 42 | (defun join-lines-list (&optional (file-lines '("bar\\" "foo"))) 43 | (let ((acc nil) 44 | (lines-acc)) 45 | (dolist (line file-lines) 46 | (if (line-attach-p line) 47 | (push line lines-acc) 48 | (progn 49 | (push (attach (nreverse lines-acc) line) 50 | acc) 51 | (setf lines-acc nil)))) 52 | (when lines-acc 53 | (push (attach (nreverse (butlast lines-acc)) 54 | (car (last lines-acc))) 55 | acc)) 56 | (nreverse acc))) 57 | (defun path-for-joined-lines (path) 58 | (reroot path :suffix "_no_continued_lines__")) 59 | (defun ensure-cached-original (&optional (file *testpath*)) 60 | (let ((original-path (path-for-original file))) 61 | ;;FIXME::better way to ensure things? a pipeline? 62 | (unless (file-exists-p original-path) 63 | (setf original-path (cache-those-originals file))) 64 | original-path)) 65 | ;;;;FIXME::put the file prefix/suffix code somewhere? 66 | (defun cache-those-joined-lines (&optional (file *testpath*)) 67 | (let ((original-path (ensure-cached-original file))) 68 | ;;FIXME::better way to ensure things? a pipeline? 69 | (let* ((file-lines (uiop:read-file-lines original-path)) 70 | (list (join-lines-list file-lines)) 71 | (path (path-for-joined-lines file))) 72 | (when (verbose-enough 5) 73 | (format t "caching joined lines:~%for ~a ~%at ~a~%" file path)) 74 | (with-open-file (output 75 | path 76 | :direction :output :if-exists :supersede :if-does-not-exist :create) 77 | (let ((len (list-length list)) 78 | (count 0)) 79 | (dolist (line list) 80 | (when (< 0 count (1- len)) 81 | (write-char #\Newline output)) 82 | (write-string line output) 83 | (incf count)))) 84 | path))) 85 | 86 | (define-c-parse-rule //comment () 87 | (progn-v #\/ 88 | #\/ 89 | (times (progn (! #\Newline) 90 | (v character))))) 91 | 92 | (define-c-parse-rule white-char-no-newline () 93 | (|| #\Space #\tab)) 94 | (define-c-parse-rule whitespace-no-newline () 95 | (postimes white-char-no-newline) 96 | nil) 97 | 98 | (define-c-parse-rule directive () 99 | (progn-v (times white-char-no-newline) 100 | #\# 101 | (prog1 (stringy (times (progn (! #\Newline) 102 | (|| 103 | ;;one line whitespace 104 | (progn (v whitespace-no-newline) 105 | #\Space) 106 | (progn (v lex-yacc-multiline-comment) 107 | #\Space) 108 | ;;just eat all the characters 109 | (v character))))) 110 | (? #\Newline)))) 111 | 112 | (define-c-parse-rule thing () 113 | (|| directive 114 | (progn (|| whitespace-no-newline 115 | lex-yacc-multiline-comment 116 | //comment 117 | character) 118 | nil))) 119 | ;;FIXME:: non-consing esrap-liquid? 120 | (defparameter *acc* nil) 121 | (defun get-directives (&optional (fun 'per-iter) (text *c-test-file*)) 122 | (catch 'out 123 | (let ((start 0)) 124 | (loop (multiple-value-bind (directive length) 125 | (parse-with-garbage 'thing text :start start) 126 | (when (eql 0 length) 127 | (throw 'out nil)) 128 | (when directive 129 | ;;(print (list directive length)) 130 | (funcall fun directive start length)) 131 | (incf start length))))) 132 | (values)) 133 | (defun per-iter (directive start end) 134 | (terpri) 135 | (princ directive) (print (list start end)) 136 | (push directive *acc*)) 137 | 138 | (defun path-for-cached-directive-intervals (path) 139 | (reroot path :suffix "_directive_interval__")) 140 | 141 | (defun file-exists-p (&optional (path *testpath*)) 142 | (probe-file path)) 143 | 144 | ;;;;FIXME::macro for caching? 145 | (defun ensure-cached-joined-lines (&optional (path *testpath*)) 146 | (let ((joined-lines (path-for-joined-lines path))) 147 | (unless (file-exists-p joined-lines) 148 | (setf joined-lines (cache-those-joined-lines path))) 149 | joined-lines)) 150 | (defun cache-those-directives (&optional (path *testpath*)) 151 | ;;depends on the lines being joined 152 | (let ((joined-lines (ensure-cached-joined-lines path))) 153 | (let ((text (alexandria:read-file-into-string joined-lines)) 154 | (cache-path (path-for-cached-directive-intervals path))) 155 | (when *verbose* 156 | (format t "caching directive intervals:~%for ~a ~%at ~a~%" path cache-path)) 157 | (with-open-file (output cache-path :direction :output :if-exists :supersede :if-does-not-exist :create) 158 | (get-directives 159 | (lambda (directive start end) 160 | (when (verbose-enough 4) 161 | (format *standard-output* "~%caching: start: ~a end: ~a ~% ~a" start end directive)) 162 | (princ (list start end) output) 163 | (write-char #\newline output)) 164 | text)) 165 | cache-path))) 166 | 167 | (defun ensure-cached-directive-intervals (&optional (path *testpath*)) 168 | (let ((cache-path (path-for-cached-directive-intervals path))) 169 | (unless (file-exists-p cache-path) 170 | (setf cache-path 171 | (cache-those-directives path))) 172 | cache-path)) 173 | ;;list of (start length) forms. start and length are integers 174 | (defun get-cached-directive-intervals (&optional (path *testpath*)) 175 | (let ((cache-path (ensure-cached-directive-intervals path))) 176 | (uiop:with-safe-io-syntax () 177 | (uiop:read-file-forms cache-path)))) 178 | 179 | (defun read-n-characters (times &optional (stream *standard-input*)) 180 | (with-output-to-string (string-stream) 181 | (loop :repeat times :do 182 | (write-char (read-char stream) 183 | string-stream)))) 184 | 185 | (defun read-character-section-from-file (start length &optional (path (path-for-joined-lines *testpath*))) 186 | (with-open-file (stream path) 187 | (unless 188 | (file-position stream start) 189 | (error "could not move the file position to ~a ~%for ~a" start path)) 190 | (read-n-characters length stream))) 191 | 192 | ;;for checking whether the intervals are legit. should start with whitespace, then 193 | ;;# pound, then can span multiple lines with multi-line comments, then terminate in newline 194 | (defun test-cached-intervals (&optional (path *testpath*)) 195 | (let ((intervals 196 | (get-cached-directive-intervals path)) 197 | ;;;need to keep track of which file were reading from 198 | (joined-lines (path-for-joined-lines path))) 199 | (mapcar (lambda (interval) 200 | (destructuring-bind (start length) interval 201 | (read-character-section-from-file start length joined-lines))) 202 | intervals))) 203 | 204 | ;;add more functions to delete and recreate as necessary 205 | (defun delete-all-cache (&optional (path *testpath*)) 206 | (mapc 'uiop:delete-file-if-exists 207 | (list 208 | (path-for-original path) 209 | (path-for-joined-lines path) 210 | (path-for-cached-directive-intervals path) 211 | (path-for-no-directives path) 212 | (path-for-token-intervals path)))) 213 | 214 | (defun path-for-no-directives (path) 215 | (reroot path :suffix "_no_directives__")) 216 | 217 | (defun get-anti-intervals (intervals end) 218 | ;;intervals are (start length) 219 | ;;return a list of (start length) pairs to iterate over 220 | ;; (do-anti-intervals '((0 1) (2 3)) 10) 0 | 2 3 4-> ((1 1) (5 5)) 1 | 5 6 7 8 9 221 | (let ((start 0) 222 | (acc nil)) 223 | (flet ((foo (start interval) 224 | (let ((length (- interval start))) 225 | (unless (= 0 length) ;;throw away empty intervals 226 | (push (list start length) acc))))) 227 | (dolist (interval intervals) 228 | (destructuring-bind (interval-start length) interval 229 | (foo start interval-start) 230 | (setf start (+ interval-start length)))) 231 | (foo start end)) 232 | (nreverse acc))) 233 | 234 | ;;because sometimes we work with bork strings and files 235 | (defun thing-length (thing) 236 | (typecase thing 237 | (stream (file-length thing)) 238 | (otherwise (length thing)))) 239 | 240 | (defun cache-those-no-directives (&optional (path *testpath*)) 241 | ;;depends on the lines being joined 242 | (let ((joined-lines (ensure-cached-joined-lines path))) 243 | (let ((text (alexandria:read-file-into-string joined-lines)) 244 | (intervals (get-cached-directive-intervals path)) 245 | (new-cache-path (path-for-no-directives path))) 246 | (when *verbose* 247 | (format *standard-output* "~%caching no-directives-file:~%for: ~a~%at: ~a" path new-cache-path)) 248 | (with-open-file (output new-cache-path :direction :output :if-exists 249 | :supersede :if-does-not-exist :create) 250 | (let ((anti-intervals 251 | (get-anti-intervals intervals 252 | (thing-length text))) 253 | (position 0)) 254 | (flet ((advance (&optional char) 255 | (if char 256 | (write-char char output) 257 | (let ((oldchar (aref text position))) 258 | (cond ((char= #\Newline oldchar) 259 | (write-char #\Newline output)) 260 | ((char= #\Tab oldchar) 261 | (write-char #\Tab output)) 262 | (t 263 | (write-char #\Space output))))) 264 | (incf position))) 265 | (dolist (spec anti-intervals) 266 | (destructuring-bind (start len) spec 267 | (while (not (= start position)) 268 | (advance)) 269 | (loop :for i :from start :below (+ start len) 270 | :do (advance (aref text position)))))))) 271 | new-cache-path))) 272 | 273 | (defun ensure-cached-no-directives (&optional (path *testpath*)) 274 | (let ((path-for-no-directives (path-for-no-directives path))) 275 | (unless (file-exists-p path-for-no-directives) 276 | (setf path-for-no-directives (cache-those-no-directives path))) 277 | path-for-no-directives)) 278 | (defun path-for-token-intervals (path) 279 | (reroot path :suffix "_token_intervals__")) 280 | (defun cache-those-lexed-tokens (&optional (path *testpath*)) 281 | (let ((path-for-no-directives (ensure-cached-no-directives path))) 282 | (let ((no-directives-text (alexandria:read-file-into-string path-for-no-directives)) 283 | (new-cache-path (path-for-token-intervals path))) 284 | (with-open-file (output new-cache-path :direction :output :if-exists 285 | :supersede :if-does-not-exist :create) 286 | (keep-lexing no-directives-text 287 | (lambda (token-type value) 288 | (let ((start (character-section-start value)) 289 | (end (character-section-end value))) 290 | (let ((*package* *yacc-package*)) 291 | (write (list start (- end start) token-type) 292 | :stream output 293 | :readably t 294 | :case :downcase) 295 | (write-char #\Newline output)))))) 296 | (return-from cache-those-lexed-tokens new-cache-path)))) 297 | 298 | (defun keep-lexing (text &optional (fun (lambda (token-type value) 299 | (print (list token-type value))))) 300 | (let ((lexer-fun (lex-for-cl-yacc text :end (length text))) 301 | (alive-p t)) 302 | (while alive-p 303 | (multiple-value-bind (token-type value) (funcall lexer-fun) 304 | (cond ((null token-type) 305 | (setf alive-p nil)) 306 | (t (funcall fun token-type value)))))) 307 | (values)) 308 | (defun ensure-cached-token-intervals (&optional (path *testpath*)) 309 | (let ((path-for-token-intervals (path-for-token-intervals path))) 310 | (unless (file-exists-p path-for-token-intervals) 311 | (setf path-for-token-intervals (cache-those-lexed-tokens path))) 312 | path-for-token-intervals)) 313 | (defun get-cached-token-intervals (&optional (path *testpath*)) 314 | (let ((path-for-token-intervals (ensure-cached-token-intervals path))) 315 | (uiop:with-safe-io-syntax (:package *yacc-package*) 316 | (uiop:read-file-forms path-for-token-intervals)))) 317 | 318 | (defun lex-for-cl-yacc-cached (&key 319 | (path *testpath*) 320 | (string (alexandria:read-file-into-string 321 | (ensure-cached-no-directives 322 | path))) 323 | (start 0) 324 | (end most-positive-fixnum);;FIXME::real end designator 325 | ) 326 | (ensure-cached-token-intervals path) 327 | (let* ((data (get-cached-token-intervals path)) 328 | (iter 329 | (find-just-before start data '>= :key 'car))) 330 | (lambda () 331 | (if iter 332 | (let ((spec (pop iter))) 333 | (destructuring-bind (start length token) spec 334 | (let ((this-token-end (+ start length))) 335 | (if (> this-token-end end) 336 | (values nil nil) 337 | (let ((str (subseq string start this-token-end))) 338 | (multiple-value-bind (token string) (correct-token-if-identifier token str) 339 | (values token 340 | (make-character-section 341 | :start start 342 | :end this-token-end 343 | :data string 344 | )))))))) 345 | (values nil nil))))) 346 | -------------------------------------------------------------------------------- /rip/ANSI C grammar (Lex).html: -------------------------------------------------------------------------------- 1 | 2 | ANSI C grammar (Lex) 3 | 4 | 9 | 10 |

ANSI C grammar, Lex specification

11 | 12 | (This Lex file is accompanied by a matching Yacc file.) 13 |

14 | In 1985, Jeff Lee published his Yacc grammar based on a draft version of 15 | the ANSI C standard, along with a supporting Lex specification. Tom 16 | Stockfisch reposted those files to net.sources in 1987; as mentioned in 17 | the answer to question 17.25 18 | of the comp.lang.c FAQ, they used to be 19 | available from ftp.uu.net as usenet/net.sources/ansi.c.grammar.Z. 20 |

21 | The version you see here has been updated based on the 2011 ISO C 22 | standard. 23 | (The previous version's Lex 24 | and 25 | Yacc 26 | files for ANSI C9X still exist as archived copies.) 27 |

28 | It is assumed that translation phases 1..5 have already been completed, 29 | including preprocessing and _Pragma processing. 30 | The Lex rule for string literals will perform concatenation (translation phase 6). 31 | Transliteration of universal character names (\uHHHH or \UHHHHHHHH) must 32 | have been done by either the preprocessor or a replacement for the 33 | input() macro used by Lex (or the YY_INPUT function used by Flex) 34 | to read characters. 35 | Although comments should have been changed to space characters during 36 | translation phase 3, there are Lex rules for them anyway. 37 |

38 | I want to keep this version as close to the current C Standard grammar 39 | as possible; please let me know if you discover 40 | discrepancies.
41 | (There is an FAQ 42 | for this grammar that you might want to read first.) 43 |

44 | jutta@pobox.com, 2012 45 |

46 | Last edit: 47 | 2012-12-19 DAGwyn@aol.com 48 |


49 | Note: The following %-parameters are the minimum sizes needed for real Lex. 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 |
%e number of parsed tree nodes
%p number of positions
%n number of states
%k number of packed character classes
%a number of transitions
%o size of output array
58 |
59 |

60 |

 61 | %e  1019
 62 | %p  2807
 63 | %n  371
 64 | %k  284
 65 | %a  1213
 66 | %o  1117
 67 | 
 68 | O   [0-7]
 69 | D   [0-9]
 70 | NZ  [1-9]
 71 | L   [a-zA-Z_]
 72 | A   [a-zA-Z_0-9]
 73 | H   [a-fA-F0-9]
 74 | HP  (0[xX])
 75 | E   ([Ee][+-]?{D}+)
 76 | P   ([Pp][+-]?{D}+)
 77 | FS  (f|F|l|L)
 78 | IS  (((u|U)(l|L|ll|LL)?)|((l|L|ll|LL)(u|U)?))
 79 | CP  (u|U|L)
 80 | SP  (u8|u|U|L)
 81 | ES  (\\(['"\?\\abfnrtv]|[0-7]{1,3}|x[a-fA-F0-9]+))
 82 | WS  [ \t\v\n\f]
 83 | 
 84 | %{
 85 | #include <stdio.h>
 86 | #include "y.tab.h"
 87 | 
 88 | extern void yyerror(const char *);  /* prints grammar violation message */
 89 | 
 90 | extern int sym_type(const char *);  /* returns type from symbol table */
 91 | 
 92 | #define sym_type(identifier) IDENTIFIER /* with no symbol table, fake it */
 93 | 
 94 | static void comment(void);
 95 | static int check_type(void);
 96 | %}
 97 | 
 98 | %%
 99 | "/*"                                    { comment(); }
100 | "//".*                                    { /* consume //-comment */ }
101 | 
102 | "auto"					{ return(AUTO); }
103 | "break"					{ return(BREAK); }
104 | "case"					{ return(CASE); }
105 | "char"					{ return(CHAR); }
106 | "const"					{ return(CONST); }
107 | "continue"				{ return(CONTINUE); }
108 | "default"				{ return(DEFAULT); }
109 | "do"					{ return(DO); }
110 | "double"				{ return(DOUBLE); }
111 | "else"					{ return(ELSE); }
112 | "enum"					{ return(ENUM); }
113 | "extern"				{ return(EXTERN); }
114 | "float"					{ return(FLOAT); }
115 | "for"					{ return(FOR); }
116 | "goto"					{ return(GOTO); }
117 | "if"					{ return(IF); }
118 | "inline"				{ return(INLINE); }
119 | "int"					{ return(INT); }
120 | "long"					{ return(LONG); }
121 | "register"				{ return(REGISTER); }
122 | "restrict"				{ return(RESTRICT); }
123 | "return"				{ return(RETURN); }
124 | "short"					{ return(SHORT); }
125 | "signed"				{ return(SIGNED); }
126 | "sizeof"				{ return(SIZEOF); }
127 | "static"				{ return(STATIC); }
128 | "struct"				{ return(STRUCT); }
129 | "switch"				{ return(SWITCH); }
130 | "typedef"				{ return(TYPEDEF); }
131 | "union"					{ return(UNION); }
132 | "unsigned"				{ return(UNSIGNED); }
133 | "void"					{ return(VOID); }
134 | "volatile"				{ return(VOLATILE); }
135 | "while"					{ return(WHILE); }
136 | "_Alignas"                              { return ALIGNAS; }
137 | "_Alignof"                              { return ALIGNOF; }
138 | "_Atomic"                               { return ATOMIC; }
139 | "_Bool"                                 { return BOOL; }
140 | "_Complex"                              { return COMPLEX; }
141 | "_Generic"                              { return GENERIC; }
142 | "_Imaginary"                            { return IMAGINARY; }
143 | "_Noreturn"                             { return NORETURN; }
144 | "_Static_assert"                        { return STATIC_ASSERT; }
145 | "_Thread_local"                         { return THREAD_LOCAL; }
146 | "__func__"                              { return FUNC_NAME; }
147 | 
148 | {L}{A}*					{ return check_type(); }
149 | 
150 | {HP}{H}+{IS}?				{ return I_CONSTANT; }
151 | {NZ}{D}*{IS}?				{ return I_CONSTANT; }
152 | "0"{O}*{IS}?				{ return I_CONSTANT; }
153 | {CP}?"'"([^'\\\n]|{ES})+"'"		{ return I_CONSTANT; }
154 | 
155 | {D}+{E}{FS}?				{ return F_CONSTANT; }
156 | {D}*"."{D}+{E}?{FS}?			{ return F_CONSTANT; }
157 | {D}+"."{E}?{FS}?			{ return F_CONSTANT; }
158 | {HP}{H}+{P}{FS}?			{ return F_CONSTANT; }
159 | {HP}{H}*"."{H}+{P}{FS}?			{ return F_CONSTANT; }
160 | {HP}{H}+"."{P}{FS}?			{ return F_CONSTANT; }
161 | 
162 | ({SP}?\"([^"\\\n]|{ES})*\"{WS}*)+	{ return STRING_LITERAL; }
163 | 
164 | "..."					{ return ELLIPSIS; }
165 | ">>="					{ return RIGHT_ASSIGN; }
166 | "<<="					{ return LEFT_ASSIGN; }
167 | "+="					{ return ADD_ASSIGN; }
168 | "-="					{ return SUB_ASSIGN; }
169 | "*="					{ return MUL_ASSIGN; }
170 | "/="					{ return DIV_ASSIGN; }
171 | "%="					{ return MOD_ASSIGN; }
172 | "&="					{ return AND_ASSIGN; }
173 | "^="					{ return XOR_ASSIGN; }
174 | "|="					{ return OR_ASSIGN; }
175 | ">>"					{ return RIGHT_OP; }
176 | "<<"					{ return LEFT_OP; }
177 | "++"					{ return INC_OP; }
178 | "--"					{ return DEC_OP; }
179 | "->"					{ return PTR_OP; }
180 | "&&"					{ return AND_OP; }
181 | "||"					{ return OR_OP; }
182 | "<="					{ return LE_OP; }
183 | ">="					{ return GE_OP; }
184 | "=="					{ return EQ_OP; }
185 | "!="					{ return NE_OP; }
186 | ";"					{ return ';'; }
187 | ("{"|"<%")				{ return '{'; }
188 | ("}"|"%>")				{ return '}'; }
189 | ","					{ return ','; }
190 | ":"					{ return ':'; }
191 | "="					{ return '='; }
192 | "("					{ return '('; }
193 | ")"					{ return ')'; }
194 | ("["|"<:")				{ return '['; }
195 | ("]"|":>")				{ return ']'; }
196 | "."					{ return '.'; }
197 | "&"					{ return '&'; }
198 | "!"					{ return '!'; }
199 | "~"					{ return '~'; }
200 | "-"					{ return '-'; }
201 | "+"					{ return '+'; }
202 | "*"					{ return '*'; }
203 | "/"					{ return '/'; }
204 | "%"					{ return '%'; }
205 | "<"					{ return '<'; }
206 | ">"					{ return '>'; }
207 | "^"					{ return '^'; }
208 | "|"					{ return '|'; }
209 | "?"					{ return '?'; }
210 | 
211 | {WS}+					{ /* whitespace separates tokens */ }
212 | .					{ /* discard bad characters */ }
213 | 
214 | %%
215 | 
216 | int yywrap(void)        /* called at end of input */
217 | {
218 |     return 1;           /* terminate now */
219 | }
220 | 
221 | static void comment(void)
222 | {
223 |     int c;
224 | 
225 |     while ((c = input()) != 0)
226 |         if (c == '*')
227 |         {
228 |             while ((c = input()) == '*')
229 |                 ;
230 | 
231 |             if (c == '/')
232 |                 return;
233 | 
234 |             if (c == 0)
235 |                 break;
236 |         }
237 |     yyerror("unterminated comment");
238 | }
239 | 
240 | static int check_type(void)
241 | {
242 |     switch (sym_type(yytext))
243 |     {
244 |     case TYPEDEF_NAME:                /* previously defined */
245 |         return TYPEDEF_NAME;
246 |     case ENUMERATION_CONSTANT:        /* previously defined */
247 |         return ENUMERATION_CONSTANT;
248 |     default:                          /* includes undefined */
249 |         return IDENTIFIER;
250 |     }
251 | }
252 | 
253 | 254 | 255 | -------------------------------------------------------------------------------- /rip/ANSI C grammar (Yacc).html: -------------------------------------------------------------------------------- 1 | 2 | 3 | ANSI C grammar (Yacc) 4 | 5 | 10 | 11 | 12 |

ANSI C Yacc grammar

13 | 14 | (This Yacc file is accompanied by a matching Lex file.)
15 |

16 | In 1985, Jeff Lee published his Yacc grammar based on a draft version 17 | of the ANSI C standard, along with a supporting Lex specification. 18 | Tom Stockfisch reposted those files to net.sources in 1987; 19 | as mentioned in the answer to question 17.25 20 | of the comp.lang.c FAQ, they used to be available from ftp.uu.net as 21 | usenet/net.sources/ansi.c.grammar.Z. 22 |

23 | The version you see here has been updated based on the 2011 ISO C standard. 24 | (The previous version's Lex 25 | and 26 | Yacc files for ANSI C9X still exist 27 | as archived copies.) 28 |

29 | This grammar assumes that translation phases 1..5 have already been completed, 30 | including preprocessing and _Pragma processing. 31 | The Lex rule for string literals will perform concatenation (translation phase 6). 32 | Transliteration of universal character names (\uHHHH or \UHHHHHHHH) must 33 | have been done by either the preprocessor or a replacement for the 34 | input() macro used by Lex (or the YY_INPUT function used by Flex) 35 | to read characters. 36 | Although comments should 37 | have been changed to space characters during translation phase 3, there 38 | are Lex rules for them anyway. 39 |

40 | I want to keep this version as close to the current C Standard grammar 41 | as possible; please let me know if you discover 42 | discrepancies.
43 | (There is an FAQ 44 | for this grammar that you might want to read first.) 45 |

46 | jutta@pobox.com, 2012 47 |

48 | Last edit: 49 | 2012-12-18 DAGwyn@aol.com 50 |


51 | Note: There are two shift/reduce conflicts, correctly resolved by default: 52 |
 53 |   IF '(' expression ')' statement _ ELSE statement
 54 | 
55 | and 56 |
 57 |   ATOMIC _ '(' type_name ')'
 58 | 
59 | where "_" has been used to flag the points of ambiguity. 60 |

61 |


62 |

63 |

 64 | %token	IDENTIFIER I_CONSTANT F_CONSTANT STRING_LITERAL FUNC_NAME SIZEOF
 65 | %token	PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP
 66 | %token	AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN
 67 | %token	SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN
 68 | %token	XOR_ASSIGN OR_ASSIGN
 69 | %token	TYPEDEF_NAME ENUMERATION_CONSTANT
 70 | 
 71 | %token	TYPEDEF EXTERN STATIC AUTO REGISTER INLINE
 72 | %token	CONST RESTRICT VOLATILE
 73 | %token	BOOL CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE VOID
 74 | %token	COMPLEX IMAGINARY 
 75 | %token	STRUCT UNION ENUM ELLIPSIS
 76 | 
 77 | %token	CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN
 78 | 
 79 | %token	ALIGNAS ALIGNOF ATOMIC GENERIC NORETURN STATIC_ASSERT THREAD_LOCAL
 80 | 
 81 | %start translation_unit
 82 | %%
 83 | 
 84 | primary_expression
 85 | 	: IDENTIFIER
 86 | 	| constant
 87 | 	| string
 88 | 	| '(' expression ')'
 89 | 	| generic_selection
 90 | 	;
 91 | 
 92 | constant
 93 | 	: I_CONSTANT		/* includes character_constant */
 94 | 	| F_CONSTANT
 95 | 	| ENUMERATION_CONSTANT	/* after it has been defined as such */
 96 | 	;
 97 | 
 98 | enumeration_constant		/* before it has been defined as such */
 99 | 	: IDENTIFIER
100 | 	;
101 | 
102 | string
103 | 	: STRING_LITERAL
104 | 	| FUNC_NAME
105 | 	;
106 | 
107 | generic_selection
108 | 	: GENERIC '(' assignment_expression ',' generic_assoc_list ')'
109 | 	;
110 | 
111 | generic_assoc_list
112 | 	: generic_association
113 | 	| generic_assoc_list ',' generic_association
114 | 	;
115 | 
116 | generic_association
117 | 	: type_name ':' assignment_expression
118 | 	| DEFAULT ':' assignment_expression
119 | 	;
120 | 
121 | postfix_expression
122 | 	: primary_expression
123 | 	| postfix_expression '[' expression ']'
124 | 	| postfix_expression '(' ')'
125 | 	| postfix_expression '(' argument_expression_list ')'
126 | 	| postfix_expression '.' IDENTIFIER
127 | 	| postfix_expression PTR_OP IDENTIFIER
128 | 	| postfix_expression INC_OP
129 | 	| postfix_expression DEC_OP
130 | 	| '(' type_name ')' '{' initializer_list '}'
131 | 	| '(' type_name ')' '{' initializer_list ',' '}'
132 | 	;
133 | 
134 | argument_expression_list
135 | 	: assignment_expression
136 | 	| argument_expression_list ',' assignment_expression
137 | 	;
138 | 
139 | unary_expression
140 | 	: postfix_expression
141 | 	| INC_OP unary_expression
142 | 	| DEC_OP unary_expression
143 | 	| unary_operator cast_expression
144 | 	| SIZEOF unary_expression
145 | 	| SIZEOF '(' type_name ')'
146 | 	| ALIGNOF '(' type_name ')'
147 | 	;
148 | 
149 | unary_operator
150 | 	: '&'
151 | 	| '*'
152 | 	| '+'
153 | 	| '-'
154 | 	| '~'
155 | 	| '!'
156 | 	;
157 | 
158 | cast_expression
159 | 	: unary_expression
160 | 	| '(' type_name ')' cast_expression
161 | 	;
162 | 
163 | multiplicative_expression
164 | 	: cast_expression
165 | 	| multiplicative_expression '*' cast_expression
166 | 	| multiplicative_expression '/' cast_expression
167 | 	| multiplicative_expression '%' cast_expression
168 | 	;
169 | 
170 | additive_expression
171 | 	: multiplicative_expression
172 | 	| additive_expression '+' multiplicative_expression
173 | 	| additive_expression '-' multiplicative_expression
174 | 	;
175 | 
176 | shift_expression
177 | 	: additive_expression
178 | 	| shift_expression LEFT_OP additive_expression
179 | 	| shift_expression RIGHT_OP additive_expression
180 | 	;
181 | 
182 | relational_expression
183 | 	: shift_expression
184 | 	| relational_expression '<' shift_expression
185 | 	| relational_expression '>' shift_expression
186 | 	| relational_expression LE_OP shift_expression
187 | 	| relational_expression GE_OP shift_expression
188 | 	;
189 | 
190 | equality_expression
191 | 	: relational_expression
192 | 	| equality_expression EQ_OP relational_expression
193 | 	| equality_expression NE_OP relational_expression
194 | 	;
195 | 
196 | and_expression
197 | 	: equality_expression
198 | 	| and_expression '&' equality_expression
199 | 	;
200 | 
201 | exclusive_or_expression
202 | 	: and_expression
203 | 	| exclusive_or_expression '^' and_expression
204 | 	;
205 | 
206 | inclusive_or_expression
207 | 	: exclusive_or_expression
208 | 	| inclusive_or_expression '|' exclusive_or_expression
209 | 	;
210 | 
211 | logical_and_expression
212 | 	: inclusive_or_expression
213 | 	| logical_and_expression AND_OP inclusive_or_expression
214 | 	;
215 | 
216 | logical_or_expression
217 | 	: logical_and_expression
218 | 	| logical_or_expression OR_OP logical_and_expression
219 | 	;
220 | 
221 | conditional_expression
222 | 	: logical_or_expression
223 | 	| logical_or_expression '?' expression ':' conditional_expression
224 | 	;
225 | 
226 | assignment_expression
227 | 	: conditional_expression
228 | 	| unary_expression assignment_operator assignment_expression
229 | 	;
230 | 
231 | assignment_operator
232 | 	: '='
233 | 	| MUL_ASSIGN
234 | 	| DIV_ASSIGN
235 | 	| MOD_ASSIGN
236 | 	| ADD_ASSIGN
237 | 	| SUB_ASSIGN
238 | 	| LEFT_ASSIGN
239 | 	| RIGHT_ASSIGN
240 | 	| AND_ASSIGN
241 | 	| XOR_ASSIGN
242 | 	| OR_ASSIGN
243 | 	;
244 | 
245 | expression
246 | 	: assignment_expression
247 | 	| expression ',' assignment_expression
248 | 	;
249 | 
250 | constant_expression
251 | 	: conditional_expression	/* with constraints */
252 | 	;
253 | 
254 | declaration
255 | 	: declaration_specifiers ';'
256 | 	| declaration_specifiers init_declarator_list ';'
257 | 	| static_assert_declaration
258 | 	;
259 | 
260 | declaration_specifiers
261 | 	: storage_class_specifier declaration_specifiers
262 | 	| storage_class_specifier
263 | 	| type_specifier declaration_specifiers
264 | 	| type_specifier
265 | 	| type_qualifier declaration_specifiers
266 | 	| type_qualifier
267 | 	| function_specifier declaration_specifiers
268 | 	| function_specifier
269 | 	| alignment_specifier declaration_specifiers
270 | 	| alignment_specifier
271 | 	;
272 | 
273 | init_declarator_list
274 | 	: init_declarator
275 | 	| init_declarator_list ',' init_declarator
276 | 	;
277 | 
278 | init_declarator
279 | 	: declarator '=' initializer
280 | 	| declarator
281 | 	;
282 | 
283 | storage_class_specifier
284 | 	: TYPEDEF	/* identifiers must be flagged as TYPEDEF_NAME */
285 | 	| EXTERN
286 | 	| STATIC
287 | 	| THREAD_LOCAL
288 | 	| AUTO
289 | 	| REGISTER
290 | 	;
291 | 
292 | type_specifier
293 | 	: VOID
294 | 	| CHAR
295 | 	| SHORT
296 | 	| INT
297 | 	| LONG
298 | 	| FLOAT
299 | 	| DOUBLE
300 | 	| SIGNED
301 | 	| UNSIGNED
302 | 	| BOOL
303 | 	| COMPLEX
304 | 	| IMAGINARY	  	/* non-mandated extension */
305 | 	| atomic_type_specifier
306 | 	| struct_or_union_specifier
307 | 	| enum_specifier
308 | 	| TYPEDEF_NAME		/* after it has been defined as such */
309 | 	;
310 | 
311 | struct_or_union_specifier
312 | 	: struct_or_union '{' struct_declaration_list '}'
313 | 	| struct_or_union IDENTIFIER '{' struct_declaration_list '}'
314 | 	| struct_or_union IDENTIFIER
315 | 	;
316 | 
317 | struct_or_union
318 | 	: STRUCT
319 | 	| UNION
320 | 	;
321 | 
322 | struct_declaration_list
323 | 	: struct_declaration
324 | 	| struct_declaration_list struct_declaration
325 | 	;
326 | 
327 | struct_declaration
328 | 	: specifier_qualifier_list ';'	/* for anonymous struct/union */
329 | 	| specifier_qualifier_list struct_declarator_list ';'
330 | 	| static_assert_declaration
331 | 	;
332 | 
333 | specifier_qualifier_list
334 | 	: type_specifier specifier_qualifier_list
335 | 	| type_specifier
336 | 	| type_qualifier specifier_qualifier_list
337 | 	| type_qualifier
338 | 	;
339 | 
340 | struct_declarator_list
341 | 	: struct_declarator
342 | 	| struct_declarator_list ',' struct_declarator
343 | 	;
344 | 
345 | struct_declarator
346 | 	: ':' constant_expression
347 | 	| declarator ':' constant_expression
348 | 	| declarator
349 | 	;
350 | 
351 | enum_specifier
352 | 	: ENUM '{' enumerator_list '}'
353 | 	| ENUM '{' enumerator_list ',' '}'
354 | 	| ENUM IDENTIFIER '{' enumerator_list '}'
355 | 	| ENUM IDENTIFIER '{' enumerator_list ',' '}'
356 | 	| ENUM IDENTIFIER
357 | 	;
358 | 
359 | enumerator_list
360 | 	: enumerator
361 | 	| enumerator_list ',' enumerator
362 | 	;
363 | 
364 | enumerator	/* identifiers must be flagged as ENUMERATION_CONSTANT */
365 | 	: enumeration_constant '=' constant_expression
366 | 	| enumeration_constant
367 | 	;
368 | 
369 | atomic_type_specifier
370 | 	: ATOMIC '(' type_name ')'
371 | 	;
372 | 
373 | type_qualifier
374 | 	: CONST
375 | 	| RESTRICT
376 | 	| VOLATILE
377 | 	| ATOMIC
378 | 	;
379 | 
380 | function_specifier
381 | 	: INLINE
382 | 	| NORETURN
383 | 	;
384 | 
385 | alignment_specifier
386 | 	: ALIGNAS '(' type_name ')'
387 | 	| ALIGNAS '(' constant_expression ')'
388 | 	;
389 | 
390 | declarator
391 | 	: pointer direct_declarator
392 | 	| direct_declarator
393 | 	;
394 | 
395 | direct_declarator
396 | 	: IDENTIFIER
397 | 	| '(' declarator ')'
398 | 	| direct_declarator '[' ']'
399 | 	| direct_declarator '[' '*' ']'
400 | 	| direct_declarator '[' STATIC type_qualifier_list assignment_expression ']'
401 | 	| direct_declarator '[' STATIC assignment_expression ']'
402 | 	| direct_declarator '[' type_qualifier_list '*' ']'
403 | 	| direct_declarator '[' type_qualifier_list STATIC assignment_expression ']'
404 | 	| direct_declarator '[' type_qualifier_list assignment_expression ']'
405 | 	| direct_declarator '[' type_qualifier_list ']'
406 | 	| direct_declarator '[' assignment_expression ']'
407 | 	| direct_declarator '(' parameter_type_list ')'
408 | 	| direct_declarator '(' ')'
409 | 	| direct_declarator '(' identifier_list ')'
410 | 	;
411 | 
412 | pointer
413 | 	: '*' type_qualifier_list pointer
414 | 	| '*' type_qualifier_list
415 | 	| '*' pointer
416 | 	| '*'
417 | 	;
418 | 
419 | type_qualifier_list
420 | 	: type_qualifier
421 | 	| type_qualifier_list type_qualifier
422 | 	;
423 | 
424 | 
425 | parameter_type_list
426 | 	: parameter_list ',' ELLIPSIS
427 | 	| parameter_list
428 | 	;
429 | 
430 | parameter_list
431 | 	: parameter_declaration
432 | 	| parameter_list ',' parameter_declaration
433 | 	;
434 | 
435 | parameter_declaration
436 | 	: declaration_specifiers declarator
437 | 	| declaration_specifiers abstract_declarator
438 | 	| declaration_specifiers
439 | 	;
440 | 
441 | identifier_list
442 | 	: IDENTIFIER
443 | 	| identifier_list ',' IDENTIFIER
444 | 	;
445 | 
446 | type_name
447 | 	: specifier_qualifier_list abstract_declarator
448 | 	| specifier_qualifier_list
449 | 	;
450 | 
451 | abstract_declarator
452 | 	: pointer direct_abstract_declarator
453 | 	| pointer
454 | 	| direct_abstract_declarator
455 | 	;
456 | 
457 | direct_abstract_declarator
458 | 	: '(' abstract_declarator ')'
459 | 	| '[' ']'
460 | 	| '[' '*' ']'
461 | 	| '[' STATIC type_qualifier_list assignment_expression ']'
462 | 	| '[' STATIC assignment_expression ']'
463 | 	| '[' type_qualifier_list STATIC assignment_expression ']'
464 | 	| '[' type_qualifier_list assignment_expression ']'
465 | 	| '[' type_qualifier_list ']'
466 | 	| '[' assignment_expression ']'
467 | 	| direct_abstract_declarator '[' ']'
468 | 	| direct_abstract_declarator '[' '*' ']'
469 | 	| direct_abstract_declarator '[' STATIC type_qualifier_list assignment_expression ']'
470 | 	| direct_abstract_declarator '[' STATIC assignment_expression ']'
471 | 	| direct_abstract_declarator '[' type_qualifier_list assignment_expression ']'
472 | 	| direct_abstract_declarator '[' type_qualifier_list STATIC assignment_expression ']'
473 | 	| direct_abstract_declarator '[' type_qualifier_list ']'
474 | 	| direct_abstract_declarator '[' assignment_expression ']'
475 | 	| '(' ')'
476 | 	| '(' parameter_type_list ')'
477 | 	| direct_abstract_declarator '(' ')'
478 | 	| direct_abstract_declarator '(' parameter_type_list ')'
479 | 	;
480 | 
481 | initializer
482 | 	: '{' initializer_list '}'
483 | 	| '{' initializer_list ',' '}'
484 | 	| assignment_expression
485 | 	;
486 | 
487 | initializer_list
488 | 	: designation initializer
489 | 	| initializer
490 | 	| initializer_list ',' designation initializer
491 | 	| initializer_list ',' initializer
492 | 	;
493 | 
494 | designation
495 | 	: designator_list '='
496 | 	;
497 | 
498 | designator_list
499 | 	: designator
500 | 	| designator_list designator
501 | 	;
502 | 
503 | designator
504 | 	: '[' constant_expression ']'
505 | 	| '.' IDENTIFIER
506 | 	;
507 | 
508 | static_assert_declaration
509 | 	: STATIC_ASSERT '(' constant_expression ',' STRING_LITERAL ')' ';'
510 | 	;
511 | 
512 | statement
513 | 	: labeled_statement
514 | 	| compound_statement
515 | 	| expression_statement
516 | 	| selection_statement
517 | 	| iteration_statement
518 | 	| jump_statement
519 | 	;
520 | 
521 | labeled_statement
522 | 	: IDENTIFIER ':' statement
523 | 	| CASE constant_expression ':' statement
524 | 	| DEFAULT ':' statement
525 | 	;
526 | 
527 | compound_statement
528 | 	: '{' '}'
529 | 	| '{'  block_item_list '}'
530 | 	;
531 | 
532 | block_item_list
533 | 	: block_item
534 | 	| block_item_list block_item
535 | 	;
536 | 
537 | block_item
538 | 	: declaration
539 | 	| statement
540 | 	;
541 | 
542 | expression_statement
543 | 	: ';'
544 | 	| expression ';'
545 | 	;
546 | 
547 | selection_statement
548 | 	: IF '(' expression ')' statement ELSE statement
549 | 	| IF '(' expression ')' statement
550 | 	| SWITCH '(' expression ')' statement
551 | 	;
552 | 
553 | iteration_statement
554 | 	: WHILE '(' expression ')' statement
555 | 	| DO statement WHILE '(' expression ')' ';'
556 | 	| FOR '(' expression_statement expression_statement ')' statement
557 | 	| FOR '(' expression_statement expression_statement expression ')' statement
558 | 	| FOR '(' declaration expression_statement ')' statement
559 | 	| FOR '(' declaration expression_statement expression ')' statement
560 | 	;
561 | 
562 | jump_statement
563 | 	: GOTO IDENTIFIER ';'
564 | 	| CONTINUE ';'
565 | 	| BREAK ';'
566 | 	| RETURN ';'
567 | 	| RETURN expression ';'
568 | 	;
569 | 
570 | translation_unit
571 | 	: external_declaration
572 | 	| translation_unit external_declaration
573 | 	;
574 | 
575 | external_declaration
576 | 	: function_definition
577 | 	| declaration
578 | 	;
579 | 
580 | function_definition
581 | 	: declaration_specifiers declarator declaration_list compound_statement
582 | 	| declaration_specifiers declarator compound_statement
583 | 	;
584 | 
585 | declaration_list
586 | 	: declaration
587 | 	| declaration_list declaration
588 | 	;
589 | 
590 | %%
591 | #include <stdio.h>
592 | 
593 | void yyerror(const char *s)
594 | {
595 | 	fflush(stdout);
596 | 	fprintf(stderr, "*** %s\n", s);
597 | }
598 | 
599 | 600 | 601 | -------------------------------------------------------------------------------- /rip/Frequently Asked Questions on the ANSI C grammar.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | Frequently Asked Questions on the ANSI C grammar 4 | 5 | 10 | 11 | 12 |

13 | (Context: C - yacc - lex) 14 |
15 | Frequently Asked Questions about the ANSI C Yacc Grammar

17 |
18 |

Q: How current is this? Are you still maintaining this?

19 |
20 | As of 2017, yes. There are some versions of the grammars 21 | floating around in various states of disrepair that I can't 22 | do anything about. The current one is at 23 | www.quut.com/c/ANSI-C-grammar-y.html. 25 | (I didn't own the previous hosts, but do control quut.com, so there's 26 | some chance of this URL being more stable than the others.) 27 | 28 |
29 |
30 |

Q: So, can I use this to make derived work?

31 |
32 | Yes, you can use this to make derived work with or without attribution; 33 | both I and the grammar's original poster are fine with that. 34 | (It's just a grammar. We've saved you some typing, but that's about it; 35 | the bulk of the work is still left.) 36 |
37 | 38 |
39 |

Q: Your grammar is broken! I'm getting a "shift-reduce conflict"!

40 |
This is known as the dangling else problem. Theoretically, 41 | there are two possible parse trees for 42 |
 43 | 	if (A) if (B) stmt1; else stmt2;
 44 | 
45 | In the correct parse tree, the "else" belongs to the "if (B)". 46 |
47 | 48 |
49 |
50 | In the incorrect but theoretically possible parse tree, the else 51 | belongs to the if (A). 52 |
53 |
54 | 55 |
56 |
57 | 58 | Programming languages usually* resolve this 59 | by attaching the else to the innermost open if. (This is the 60 | way many human languages deal with similar ambiguities as well.) 61 | C does it that way, too, and so do yacc and its descendants - 62 | it's safe to ignore the warning message, as long as you know 63 | your grammar and know what it means. 64 |

65 | A second shift-reduce conflict was introduced in C11 with the new 66 | "_Atomic" type-specifier (if followed by parentheses) or type-qualifier. 67 |

68 | You can make Yacc not print messages about shift/reduce conflicts 69 | by announcing how many there are in the header: 70 |

71 | %expect 2 72 |
73 | tells it to ignore one such warning. (So why didn't I include 74 | one in the grammar? Because this isn't a finished tool, just 75 | a part. I expect users to either just read or to heavily edit 76 | the grammar, and there's no point in polishing it beforehand.) 77 |

78 |


79 | * Every once in a while, a language designer is 80 | fed up with the ambiguity and decides to eradicate it once 81 | and for all. Some otherwise Algol-like languages solve the 82 | problem by always requiring curly braces around their if/else 83 | parts (perl, sieve), or by using indentation itself as a 84 | bracketing mechanism (python). 85 | 86 |
87 | 88 |
89 |

Q: Your grammar is broken! It doesn't parse this C program here!

90 |
To parse full-fledged C source code, you'll need at least 91 | a preprocessor and a semantic analyser. For example, you'll need to 92 | parse C declarations to keep track of which words are names of types 93 | and which ones are still available as new identifiers. 94 | You'll need to keep track of scopes, and update your tables of what 95 | identifiers mean as their declarations move in and out of scope. 96 |

97 | Writing a semantic analyser for C isn't easy. 98 |

99 | From far away, the grammar looks like all you need to do is 100 | to compile and run it (surely, with this much Yacc input, how 101 | much work can there be left?), but that's misleading. 102 |

103 |
104 | If you find anything else wrong, send email to jutta@pobox.com. Thanks! 105 |
106 | -------------------------------------------------------------------------------- /rip/README: -------------------------------------------------------------------------------- 1 | http://www.quut.com/c/ANSI-C-grammar-l-2011.html 2 | http://www.quut.com/c/ANSI-C-grammar-y.html 3 | http://www.quut.com/c/ANSI-C-grammar-FAQ.html 4 | 5 | I do not own any of these files, just saving them in case they go down. -------------------------------------------------------------------------------- /test.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | 3 | (fiveam:in-suite* c-parse) 4 | 5 | (defun run-tests () 6 | (let ((results (fiveam:run 'c-parse))) 7 | (fiveam:explain! results) 8 | (unless (fiveam:results-status results) 9 | (error "Tests failed.")))) 10 | 11 | (defun regex-character-class-to-esrap-liquid (regex) 12 | (lex-rule-dump (parse-with-garbage 'lex-rule-character-class regex))) 13 | (fiveam:test what 14 | (fiveam:is (char= (c-parse-parse 'lex-char-or-escaped-char "\\\\") 15 | #\\)) 16 | (fiveam:is (char= (c-parse-parse 'lex-char-or-escaped-char "\\t") 17 | #\Tab)) 18 | (fiveam:is (string= (c-parse-parse 'lex-string "\"234\\t234\"") 19 | "234 234")) 20 | (fiveam:is (string= (char-to-escaped-char #\Newline) 21 | "\\n")) 22 | (fiveam:is (string= (char-to-escaped-char #\s) 23 | "s")) 24 | (fiveam:is (char= (parse-with-garbage 25 | (regex-character-class-to-esrap-liquid "[^a-zA-Z_0-9]") 26 | "$")) 27 | #\$) 28 | (fiveam:is (char= (parse-with-garbage 29 | (regex-character-class-to-esrap-liquid "[a-zA-Z_0-9]") 30 | "S")) 31 | #\S)) 32 | 33 | (defun test-parse-rules (&optional (rules *lex-rules-lines*)) 34 | (mapcar (lambda (text) 35 | (parse-with-garbage 'lex-rule-start text)) 36 | rules)) 37 | 38 | 39 | ;;run split-lex-2 to set the dynamic variables 40 | (defun test-lines (&optional (rule 'lex-rule-start) (rules *lex-rules-lines*)) 41 | (let ((correct 0) 42 | (wrong 0)) 43 | (terpri) 44 | (mapc (lambda (text) 45 | (let* ((obj (parse-with-garbage rule text)) 46 | (a (princ-to-string 47 | obj))) 48 | (flet ((dump () 49 | (princ a) 50 | (terpri) 51 | (princ text) 52 | (terpri))) 53 | (cond ((string-a-prefix-b-p 54 | a 55 | text) 56 | (progn 57 | (format t "~%same:~%") 58 | (dump)) 59 | (incf correct)) 60 | (t 61 | (incf wrong) 62 | (format t "~%DIFFERENT:~%") 63 | (dump) 64 | ; (inspect obj) 65 | ))))) 66 | rules) 67 | (format t "correct: ~a wrong: ~a~%" correct wrong) 68 | (list wrong correct))) 69 | 70 | (defun teststuff () 71 | (test-lines) 72 | (test-lines 'lex-rule-start 73 | (mapcar 'spec-lex-rule-rule 74 | (mapcar 75 | (lambda (item) 76 | (parse-with-garbage 'lex-line-def item)) 77 | *lex-definitions-lines*)))) 78 | 79 | (defun test-things (&optional not-pretty) 80 | (let ((*print-raw* not-pretty)) 81 | (teststuff)) 82 | (values)) 83 | 84 | 85 | 86 | ;;;end 87 | ;;(string-thing lex-token-type yacc-token-type) 88 | ;;;; 89 | (defun lex (string &optional (stream *standard-output*)) 90 | (let ((start 0)) 91 | (loop 92 | (multiple-value-bind (result len) 93 | (parse-with-garbage 'lexer-foo string :start start) 94 | (when (zerop len) 95 | (return)) 96 | (destructuring-bind (string-thing ignorable yacc-token-type) result 97 | (declare (ignorable string-thing yacc-token-type ignorable)) 98 | ;;(write-char (char-code-object yacc-token-type) stream) 99 | (princ (stringy (car result)) stream) 100 | ) 101 | (incf start len))))) 102 | 103 | (defun lex2 (string) 104 | (with-output-to-string (stream) 105 | (lex string stream))) 106 | 107 | ;;FIXME:: hack -> using unicode characters to represent tokens, thus simplifyng tokens 108 | #+nil 109 | (progn 110 | (defparameter *char-code-pointer* nil) 111 | (defparameter *objects-to-characters* nil) 112 | (defun reset-char-code-object-table () 113 | (setf *char-code-pointer* 32) 114 | (setf *objects-to-characters* (make-hash-table :test 'equal))) 115 | (reset-char-code-object-table) 116 | (defun char-code-object (obj) 117 | (let ((there? (gethash obj *objects-to-characters*))) 118 | (unless there? 119 | (let ((new (code-char *char-code-pointer*))) 120 | (setf (gethash obj *objects-to-characters*) 121 | new) 122 | (setf there? new)) 123 | (incf *char-code-pointer*)) 124 | there?))) 125 | 126 | (define-c-parse-rule left-recursion? () 127 | (progn-v left-recursion? 128 | #\( 129 | character 130 | #\))) 131 | 132 | ;;32 -> 126 inclusive 133 | ;;0 -> 126 - 32 = 94 134 | ;;0 -> 94 inclusive = mod 95 135 | (defun ascii-increment (char) 136 | (let ((code (char-code char))) 137 | (code-char (+ 32 (mod (+ 1 (- code 32)) 95))))) 138 | 139 | #+nil 140 | (get-directives 'per-iter 141 | (alexandria:read-file-into-string 142 | (ensure-cached-no-directives 143 | "/home/imac/install/src/emacs-mirror/emacs-master/src/xdisp.c") 144 | )) 145 | (defparameter *cpp-test-path* "/home/imac/install/src/emacs-mirror/emacs-master/src/bytecode.c") 146 | (defun reroot-cpp (&optional (path *cpp-test-path*)) 147 | (reroot path :prefix "_cpp_")) 148 | 149 | (defun cpp-it (&optional (path *cpp-test-path*)) 150 | (uiop:run-program (print (cppbar path)) 151 | :output *standard-output* :error-output *standard-output*)) 152 | 153 | (defun cpp-include-directories-foo () 154 | (stringy (mapcar (lambda (x) 155 | (format nil " -I~a " x)) 156 | *include-directories*))) 157 | 158 | (defparameter *gnu-compiler-builtins-header* 159 | (merge-pathnames "test/GNU_compiler_builtins.h" *path*)) 160 | (defparameter *gnu-compiler-builtins-header-include-flag* 161 | (format nil " -include ~a " (uiop:unix-namestring *gnu-compiler-builtins-header*))) 162 | (defun cppbar (&optional (path *cpp-test-path*)) 163 | (let ((infile (uiop:unix-namestring path)) 164 | (outfile (uiop:unix-namestring (reroot-cpp path))) 165 | (flags (stringy 166 | (list 167 | ;;from man cpp 168 | ;;" -CC " ;;preserve comments 169 | " -P " ;;no line information 170 | *gnu-compiler-builtins-header-include-flag* 171 | "-std=c99" 172 | ;;" -E " 173 | #+nil 174 | "-fdirectives-only " ;; do not expand macros 175 | ;;"-fdebug-cpp" ;;token information? 176 | )))) 177 | (format nil "cpp ~a ~a ~a -o ~a " (cpp-include-directories-foo) flags infile outfile))) 178 | 179 | (defparameter *pycparser-src-path* 180 | ;;"/home/imac/install/src/pycparser-master/" 181 | ) 182 | (defparameter *pycparser-c-ast-cfg* 183 | (merge-pathnames "_c_ast.cfg" *path*) 184 | ;; (merge-pathnames "pycparser/_c_ast.cfg" *pycparser-src-path*) 185 | ) 186 | 187 | (deflazy:deflazy pycparser-c-ast-cfg () 188 | (remove-if 189 | (lambda (x) 190 | ;;;comment lines start with # 191 | (char= (aref x 0) 192 | #\# 193 | )) 194 | (file-lines-no-whitespace-lines 195 | (alexandria:read-file-into-string *pycparser-c-ast-cfg*)))) 196 | 197 | (defun compile-lex-def-to-esrap-liquid (&optional (rule-string "[a-zA-Z]*yolo")) 198 | (lex-rule-dump 199 | (parse-with-garbage 'lex-rule-start rule-string))) 200 | 201 | (defun wot89 () 202 | `(progn 203 | (define-c-parse-rule pycparser-cfg-name () 204 | (stringy ,(compile-lex-def-to-esrap-liquid "[a-zA-Z_]+"))))) 205 | (define-c-parse-rule pycparser-cfg () 206 | (let ((name (v pycparser-cfg-name))) 207 | (list* 208 | name 209 | (progn-v 210 | #\: 211 | #\Space 212 | (progm #\[ 213 | pycparser-c-ast-cfg-params 214 | #\]))))) 215 | (define-c-parse-rule pycparser-c-ast-cfg-params () 216 | (remove 217 | nil 218 | (times 219 | (|| 220 | (list-v pycparser-cfg-name 221 | (list-length (times #\*))) 222 | (progn (v ", ") nil))))) 223 | (deflazy pycparser-cfg-ast-nodes () 224 | (eval (wot89)) 225 | (values)) 226 | (defun parse-pycparser-c-ast-def (&optional (string "Union: [name, decls**]")) 227 | (getfnc 'pycparser-cfg-ast-nodes) 228 | (parse-with-garbage 'pycparser-cfg string)) 229 | 230 | (defun get-parsed-c-ast-defs () 231 | (let ((text (deflazy:getfnc 'pycparser-c-ast-cfg))) 232 | (mapcar 'parse-pycparser-c-ast-def 233 | text))) 234 | 235 | (defun dump-defstruct-from-pycparser-c-ast-def (def) 236 | (destructuring-bind (name &rest params) def 237 | (let ((name (json-to-lisp-symbol name)) 238 | (conc-name (utility:symbolicate2 (list (string name) ".") 239 | (symbol-package name)))) 240 | `(struct-to-clos:struct->class 241 | (defstruct (,name (:conc-name ,conc-name)) 242 | ,@ 243 | (mapcar 'json-to-lisp-symbol 244 | (mapcar 'first params))))))) 245 | 246 | (defun gen-pycparser-node-objets () 247 | (let ((data (get-parsed-c-ast-defs))) 248 | `(progn 249 | ,@(mapcar 'dump-defstruct-from-pycparser-c-ast-def data)))) 250 | 251 | ;;(defun eval-pycparser-node-objects ) 252 | 253 | ;;ripped from pycparser-master/pycparser/_c_ast.cfg 254 | ;;# * - a child node 255 | ;;# ** - a sequence of child nodes 256 | ;;# - an attribute 257 | 258 | ;;;;lispify and delispify are inverses 259 | (defun lispify (string) 260 | ;;;prefix all uppercase letters with a dash, and make lowercase 261 | (let ((acc nil)) 262 | (dotimes (index (length string)) 263 | (let ((char (aref string index))) 264 | (if (upper-case-p char) 265 | (progn 266 | (push #\- acc) 267 | (push (char-downcase char) acc)) 268 | (push char acc)))) 269 | (string-upcase (stringy (nreverse acc))))) 270 | (defun delispify (string) 271 | (let ((acc nil) 272 | (index 0) 273 | (len (length string))) 274 | (while (> len index) 275 | (let ((char (aref string index))) 276 | (if (char= #\- char) 277 | (progn 278 | (push (char-upcase (aref string (+ 1 index))) acc) 279 | (incf index 2)) 280 | (progn (push (char-downcase char) acc) 281 | (incf index 1))))) 282 | (stringy (nreverse acc)))) 283 | 284 | (progn 285 | (defparameter *json-to-lisp-names* 286 | (make-hash-table :test 'equal)) 287 | (defparameter *lisp-names-to-json* 288 | (make-hash-table :test 'eq))) 289 | (defparameter *json-name-package* (or (find-package :json-name) 290 | (make-package :json-name :use '(:cl)))) 291 | (defun make-and-export-sym (string package) 292 | (let ((sym (intern string 293 | package))) 294 | (export 295 | sym 296 | package) 297 | sym)) 298 | (defun json-to-lisp-symbol (string &optional (hash *json-to-lisp-names*) 299 | (package *json-name-package*) 300 | (otherhash *lisp-names-to-json*)) 301 | (symbol-macrolet ((place (gethash string hash))) 302 | (or place 303 | (let* ((newstr (lispify string)) 304 | (sym (make-and-export-sym newstr package))) 305 | (setf place sym) 306 | (setf (gethash sym otherhash) string) 307 | sym)))) 308 | (defun lisp-symbol-to-json (sym &optional (otherhash *lisp-names-to-json*)) 309 | (or (gethash sym otherhash) 310 | (error "no associated json name: ~a" sym))) 311 | -------------------------------------------------------------------------------- /test/GNU_compiler_builtins.h: -------------------------------------------------------------------------------- 1 | //https://stackoverflow.com/questions/28983726/pycparser-not-working-on-preprocessed-code 2 | typedef struct __builtin_va_list { } __builtin_va_list; 3 | 4 | #define __attribute__(x) 5 | #define __extension__ 6 | //https://github.com/eliben/pycparser/issues/210 7 | #define __asm__(x) 8 | #define __const const 9 | #define __inline__ inline 10 | #define __inline inline 11 | #define __restrict 12 | #define __signed__ signed 13 | #define __GNUC_VA_LIST 14 | #define __gnuc_va_list char 15 | //https://stackoverflow.com/questions/3385515/static-assert-in-c 16 | #define _Static_assert(COND,MSG) 17 | 18 | #define _Noreturn 19 | //C11 20 | #define _Alignas(x) 21 | #define _Alignof(x) 0 22 | 23 | //https://stackoverflow.com/questions/400116/what-is-the-purpose-and-return-type-of-the-builtin-offsetof-operator 24 | #define offsetof(type, member) (int)(&((type *)0)->member) // C definition, not C++ 25 | #define __builtin_offsetof(type, member) offsetof(type, member) 26 | -------------------------------------------------------------------------------- /test/hash.c: -------------------------------------------------------------------------------- 1 | /* 2 | ** C implementation of a hash table ADT 3 | */ 4 | typedef enum tagReturnCode {SUCCESS, FAIL} ReturnCode; 5 | 6 | 7 | typedef struct tagEntry 8 | { 9 | char* key; 10 | char* value; 11 | } Entry; 12 | 13 | 14 | 15 | typedef struct tagNode 16 | { 17 | Entry* entry; 18 | 19 | struct tagNode* next; 20 | } Node; 21 | 22 | 23 | typedef struct tagHash 24 | { 25 | unsigned int table_size; 26 | 27 | Node** heads; 28 | 29 | } Hash; 30 | 31 | 32 | static unsigned int hash_func(const char* str, unsigned int table_size) 33 | { 34 | unsigned int hash_value; 35 | unsigned int a = 127; 36 | 37 | for (hash_value = 0; *str != 0; ++str) 38 | hash_value = (a*hash_value + *str) % table_size; 39 | 40 | return hash_value; 41 | } 42 | 43 | 44 | ReturnCode HashCreate(Hash** hash, unsigned int table_size) 45 | { 46 | unsigned int i; 47 | 48 | if (table_size < 1) 49 | return FAIL; 50 | 51 | // 52 | // Allocate space for the Hash 53 | // 54 | if (((*hash) = malloc(sizeof(**hash))) == NULL) 55 | return FAIL; 56 | 57 | // 58 | // Allocate space for the array of list heads 59 | // 60 | if (((*hash)->heads = malloc(table_size*sizeof(*((*hash)->heads)))) == NULL) 61 | return FAIL; 62 | 63 | // 64 | // Initialize Hash info 65 | // 66 | for (i = 0; i < table_size; ++i) 67 | { 68 | (*hash)->heads[i] = NULL; 69 | } 70 | 71 | (*hash)->table_size = table_size; 72 | 73 | return SUCCESS; 74 | } 75 | 76 | 77 | ReturnCode HashInsert(Hash* hash, const Entry* entry) 78 | { 79 | unsigned int index = hash_func(entry->key, hash->table_size); 80 | Node* temp = hash->heads[index]; 81 | 82 | HashRemove(hash, entry->key); 83 | 84 | if ((hash->heads[index] = malloc(sizeof(Node))) == NULL) 85 | return FAIL; 86 | 87 | hash->heads[index]->entry = malloc(sizeof(Entry)); 88 | hash->heads[index]->entry->key = malloc(strlen(entry->key)+1); 89 | hash->heads[index]->entry->value = malloc(strlen(entry->value)+1); 90 | strcpy(hash->heads[index]->entry->key, entry->key); 91 | strcpy(hash->heads[index]->entry->value, entry->value); 92 | 93 | hash->heads[index]->next = temp; 94 | 95 | return SUCCESS; 96 | } 97 | 98 | 99 | 100 | const Entry* HashFind(const Hash* hash, const char* key) 101 | { 102 | unsigned int index = hash_func(key, hash->table_size); 103 | Node* temp = hash->heads[index]; 104 | 105 | while (temp != NULL) 106 | { 107 | if (!strcmp(key, temp->entry->key)) 108 | return temp->entry; 109 | 110 | temp = temp->next; 111 | } 112 | 113 | return NULL; 114 | } 115 | 116 | 117 | ReturnCode HashRemove(Hash* hash, const char* key) 118 | { 119 | unsigned int index = hash_func(key, hash->table_size); 120 | Node* temp1 = hash->heads[index]; 121 | Node* temp2 = temp1; 122 | 123 | while (temp1 != NULL) 124 | { 125 | if (!strcmp(key, temp1->entry->key)) 126 | { 127 | if (temp1 == hash->heads[index]) 128 | hash->heads[index] = hash->heads[index]->next; 129 | else 130 | temp2->next = temp1->next; 131 | 132 | free(temp1->entry->key); 133 | free(temp1->entry->value); 134 | free(temp1->entry); 135 | free(temp1); 136 | temp1 = NULL; 137 | 138 | return SUCCESS; 139 | } 140 | 141 | temp2 = temp1; 142 | temp1 = temp1->next; 143 | } 144 | 145 | return FAIL; 146 | } 147 | 148 | 149 | void HashPrint(Hash* hash, void (*PrintFunc)(char*, char*)) 150 | { 151 | unsigned int i; 152 | 153 | if (hash == NULL || hash->heads == NULL) 154 | return; 155 | 156 | for (i = 0; i < hash->table_size; ++i) 157 | { 158 | Node* temp = hash->heads[i]; 159 | 160 | while (temp != NULL) 161 | { 162 | PrintFunc(temp->entry->key, temp->entry->value); 163 | temp = temp->next; 164 | } 165 | } 166 | } 167 | 168 | 169 | 170 | void HashDestroy(Hash* hash) 171 | { 172 | unsigned int i; 173 | 174 | if (hash == NULL) 175 | return; 176 | 177 | for (i = 0; i < hash->table_size; ++i) 178 | { 179 | Node* temp = hash->heads[i]; 180 | 181 | while (temp != NULL) 182 | { 183 | Node* temp2 = temp; 184 | 185 | free(temp->entry->key); 186 | free(temp->entry->value); 187 | free(temp->entry); 188 | 189 | temp = temp->next; 190 | 191 | free(temp2); 192 | } 193 | } 194 | 195 | free(hash->heads); 196 | hash->heads = NULL; 197 | 198 | free(hash); 199 | } 200 | 201 | -------------------------------------------------------------------------------- /test/lines.txt: -------------------------------------------------------------------------------- 1 | hello\ 2 | world\ 3 | bar 4 | 5 | foo 6 | 7 | bar\\\ 8 | ww 9 | 10 | -------------------------------------------------------------------------------- /yacc.lisp: -------------------------------------------------------------------------------- 1 | (in-package :c-parse) 2 | 3 | ;;;;process the yacc.txt file 4 | (defparameter *yacc-txt-path* 5 | (merge-pathnames "yacc.txt" *path*)) 6 | (deflazy *yacc-txt* () 7 | (alexandria:read-file-into-string *yacc-txt-path*)) 8 | (deflazy *yacc-txt2* (*yacc-txt*) 9 | (file-lines-no-whitespace-lines 10 | *yacc-txt*)) 11 | (defun split-yacc (&optional (yacc (getfnc '*yacc-txt2*))) 12 | (multiple-value-bind (first second) 13 | (values (subseq yacc 0 first) 14 | (let ((value (subseq yacc (+ 1 first) second)) 15 | acc) 16 | ;;intersperse newlines again and concatenate for esrap-liquid 17 | (dolist (val value) 18 | (push val acc) 19 | (push 20 | '(#\newline) 21 | acc)) 22 | (%concatenate-string (nreverse acc)))))) 23 | (deflazy %%-positions ((yacc *yacc-txt2*)) 24 | (multiple-value-list (%%-positions yacc))) 25 | (deflazy *yacc-tokens-lines* (%%-positions (yacc *yacc-txt2*)) 26 | (destructuring-bind (first second) %%-positions 27 | (declare (ignore second)) 28 | (subseq yacc 0 first))) 29 | (deflazy *yacc-definitions* (%%-positions (yacc *yacc-txt2*)) 30 | (destructuring-bind (first second) %%-positions 31 | (let ((value (subseq yacc (+ 1 first) second)) 32 | acc) 33 | ;;intersperse newlines again and concatenate for esrap-liquid 34 | (dolist (val value) 35 | (push val acc) 36 | (push 37 | '(#\newline) 38 | acc)) 39 | (%concatenate-string (nreverse acc))))) 40 | ;;;; 41 | 42 | 43 | (define-c-parse-rule yacc-token-line () 44 | (v "%token") 45 | (times (progn-v whitespace 46 | lex-token-string))) 47 | (define-c-parse-rule yacc-start-line () 48 | (v "%start") 49 | (progn-v whitespace 50 | lex-token-string)) 51 | 52 | ;; "return (values # start)" 53 | (deflazy *yacc-token-strings* (*yacc-tokens-lines*) 54 | (alexandria:flatten 55 | (mapcar (lambda (line) 56 | (parse-with-garbage 'yacc-token-line line)) 57 | *yacc-tokens-lines*))) 58 | (deflazy *yacc-start-string* (*yacc-tokens-lines*) 59 | (first 60 | (alexandria:flatten 61 | (mapcar (lambda (line) 62 | (parse-with-garbage 'yacc-start-line line)) 63 | *yacc-tokens-lines*)))) 64 | 65 | ;;FIXME::does not handle // comments 66 | (define-c-parse-rule yacc-whitespace-or-comment () 67 | (|| whitespace 68 | lex-yacc-multiline-comment)) 69 | (define-c-parse-rule yacc-whitespace-or-comments () 70 | (postimes yacc-whitespace-or-comment)) 71 | 72 | (define-c-parse-rule yacc-grammar-one-expansion () 73 | (v yacc-whitespace-or-comments) 74 | (|| lex-read-char 75 | lex-token-string)) 76 | (define-c-parse-rule yacc-rule-aux () 77 | (times yacc-grammar-one-expansion)) 78 | (define-c-parse-rule yacc-rule () 79 | (list* (prog1 (v lex-token-string) 80 | (v yacc-whitespace-or-comments)) 81 | (prog1 (list* 82 | (progn 83 | (v #\:) 84 | (v yacc-rule-aux)) 85 | (times (progn-v yacc-whitespace-or-comments 86 | #\| 87 | yacc-rule-aux))) 88 | (v yacc-whitespace-or-comments) 89 | (v #\;) 90 | (v #\Newline)))) 91 | (define-c-parse-rule yacc-rules () 92 | (times yacc-rule)) 93 | 94 | (deflazy *yacc-grammar* (*yacc-definitions*) 95 | (parse-with-garbage 'yacc-rules *yacc-definitions*)) 96 | 97 | (defparameter *yacc-terminal-chars* nil) ;;because characters are terminals. FIXME:documentation 98 | (defparameter *yacc-package* 99 | (let ((name "YACC-SYMBOLS")) 100 | (or (find-package name) 101 | (make-package name)))) 102 | (defun yacc-symbol (x) 103 | (if x 104 | (let ((string (etypecase x 105 | (string (format nil "$~a" x)) 106 | (character 107 | (pushnew x *yacc-terminal-chars*) 108 | (format nil "C~a" x)) 109 | (symbol (format nil "S~a" x))))) 110 | (intern string *yacc-package*)) 111 | x)) 112 | 113 | (utility:eval-always 114 | (progn 115 | (struct-to-clos:struct->class 116 | ;;represent a contiguous region of memory 117 | (defstruct character-section 118 | data 119 | start 120 | end)) 121 | (defun print-character-section (stream object) 122 | (write (character-section-data object) :stream stream) 123 | (format stream "<~a,~a>" 124 | (character-section-start object) 125 | (character-section-end object))) 126 | (set-pprint-dispatch 'character-section 'print-character-section))) 127 | (defmethod equalp? ((a character-section) (b character-section)) 128 | (and (equalp? (character-section-data a) (character-section-data b)) 129 | (equalp? (character-section-start a) (character-section-start b)) 130 | (equalp? (character-section-end a) (character-section-end b)))) 131 | #+nil 132 | "A parser consumes the output of a lexer, that produces a stream of terminals. CL-Yacc 133 | expects the lexer to be a function of no arguments (a 134 | thunk 135 | ) that returns two values: the next 136 | terminal symbol, and the value of the symbol, which will be passed to the action associated with 137 | a production. At the end of the input, the lexer should return 138 | nil 139 | ." 140 | (defun correct-token-if-identifier (token-type string-thing) 141 | (let ((string (stringy string-thing))) 142 | (values 143 | (if (and 144 | ;;was marked as an IDENTIFIER 145 | (symbol= token-type (load-time-value (yacc-symbol "IDENTIFIER"))) 146 | ;;and was found to be a typedef 147 | (member string *typedef-env* :test 'string=)) 148 | (load-time-value (yacc-symbol "TYPEDEF_NAME")) 149 | token-type) 150 | string))) 151 | (defun lex-for-cl-yacc (string &key (start 0) (end nil)) 152 | ;;cl-yacc accepts a function that returns two values. the token type and the value 153 | ;;when there are no more tokens, it should return (values nil nil) [?FIXME::document?] 154 | (lambda () 155 | (block out 156 | (tagbody try-again 157 | (multiple-value-bind (result len) 158 | (parse-with-garbage 'lexer-foo string :start start) 159 | (let ((old-pos start) 160 | (new-pos (+ start len))) 161 | (setf start new-pos) 162 | (when (or 163 | ;;went too far, farther than end 164 | (and end (> new-pos end)) 165 | ;;failed to lex. length 0 means no solution for lexing was found 166 | (zerop len)) 167 | (return-from out (values nil nil))) 168 | (destructuring-bind (string-thing ignorable yacc-token-type) result 169 | (declare (ignorable string-thing yacc-token-type ignorable)) 170 | ;;(write-char (char-code-object yacc-token-type) stream) 171 | ;;(princ (stringy (car result)) stream) 172 | 173 | ;;to skip over whitespace 174 | (when (not yacc-token-type) 175 | (go try-again)) 176 | 177 | (multiple-value-bind (token string) 178 | (correct-token-if-identifier yacc-token-type string-thing) 179 | (return-from out 180 | (values token 181 | (make-character-section 182 | :data string 183 | :start old-pos 184 | :end new-pos) 185 | )))))))))) 186 | (deflazy *yacc-start-symbol* (*yacc-start-string*) 187 | (yacc-symbol *yacc-start-string*)) 188 | 189 | (defun aux-fun234 (n) 190 | ;;;add a closure at the end of the grammar which lists the dump name and 191 | ;;;choice count 192 | (let ((name (first n)) 193 | (count 0)) 194 | ;;FIXME::keyword parameter strings are not upcase in the specification. may change? 195 | (let ((dump-name (utility:keywordify (string-upcase name)))) 196 | (list* name 197 | (let ((acc nil)) 198 | (dolist (item (rest n)) 199 | (push 200 | (append item 201 | `((lambda (&rest rest) 202 | (list* ,dump-name ,count rest)))) 203 | acc) 204 | (incf count)) 205 | (nreverse acc)))))) 206 | ;;to output with grammar rule and form number from grammar 207 | (deflazy *yacc-grammar-info* (*yacc-grammar*) 208 | (mapcar 'aux-fun234 *yacc-grammar*)) 209 | 210 | (defun tree-map (fn tree &key (max-depth -1)) 211 | "replace each list-element in tree with (funcall fn list-element)" 212 | ;;(tree-map (lambda (x) (* x x)) '(1 2 (2 3) . foo)) -> (1 4 (4 9) . FOO) 213 | ;;FIXME:: max-depth is a hack to prevent the function from running too deep 214 | (labels ((rec (tree depth) 215 | (cond ((atom tree) (funcall fn tree)) 216 | (t (cons (let ((item (first tree))) 217 | (if (= depth max-depth) 218 | item 219 | (rec item (+ 1 depth)))) 220 | (let ((rest (rest tree))) 221 | (if (and rest 222 | (listp rest)) 223 | (rec rest depth) 224 | rest))))))) 225 | (rec tree 0))) 226 | (deflazy *yacc-grammar-symbols* (*yacc-grammar-info*) 227 | (tree-map (lambda (x) 228 | (if (functionp x) 229 | x 230 | (yacc-symbol x))) 231 | *yacc-grammar-info* 232 | :max-depth 3)) 233 | (deflazy *yacc-token-symbols* (*yacc-token-strings*) 234 | (tree-map 'yacc-symbol 235 | (append *yacc-token-strings* 236 | *yacc-terminal-chars*))) 237 | 238 | (defparameter *c* nil) 239 | (defun gen-parser-code () 240 | `(define-parser *c* 241 | (:start-symbol 242 | ;;don't 243 | ;;,*yacc-start-symbol* 244 | ,(yacc-symbol "external_declaration") 245 | ;;we don't parse the whole thing, we can't just parse the whoe translation_unit 246 | ;;because of typedef declarations 247 | ) 248 | (:terminals ,(getfnc '*yacc-token-symbols*)) 249 | ,@(getfnc '*yacc-grammar-symbols*))) 250 | (defun parse-external-declaration-unknown-length 251 | (lex-fun &key (start 0) (end most-positive-fixnum ;;FIXME::nonportable? 252 | ;;(length string) 253 | )) 254 | ;;lexfun is a function that lexes from start to end 255 | ;;cl-yacc will error if there are more tokens coming in after an external-declaration 256 | ;;so we catch the error, inspect what it was looking at, that should be the end 257 | ;;of the external-declaration 258 | (block out 259 | (tagbody try-again 260 | (handler-case 261 | (progn 262 | (return-from out 263 | (if (= start end) 264 | (values nil end) 265 | (values (yacc:parse-with-lexer (funcall lex-fun start end) *c*) 266 | end)))) 267 | (yacc-parse-error (c) 268 | (when (eq nil (first (yacc-parse-error-expected-terminals c))) 269 | (setf end (character-section-start (yacc-parse-error-value c))) 270 | (go try-again)) 271 | (print (list start end)) 272 | (error c) 273 | #+nil 274 | (print "what???")))))) 275 | 276 | (defparameter *typedef-env* nil) 277 | (defparameter *parsed* nil) 278 | 279 | ;;;FIXME:: use incremental lengths, not absolute string positions 280 | (defun keep-parsing (&key (path *testpath* path-supplied-p) 281 | (string (alexandria:read-file-into-string (ensure-cached-no-directives path)) 282 | string-supplied-p) 283 | (save-result nil)) 284 | (when (and path-supplied-p string-supplied-p) 285 | (error "specify a path or a string")) 286 | (setf *typedef-env* nil) 287 | (let ((start 0) 288 | (end (length string)) 289 | (lex-fun 290 | (if path-supplied-p 291 | (lambda (start end) 292 | (lex-for-cl-yacc-cached :path path :string string :start start :end end)) 293 | (lambda (start end) 294 | (lex-for-cl-yacc string :start start :end end)))) 295 | (results nil)) 296 | (block exit 297 | (loop 298 | (multiple-value-bind (cst where) 299 | (parse-external-declaration-unknown-length 300 | lex-fun 301 | :start start :end end) 302 | (when (and where (= where start)) ;;parsing failed 303 | (return-from exit)) 304 | (push cst results) 305 | ;;test whether the external-declaration was a typedef 306 | (multiple-value-bind (value typedef-p) (cst-typedef-p cst) 307 | ;;if it is 308 | (when typedef-p 309 | ;;then add the new names to the environment 310 | (mapc 311 | (lambda (x) 312 | (pushnew x *typedef-env* :test 'string=)) 313 | value))) 314 | (setf start 315 | where)))) 316 | (when save-result 317 | (setf *parsed* (append results *parsed*))) 318 | (values results 319 | *typedef-env*))) 320 | 321 | (defun c-parse (string) 322 | "Parse a string representing C code, return (values [list of declarations] [list of typedef names])" 323 | (keep-parsing :string string)) 324 | 325 | (defun print-csts (csts) 326 | (mapcar 'dump-cst csts)) 327 | 328 | (defparameter *c-data* 329 | `("typedef struct tagNode 330 | { 331 | enum tagNode* entry; 332 | 333 | struct tagNode* next; 334 | } Node; 335 | " 336 | "typedef enum {SUCCESS, FAIL} (*MathFunc)(float, int), bar ,foo;")) 337 | (defparameter *c-data-0* 338 | nil 339 | #+nil 340 | (mapcar 'parsefoobar *c-data*) 341 | ) 342 | (defun are-typedefs () 343 | (mapcar 'cst-typedef *c-data-0*)) 344 | 345 | (defun cst-typedef-p (&optional (CST (alexandria:random-elt *c-data-0*))) 346 | "return a (values list-of-typedef'd-identifiers t) if its a typedef, (values nil nil) otherwise 347 | depends on the CST being dumped in the form below, where node is (production-rule number &rest children)" 348 | (match CST 349 | ((list 350 | :EXTERNAL_DECLARATION 351 | 1 352 | (list 353 | :DECLARATION 354 | 1 355 | (list 356 | :DECLARATION_SPECIFIERS 357 | 0 358 | (list 359 | :STORAGE_CLASS_SPECIFIER 360 | 0 361 | (character-section (data "typedef")) 362 | ) 363 | _) 364 | 365 | (list* 366 | :INIT_DECLARATOR_LIST _ names) 367 | (character-section (data ";")) 368 | )) 369 | ;;each typedef can define many values. this finds them 370 | (values 371 | (mapcar 'character-section-data 372 | (find-$direct_declarator0$ names)) 373 | t)))) 374 | 375 | (defun find-$direct_declarator0$ (tree) 376 | (let ((acc nil)) 377 | (mapc-tree 378 | (lambda (node) 379 | (match node 380 | ((list :DIRECT_DECLARATOR 0 value) 381 | (push value acc)))) 382 | tree) 383 | acc)) 384 | 385 | (defun mapc-tree (fun node) 386 | (funcall fun node) 387 | (when (consp node) 388 | (mapc-tree fun (car node)) 389 | (mapc-tree fun (cdr node)))) 390 | 391 | (defun dump-cst (cst &optional (depth 0)) 392 | (typecase cst 393 | (character-section 394 | (write-char #\Space) 395 | (write-string (character-section-data cst))) 396 | (otherwise 397 | (let ((items (cddr cst)) 398 | (newdepth depth)) 399 | (unless (= 1 (list-length items)) 400 | (incf newdepth 1) 401 | (terpri) 402 | (loop :repeat depth :do (write-char #\Space))) 403 | (dolist (item items) 404 | (dump-cst item newdepth)))))) 405 | 406 | ;;;;FIXME::where to put this? 407 | (defun start-up () 408 | (eval-lexer) 409 | (eval (gen-parser-code)) 410 | (values)) 411 | -------------------------------------------------------------------------------- /yacc.txt: -------------------------------------------------------------------------------- 1 | %token IDENTIFIER I_CONSTANT F_CONSTANT STRING_LITERAL FUNC_NAME SIZEOF 2 | %token PTR_OP INC_OP DEC_OP LEFT_OP RIGHT_OP LE_OP GE_OP EQ_OP NE_OP 3 | %token AND_OP OR_OP MUL_ASSIGN DIV_ASSIGN MOD_ASSIGN ADD_ASSIGN 4 | %token SUB_ASSIGN LEFT_ASSIGN RIGHT_ASSIGN AND_ASSIGN 5 | %token XOR_ASSIGN OR_ASSIGN 6 | %token TYPEDEF_NAME ENUMERATION_CONSTANT 7 | 8 | %token TYPEDEF EXTERN STATIC AUTO REGISTER INLINE 9 | %token CONST RESTRICT VOLATILE 10 | %token BOOL CHAR SHORT INT LONG SIGNED UNSIGNED FLOAT DOUBLE VOID 11 | %token COMPLEX IMAGINARY 12 | %token STRUCT UNION ENUM ELLIPSIS 13 | 14 | %token CASE DEFAULT IF ELSE SWITCH WHILE DO FOR GOTO CONTINUE BREAK RETURN 15 | 16 | %token ALIGNAS ALIGNOF ATOMIC GENERIC NORETURN STATIC_ASSERT THREAD_LOCAL 17 | 18 | %start translation_unit 19 | %% 20 | 21 | primary_expression 22 | : IDENTIFIER 23 | | constant 24 | | string 25 | | '(' expression ')' 26 | | generic_selection 27 | ; 28 | 29 | constant 30 | : I_CONSTANT /* includes character_constant */ 31 | | F_CONSTANT 32 | | ENUMERATION_CONSTANT /* after it has been defined as such */ 33 | ; 34 | 35 | enumeration_constant /* before it has been defined as such */ 36 | : IDENTIFIER 37 | ; 38 | 39 | string 40 | : STRING_LITERAL 41 | | FUNC_NAME 42 | ; 43 | 44 | generic_selection 45 | : GENERIC '(' assignment_expression ',' generic_assoc_list ')' 46 | ; 47 | 48 | generic_assoc_list 49 | : generic_association 50 | | generic_assoc_list ',' generic_association 51 | ; 52 | 53 | generic_association 54 | : type_name ':' assignment_expression 55 | | DEFAULT ':' assignment_expression 56 | ; 57 | 58 | postfix_expression 59 | : primary_expression 60 | | postfix_expression '[' expression ']' 61 | | postfix_expression '(' ')' 62 | | postfix_expression '(' argument_expression_list ')' 63 | | postfix_expression '.' IDENTIFIER 64 | | postfix_expression PTR_OP IDENTIFIER 65 | | postfix_expression INC_OP 66 | | postfix_expression DEC_OP 67 | | '(' type_name ')' '{' initializer_list '}' 68 | | '(' type_name ')' '{' initializer_list ',' '}' 69 | ; 70 | 71 | argument_expression_list 72 | : assignment_expression 73 | | argument_expression_list ',' assignment_expression 74 | ; 75 | 76 | unary_expression 77 | : postfix_expression 78 | | INC_OP unary_expression 79 | | DEC_OP unary_expression 80 | | unary_operator cast_expression 81 | | SIZEOF unary_expression 82 | | SIZEOF '(' type_name ')' 83 | | ALIGNOF '(' type_name ')' 84 | ; 85 | 86 | unary_operator 87 | : '&' 88 | | '*' 89 | | '+' 90 | | '-' 91 | | '~' 92 | | '!' 93 | ; 94 | 95 | cast_expression 96 | : unary_expression 97 | | '(' type_name ')' cast_expression 98 | ; 99 | 100 | multiplicative_expression 101 | : cast_expression 102 | | multiplicative_expression '*' cast_expression 103 | | multiplicative_expression '/' cast_expression 104 | | multiplicative_expression '%' cast_expression 105 | ; 106 | 107 | additive_expression 108 | : multiplicative_expression 109 | | additive_expression '+' multiplicative_expression 110 | | additive_expression '-' multiplicative_expression 111 | ; 112 | 113 | shift_expression 114 | : additive_expression 115 | | shift_expression LEFT_OP additive_expression 116 | | shift_expression RIGHT_OP additive_expression 117 | ; 118 | 119 | relational_expression 120 | : shift_expression 121 | | relational_expression '<' shift_expression 122 | | relational_expression '>' shift_expression 123 | | relational_expression LE_OP shift_expression 124 | | relational_expression GE_OP shift_expression 125 | ; 126 | 127 | equality_expression 128 | : relational_expression 129 | | equality_expression EQ_OP relational_expression 130 | | equality_expression NE_OP relational_expression 131 | ; 132 | 133 | and_expression 134 | : equality_expression 135 | | and_expression '&' equality_expression 136 | ; 137 | 138 | exclusive_or_expression 139 | : and_expression 140 | | exclusive_or_expression '^' and_expression 141 | ; 142 | 143 | inclusive_or_expression 144 | : exclusive_or_expression 145 | | inclusive_or_expression '|' exclusive_or_expression 146 | ; 147 | 148 | logical_and_expression 149 | : inclusive_or_expression 150 | | logical_and_expression AND_OP inclusive_or_expression 151 | ; 152 | 153 | logical_or_expression 154 | : logical_and_expression 155 | | logical_or_expression OR_OP logical_and_expression 156 | ; 157 | 158 | conditional_expression 159 | : logical_or_expression 160 | | logical_or_expression '?' expression ':' conditional_expression 161 | ; 162 | 163 | assignment_expression 164 | : conditional_expression 165 | | unary_expression assignment_operator assignment_expression 166 | ; 167 | 168 | assignment_operator 169 | : '=' 170 | | MUL_ASSIGN 171 | | DIV_ASSIGN 172 | | MOD_ASSIGN 173 | | ADD_ASSIGN 174 | | SUB_ASSIGN 175 | | LEFT_ASSIGN 176 | | RIGHT_ASSIGN 177 | | AND_ASSIGN 178 | | XOR_ASSIGN 179 | | OR_ASSIGN 180 | ; 181 | 182 | expression 183 | : assignment_expression 184 | | expression ',' assignment_expression 185 | ; 186 | 187 | constant_expression 188 | : conditional_expression /* with constraints */ 189 | ; 190 | 191 | declaration 192 | : declaration_specifiers ';' 193 | | declaration_specifiers init_declarator_list ';' 194 | | static_assert_declaration 195 | ; 196 | 197 | declaration_specifiers 198 | : storage_class_specifier declaration_specifiers 199 | | storage_class_specifier 200 | | type_specifier declaration_specifiers 201 | | type_specifier 202 | | type_qualifier declaration_specifiers 203 | | type_qualifier 204 | | function_specifier declaration_specifiers 205 | | function_specifier 206 | | alignment_specifier declaration_specifiers 207 | | alignment_specifier 208 | ; 209 | 210 | init_declarator_list 211 | : init_declarator 212 | | init_declarator_list ',' init_declarator 213 | ; 214 | 215 | init_declarator 216 | : declarator '=' initializer 217 | | declarator 218 | ; 219 | 220 | storage_class_specifier 221 | : TYPEDEF /* identifiers must be flagged as TYPEDEF_NAME */ 222 | | EXTERN 223 | | STATIC 224 | | THREAD_LOCAL 225 | | AUTO 226 | | REGISTER 227 | ; 228 | 229 | type_specifier 230 | : VOID 231 | | CHAR 232 | | SHORT 233 | | INT 234 | | LONG 235 | | FLOAT 236 | | DOUBLE 237 | | SIGNED 238 | | UNSIGNED 239 | | BOOL 240 | | COMPLEX 241 | | IMAGINARY /* non-mandated extension */ 242 | | atomic_type_specifier 243 | | struct_or_union_specifier 244 | | enum_specifier 245 | | TYPEDEF_NAME /* after it has been defined as such */ 246 | ; 247 | 248 | struct_or_union_specifier 249 | : struct_or_union '{' struct_declaration_list '}' 250 | | struct_or_union IDENTIFIER '{' struct_declaration_list '}' 251 | | struct_or_union IDENTIFIER 252 | ; 253 | 254 | struct_or_union 255 | : STRUCT 256 | | UNION 257 | ; 258 | 259 | struct_declaration_list 260 | : struct_declaration 261 | | struct_declaration_list struct_declaration 262 | ; 263 | 264 | struct_declaration 265 | : specifier_qualifier_list ';' /* for anonymous struct/union */ 266 | | specifier_qualifier_list struct_declarator_list ';' 267 | | static_assert_declaration 268 | ; 269 | 270 | specifier_qualifier_list 271 | : type_specifier specifier_qualifier_list 272 | | type_specifier 273 | | type_qualifier specifier_qualifier_list 274 | | type_qualifier 275 | ; 276 | 277 | struct_declarator_list 278 | : struct_declarator 279 | | struct_declarator_list ',' struct_declarator 280 | ; 281 | 282 | struct_declarator 283 | : ':' constant_expression 284 | | declarator ':' constant_expression 285 | | declarator 286 | ; 287 | 288 | enum_specifier 289 | : ENUM '{' enumerator_list '}' 290 | | ENUM '{' enumerator_list ',' '}' 291 | | ENUM IDENTIFIER '{' enumerator_list '}' 292 | | ENUM IDENTIFIER '{' enumerator_list ',' '}' 293 | | ENUM IDENTIFIER 294 | ; 295 | 296 | enumerator_list 297 | : enumerator 298 | | enumerator_list ',' enumerator 299 | ; 300 | 301 | enumerator /* identifiers must be flagged as ENUMERATION_CONSTANT */ 302 | : enumeration_constant '=' constant_expression 303 | | enumeration_constant 304 | ; 305 | 306 | atomic_type_specifier 307 | : ATOMIC '(' type_name ')' 308 | ; 309 | 310 | type_qualifier 311 | : CONST 312 | | RESTRICT 313 | | VOLATILE 314 | | ATOMIC 315 | ; 316 | 317 | function_specifier 318 | : INLINE 319 | | NORETURN 320 | ; 321 | 322 | alignment_specifier 323 | : ALIGNAS '(' type_name ')' 324 | | ALIGNAS '(' constant_expression ')' 325 | ; 326 | 327 | declarator 328 | : pointer direct_declarator 329 | | direct_declarator 330 | ; 331 | 332 | direct_declarator 333 | : IDENTIFIER 334 | | '(' declarator ')' 335 | | direct_declarator '[' ']' 336 | | direct_declarator '[' '*' ']' 337 | | direct_declarator '[' STATIC type_qualifier_list assignment_expression ']' 338 | | direct_declarator '[' STATIC assignment_expression ']' 339 | | direct_declarator '[' type_qualifier_list '*' ']' 340 | | direct_declarator '[' type_qualifier_list STATIC assignment_expression ']' 341 | | direct_declarator '[' type_qualifier_list assignment_expression ']' 342 | | direct_declarator '[' type_qualifier_list ']' 343 | | direct_declarator '[' assignment_expression ']' 344 | | direct_declarator '(' parameter_type_list ')' 345 | | direct_declarator '(' ')' 346 | | direct_declarator '(' identifier_list ')' 347 | ; 348 | 349 | pointer 350 | : '*' type_qualifier_list pointer 351 | | '*' type_qualifier_list 352 | | '*' pointer 353 | | '*' 354 | ; 355 | 356 | type_qualifier_list 357 | : type_qualifier 358 | | type_qualifier_list type_qualifier 359 | ; 360 | 361 | 362 | parameter_type_list 363 | : parameter_list ',' ELLIPSIS 364 | | parameter_list 365 | ; 366 | 367 | parameter_list 368 | : parameter_declaration 369 | | parameter_list ',' parameter_declaration 370 | ; 371 | 372 | parameter_declaration 373 | : declaration_specifiers declarator 374 | | declaration_specifiers abstract_declarator 375 | | declaration_specifiers 376 | ; 377 | 378 | identifier_list 379 | : IDENTIFIER 380 | | identifier_list ',' IDENTIFIER 381 | ; 382 | 383 | type_name 384 | : specifier_qualifier_list abstract_declarator 385 | | specifier_qualifier_list 386 | ; 387 | 388 | abstract_declarator 389 | : pointer direct_abstract_declarator 390 | | pointer 391 | | direct_abstract_declarator 392 | ; 393 | 394 | direct_abstract_declarator 395 | : '(' abstract_declarator ')' 396 | | '[' ']' 397 | | '[' '*' ']' 398 | | '[' STATIC type_qualifier_list assignment_expression ']' 399 | | '[' STATIC assignment_expression ']' 400 | | '[' type_qualifier_list STATIC assignment_expression ']' 401 | | '[' type_qualifier_list assignment_expression ']' 402 | | '[' type_qualifier_list ']' 403 | | '[' assignment_expression ']' 404 | | direct_abstract_declarator '[' ']' 405 | | direct_abstract_declarator '[' '*' ']' 406 | | direct_abstract_declarator '[' STATIC type_qualifier_list assignment_expression ']' 407 | | direct_abstract_declarator '[' STATIC assignment_expression ']' 408 | | direct_abstract_declarator '[' type_qualifier_list assignment_expression ']' 409 | | direct_abstract_declarator '[' type_qualifier_list STATIC assignment_expression ']' 410 | | direct_abstract_declarator '[' type_qualifier_list ']' 411 | | direct_abstract_declarator '[' assignment_expression ']' 412 | | '(' ')' 413 | | '(' parameter_type_list ')' 414 | | direct_abstract_declarator '(' ')' 415 | | direct_abstract_declarator '(' parameter_type_list ')' 416 | ; 417 | 418 | initializer 419 | : '{' initializer_list '}' 420 | | '{' initializer_list ',' '}' 421 | | assignment_expression 422 | ; 423 | 424 | initializer_list 425 | : designation initializer 426 | | initializer 427 | | initializer_list ',' designation initializer 428 | | initializer_list ',' initializer 429 | ; 430 | 431 | designation 432 | : designator_list '=' 433 | ; 434 | 435 | designator_list 436 | : designator 437 | | designator_list designator 438 | ; 439 | 440 | designator 441 | : '[' constant_expression ']' 442 | | '.' IDENTIFIER 443 | ; 444 | 445 | static_assert_declaration 446 | : STATIC_ASSERT '(' constant_expression ',' STRING_LITERAL ')' ';' 447 | ; 448 | 449 | statement 450 | : labeled_statement 451 | | compound_statement 452 | | expression_statement 453 | | selection_statement 454 | | iteration_statement 455 | | jump_statement 456 | ; 457 | 458 | labeled_statement 459 | : IDENTIFIER ':' statement 460 | | CASE constant_expression ':' statement 461 | | DEFAULT ':' statement 462 | ; 463 | 464 | compound_statement 465 | : '{' '}' 466 | | '{' block_item_list '}' 467 | ; 468 | 469 | block_item_list 470 | : block_item 471 | | block_item_list block_item 472 | ; 473 | 474 | block_item 475 | : declaration 476 | | statement 477 | ; 478 | 479 | expression_statement 480 | : ';' 481 | | expression ';' 482 | ; 483 | 484 | selection_statement 485 | : IF '(' expression ')' statement ELSE statement 486 | | IF '(' expression ')' statement 487 | | SWITCH '(' expression ')' statement 488 | ; 489 | 490 | iteration_statement 491 | : WHILE '(' expression ')' statement 492 | | DO statement WHILE '(' expression ')' ';' 493 | | FOR '(' expression_statement expression_statement ')' statement 494 | | FOR '(' expression_statement expression_statement expression ')' statement 495 | | FOR '(' declaration expression_statement ')' statement 496 | | FOR '(' declaration expression_statement expression ')' statement 497 | ; 498 | 499 | jump_statement 500 | : GOTO IDENTIFIER ';' 501 | | CONTINUE ';' 502 | | BREAK ';' 503 | | RETURN ';' 504 | | RETURN expression ';' 505 | ; 506 | 507 | translation_unit 508 | : external_declaration 509 | | translation_unit external_declaration 510 | ; 511 | 512 | external_declaration 513 | : function_definition 514 | | declaration 515 | ; 516 | 517 | function_definition 518 | : declaration_specifiers declarator declaration_list compound_statement 519 | | declaration_specifiers declarator compound_statement 520 | ; 521 | 522 | declaration_list 523 | : declaration 524 | | declaration_list declaration 525 | ; 526 | 527 | %% 528 | #include 529 | 530 | void yyerror(const char *s) 531 | { 532 | fflush(stdout); 533 | fprintf(stderr, "*** %s\n", s); 534 | } 535 | 536 | --------------------------------------------------------------------------------