├── README.md ├── aexp.masm ├── metaii.masm ├── metaii.meta └── metavm.lisp /README.md: -------------------------------------------------------------------------------- 1 | # Meta II 2 | 3 | ## Introduction 4 | 5 | This is an implementation of Val Schorre's Meta II language, written 6 | in Common Lisp. Meta II is a compiler-writing language for 7 | syntax-directed translation, expressed in a syntax similar to BNF 8 | rules that are annotated to produce code in the target language. The 9 | code here is a virtual machine for the target language. The Meta II 10 | compiler can itself be written in Meta II. 11 | 12 | Schorre, D.V., 1964. Meta II: a Syntax-oriented Compiler Writing 13 | Language. In Proceedings of the 1964 19th ACM national conference (pp. 14 | D1.3-1 -- D1.3-11). 15 | 16 | Other useful references: 17 | * [Online version of the Schorre paper](http://www.chilton-computing.org.uk/acl/literature/reports/p025.htm) 18 | * James Neighbors extremely detailed explication of Meta II: [metacompiler tutorial](http://www.bayfronttechnologies.com/mc_tutorial.html) 19 | * [Wikipedia page on Meta II](https://en.wikipedia.org/wiki/META_II) 20 | 21 | ## The files 22 | 23 | | File | Contents | 24 | | ----------- | ---------------------------------------------------------------- | 25 | | metaii.lisp | The virtual machine in Common Lisp | 26 | | aexp.masm | James Neighbors algebraic expression syntax, compiled by Meta II | 27 | | metaii.meta | The Meta II interpreter in Meta II | 28 | | metaii.masm | The Meta II interpreter, compiled by Meta II | 29 | 30 | 31 | ## Examples 32 | 33 | * A simple example (from James Neighbors) of an algebraic expression 34 | language: 35 | 36 | ```lisp 37 | (run (build-instructions (parse-file "aexp.meta")) 38 | "fern:=5+6;ace:=fern*5;waldo:=fern+alpha/-beta^gamma;") 39 | ``` 40 | 41 | * Compiling the Meta II compiler. 42 | 43 | ```lisp 44 | (run (build-instructions (parse-file "metaii.meta")) 45 | (file-as-string "metaii.meta")) 46 | ``` 47 | * Compiling the Meta II compiler using the compiled Meta II compiler. 48 | 49 | ```lisp 50 | (run (build-instructions (parse-file "metaii.masm")) 51 | (convert-single-to-double (file-as-string "metaii.meta"))) 52 | ``` 53 | -------------------------------------------------------------------------------- /aexp.masm: -------------------------------------------------------------------------------- 1 | ADR AEXP 2 | AEXP 3 | CLL AS 4 | BF L1 5 | L2 6 | CLL AS 7 | BT L2 8 | SET 9 | BE 10 | L1 11 | L3 12 | R 13 | AS 14 | ID 15 | BF L4 16 | CL 'address ' 17 | CI 18 | OUT 19 | TST ':=' 20 | BE 21 | CLL EX1 22 | BE 23 | CL 'store' 24 | OUT 25 | TST ';' 26 | BE 27 | L4 28 | L5 29 | R 30 | EX1 31 | CLL EX2 32 | BF L6 33 | L7 34 | TST '+' 35 | BF L8 36 | CLL EX2 37 | BE 38 | CL 'add' 39 | OUT 40 | L8 41 | BT L9 42 | TST '-' 43 | BF L10 44 | CLL EX2 45 | BE 46 | CL 'sub' 47 | OUT 48 | L10 49 | L9 50 | BT L7 51 | SET 52 | BE 53 | L6 54 | L11 55 | R 56 | EX2 57 | CLL EX3 58 | BF L12 59 | L13 60 | TST '*' 61 | BF L14 62 | CLL EX3 63 | BE 64 | CL 'mpy' 65 | OUT 66 | L14 67 | BT L15 68 | TST '/' 69 | BF L16 70 | CLL EX3 71 | BE 72 | CL 'div' 73 | OUT 74 | L16 75 | L15 76 | BT L13 77 | SET 78 | BE 79 | L12 80 | L17 81 | R 82 | EX3 83 | CLL EX4 84 | BF L18 85 | L19 86 | TST '^' 87 | BF L20 88 | CLL EX3 89 | BE 90 | CL 'exp' 91 | OUT 92 | L20 93 | L21 94 | BT L19 95 | SET 96 | BE 97 | L18 98 | L22 99 | R 100 | EX4 101 | TST '+' 102 | BF L23 103 | CLL EX5 104 | BE 105 | L23 106 | BT L24 107 | TST '-' 108 | BF L25 109 | CLL EX5 110 | BE 111 | CL 'minus' 112 | OUT 113 | L25 114 | BT L24 115 | CLL EX5 116 | BF L26 117 | L26 118 | L24 119 | R 120 | EX5 121 | ID 122 | BF L27 123 | CL 'load ' 124 | CI 125 | OUT 126 | L27 127 | BT L28 128 | NUM 129 | BF L29 130 | CL 'literal ' 131 | CI 132 | OUT 133 | L29 134 | BT L28 135 | TST '(' 136 | BF L30 137 | CLL EX1 138 | BE 139 | TST ')' 140 | BE 141 | L30 142 | L28 143 | R 144 | END 145 | -------------------------------------------------------------------------------- /metaii.masm: -------------------------------------------------------------------------------- 1 | ADR PROGRAM 2 | PROGRAM 3 | TST '.SYNTAX' 4 | BF L1 5 | ID 6 | BE 7 | CL 'ADR ' 8 | CI 9 | OUT 10 | L2 11 | CLL ST 12 | BT L2 13 | SET 14 | BE 15 | TST '.END' 16 | BE 17 | CL 'END' 18 | OUT 19 | L1 20 | L3 21 | R 22 | ST 23 | ID 24 | BF L4 25 | LB 26 | CI 27 | OUT 28 | TST '=' 29 | BE 30 | CLL EX1 31 | BE 32 | TST ';' 33 | BE 34 | CL 'R' 35 | OUT 36 | L4 37 | L5 38 | R 39 | EX1 40 | CLL EX2 41 | BF L6 42 | L7 43 | TST '/' 44 | BF L8 45 | CL 'BT ' 46 | GN1 47 | OUT 48 | CLL EX2 49 | BE 50 | L8 51 | L9 52 | BT L7 53 | SET 54 | BE 55 | LB 56 | GN1 57 | OUT 58 | L6 59 | L10 60 | R 61 | EX2 62 | CLL EX3 63 | BF L11 64 | CL 'BF ' 65 | GN1 66 | OUT 67 | L11 68 | BT L12 69 | CLL OUTPUT 70 | BF L13 71 | L13 72 | L12 73 | BF L14 74 | L15 75 | CLL EX3 76 | BF L16 77 | CL 'BE' 78 | OUT 79 | L16 80 | BT L17 81 | CLL OUTPUT 82 | BF L18 83 | L18 84 | L17 85 | BT L15 86 | SET 87 | BE 88 | LB 89 | GN1 90 | OUT 91 | L14 92 | L19 93 | R 94 | EX3 95 | ID 96 | BF L20 97 | CL 'CLL ' 98 | CI 99 | OUT 100 | L20 101 | BT L21 102 | SR 103 | BF L22 104 | CL 'TST ' 105 | CI 106 | OUT 107 | L22 108 | BT L21 109 | TST '.ID' 110 | BF L23 111 | CL 'ID' 112 | OUT 113 | L23 114 | BT L21 115 | TST '.NUMBER' 116 | BF L24 117 | CL 'NUM' 118 | OUT 119 | L24 120 | BT L21 121 | TST '.STRING' 122 | BF L25 123 | CL 'SR' 124 | OUT 125 | L25 126 | BT L21 127 | TST '(' 128 | BF L26 129 | CLL EX1 130 | BE 131 | TST ')' 132 | BE 133 | L26 134 | BT L21 135 | TST '.EMPTY' 136 | BF L27 137 | CL 'SET' 138 | OUT 139 | L27 140 | BT L21 141 | TST '$' 142 | BF L28 143 | LB 144 | GN1 145 | OUT 146 | CLL EX3 147 | BE 148 | CL 'BT ' 149 | GN1 150 | OUT 151 | CL 'SET' 152 | OUT 153 | L28 154 | L21 155 | R 156 | OUTPUT 157 | TST '.OUT' 158 | BF L29 159 | TST '(' 160 | BE 161 | L30 162 | CLL OUT1 163 | BT L30 164 | SET 165 | BE 166 | TST ')' 167 | BE 168 | L29 169 | BT L31 170 | TST '.LABEL' 171 | BF L32 172 | CL 'LB' 173 | OUT 174 | CLL OUT1 175 | BE 176 | L32 177 | L31 178 | BF L33 179 | CL 'OUT' 180 | OUT 181 | L33 182 | L34 183 | R 184 | OUT1 185 | TST '*1' 186 | BF L35 187 | CL 'GN1' 188 | OUT 189 | L35 190 | BT L36 191 | TST '*2' 192 | BF L37 193 | CL 'GN2' 194 | OUT 195 | L37 196 | BT L36 197 | TST '*' 198 | BF L38 199 | CL 'CI' 200 | OUT 201 | L38 202 | BT L36 203 | SR 204 | BF L39 205 | CL 'CL ' 206 | CI 207 | OUT 208 | L39 209 | L36 210 | R 211 | END 212 | -------------------------------------------------------------------------------- /metaii.meta: -------------------------------------------------------------------------------- 1 | .SYNTAX PROGRAM 2 | 3 | PROGRAM = '.SYNTAX' .ID .OUT('ADR ' *) 4 | $ ST 5 | '.END' .OUT('END') ; 6 | 7 | ST = .ID .LABEL * '=' EX1 ';' .OUT('R') ; 8 | 9 | EX1 = EX2 $('/' .OUT('BT ' *1) EX2 ) 10 | .LABEL *1 ; 11 | 12 | EX2 = (EX3 .OUT('BF ' *1) / OUTPUT) 13 | $(EX3 .OUT('BE') / OUTPUT) 14 | .LABEL *1 ; 15 | 16 | EX3 = .ID .OUT('CLL '*) / 17 | .STRING .OUT('TST '*) / 18 | '.ID' .OUT('ID') / 19 | '.NUMBER' .OUT('NUM') / 20 | '.STRING' .OUT('SR') / 21 | '(' EX1 ')' / 22 | '.EMPTY' .OUT('SET') / 23 | '$' .LABEL *1 EX3 .OUT('BT ' *1) .OUT('SET') ; 24 | 25 | OUTPUT = ('.OUT' '('$OUT1 ')' / 26 | '.LABEL' .OUT('LB') OUT1) 27 | .OUT('OUT') ; 28 | 29 | OUT1 = '*1' .OUT('GN1') / 30 | '*2' .OUT('GN2') / 31 | '*' .OUT('CI') / 32 | .STRING .OUT('CL '*) ; 33 | 34 | .END 35 | -------------------------------------------------------------------------------- /metavm.lisp: -------------------------------------------------------------------------------- 1 | ;; Virtual machine for META II. Steve Bagley, Sept 2015, 2 | ;; stevenbagley@fastmail.fm. This is based on Val Schorre, A Syntax 3 | ;; Oriented Compiler Writing Language, and especially James Neighbors, 4 | ;; http://www.bayfronttechnologies.com/mc_tutorial.html 5 | 6 | ;; Depends on: quicklisp and cl-ppcre 7 | 8 | (in-package #:cl-user) 9 | 10 | (eval-when (:compile-toplevel :load-toplevel :execute) 11 | (ql:quickload 'cl-ppcre) 12 | ) 13 | 14 | ;; Utilities, and code for parsing a file into lisp-based tokens. 15 | 16 | (defun string-append (&rest strings) 17 | (apply #'concatenate 'string strings)) 18 | 19 | (eval-when (:compile-toplevel :load-toplevel :execute) 20 | 21 | ;; does leaf appear in tree? 22 | (defun has-leaf? (leaf tree) 23 | (cond ((null tree) nil) 24 | ((eq leaf tree) t) 25 | ((atom tree) nil) 26 | ((or (has-leaf? leaf (car tree)) 27 | (has-leaf? leaf (cdr tree)))))) 28 | ) 29 | 30 | (defun file-as-string (file) 31 | (with-open-file (stream file :direction :input) 32 | (let ((s (make-string (file-length stream)))) 33 | (read-sequence s stream) 34 | s))) 35 | 36 | ;; change all single quotes to double quotes 37 | (defun convert-single-to-double (string) 38 | (cl-ppcre:regex-replace-all "'" string "\"")) 39 | 40 | ;; "foo" -> ("foo"), "foo bar" -> ("foo" "bar") 41 | (defun split-line-at-space (line) 42 | (let* ((line2 (string-trim '(#\space #\tab) line)) 43 | (pos (position #\space line2))) 44 | (if (null pos) 45 | (list line2) 46 | (list (subseq line2 0 pos) 47 | (subseq line2 (1+ pos)))))) 48 | 49 | ;; handle single quotes in strings 50 | ;; "'foo'" -> "foo", "foo" -> foo 51 | (defun convert-string (string) 52 | (if (eql #\' (char string 0)) 53 | (string-trim '(#\') string) 54 | (intern string))) 55 | 56 | (defun parse-line (line) 57 | (cond ((eql #\space (char line 0)) 58 | (mapcar #'convert-string (split-line-at-space line))) 59 | (:else 60 | (intern (string-trim '(#\space #\tab) line))))) 61 | 62 | ;; read file containing META code, return list of sexprs. 63 | (defun parse-file (file) 64 | (with-open-file (s file :direction :input) 65 | (loop for line = (read-line s nil :eof) 66 | until (eq line :eof) 67 | collect (parse-line line)))) 68 | 69 | ;; convert from lists of exprs to a vector of instructions. exprs are 70 | ;; contiguous expressions, separated by labels (symbols). returns list 71 | ;; of (label-map block), where block is a vector of instructions, and 72 | ;; label-map maps from labels to instruction locations (vector index). 73 | (defun build-instructions (exprs) 74 | (loop with index = 0 75 | and label-map = nil 76 | and block = nil 77 | for expr in exprs 78 | do (cond ((symbolp expr) 79 | ;; record index of label 80 | (push (list expr index) label-map)) 81 | (:else 82 | (push expr block) 83 | (incf index))) 84 | finally (return (list label-map (make-array (length block) 85 | :element-type t 86 | :initial-contents (nreverse block)))))) 87 | 88 | ;; build a register machine using lisp globals. 89 | 90 | (defvar *switch*) 91 | (defvar *label1*) 92 | (defvar *label2*) 93 | (defvar *output-buffer*) 94 | (defvar *output-column*) 95 | (defvar *buffer*) 96 | (defvar *buffer-index*) 97 | (defvar *token*) 98 | (defvar *call-stack*) 99 | (defvar *label-map*) 100 | (defvar *block*) 101 | (defvar *program-counter*) 102 | 103 | (defvar *instruction-table* (make-hash-table :size 20 :test #'eq)) 104 | 105 | (defvar *label-counter* 0) 106 | 107 | (defun generate-label () 108 | (prog1 109 | (intern (format nil "L~D" *label-counter*)) 110 | (incf *label-counter*))) 111 | 112 | (defun check-buffer-index () 113 | (when (>= *buffer-index* (length *buffer*)) 114 | (format t "~&Hit end of input buffer, exiting.") 115 | (throw 'run-loop nil))) 116 | 117 | (defun run-inst (inst) 118 | (let* ((op (car inst)) 119 | (proc (gethash op *instruction-table*))) 120 | (when (null proc) 121 | (error (format nil "missing proc for ~S" op))) 122 | (apply proc (cdr inst)))) 123 | 124 | ;; Looks up label in label-map, and returns the vector index of that 125 | ;; label. 126 | (defun get-label-index (label label-map) 127 | (let ((entry (assq label label-map))) 128 | (if entry 129 | (cadr entry) 130 | (error "can't find label ~S in label-map ~S" label label-map)))) 131 | 132 | (defun print-machine-state () 133 | (format t "~%") 134 | (format t "~&call-stack: ~S" *call-stack*) 135 | (format t "~&program-counter: ~D" *program-counter*) 136 | (format t "~&buffer: ~S" (subseq *buffer* *buffer-index* (min (+ *buffer-index* 20) (length *buffer*)))) 137 | (format t "~&buffer-index: ~S" *buffer-index*) 138 | (format t "~&label1: ~S" *label1*) 139 | (format t "~&label2: ~S" *label2*) 140 | (format t "~&switch: ~S" *switch*) 141 | (format t "~&token: ~S" *token*) 142 | (format t "~&output-buffer: ~S" *output-buffer*) 143 | (format t "~&output-column: ~S" *output-column*) 144 | (format t "~%")) 145 | 146 | ;; top level function, runs instructions (from build-instructions), 147 | ;; taking input from string. 148 | (defun run (instructions string &optional (trace nil)) 149 | (setq *label-counter* 0) 150 | (setq *switch* :none) 151 | (setq *label1* nil) 152 | (setq *label2* nil) 153 | (setq *buffer* string) 154 | (setq *buffer-index* 0) 155 | (setq *output-buffer* nil) 156 | (setq *output-column* 0) 157 | (setq *token* nil) 158 | (setq *label-map* (car instructions)) 159 | (setq *block* (cadr instructions)) 160 | (setq *call-stack* nil) 161 | (when trace 162 | (pprint (coerce *block* 'list))) 163 | (setq *program-counter* 0) 164 | (catch 'run-loop 165 | (loop with n = (length *block*) 166 | do (let ((inst (aref *block* *program-counter*))) 167 | (when trace 168 | (format t "~&~%RUN-INST: ~S" inst)) 169 | (run-inst inst) 170 | (when trace 171 | (print-machine-state) 172 | (read-char)) 173 | (cond ((null *program-counter*) 174 | (format t "Trying to return from subroutine when call stack is empty. Exiting.") 175 | (loop-finish)) 176 | ((>= *program-counter* n) 177 | (loop-finish)) 178 | ((and (null *call-stack*) 179 | (null *output-buffer*) 180 | (= *buffer-index* (length *buffer*))) 181 | (format t "~&~%Hit EOF in buffer, exiting.") 182 | (loop-finish)))))) 183 | 'done) 184 | 185 | ;; macro for defining the machine instructions. Each instruction names 186 | ;; a procedure stored in hash table. Note that this adds code to 187 | ;; increment the program counter unless the instruction body already 188 | ;; does something to that variable. 189 | (defmacro definst (inst operand &body body) 190 | (let* ((uses-pc? (has-leaf? '*program-counter* body)) 191 | (new-body (if uses-pc? 192 | body 193 | (append body '((incf *program-counter*)))))) 194 | `(setf (gethash ',inst *instruction-table*) 195 | (lambda ,operand ,@new-body)))) 196 | 197 | ;; the following are all the machine instructions. 198 | 199 | ;; name of program, so just jump to starting label 200 | (definst ADR (label) 201 | (setq *program-counter* (get-label-index label *label-map*))) 202 | 203 | ;; end of program, do nothing 204 | (definst END ()) 205 | 206 | (defun test-for-string (target-string) 207 | (check-buffer-index) 208 | (multiple-value-bind (match-start match-end) 209 | (cl-ppcre:scan (string-append "^\\s*" (cl-ppcre:quote-meta-chars target-string)) *buffer* :start *buffer-index*) 210 | (cond (match-start 211 | (setq *buffer-index* match-end) 212 | (setq *switch* t)) 213 | (:else 214 | (setq *switch* nil))))) 215 | 216 | (definst TST (target-string) 217 | (test-for-string target-string)) 218 | 219 | (defun look-for-identifier-token () 220 | (check-buffer-index) 221 | (multiple-value-bind (match-string) 222 | (cl-ppcre:scan-to-strings "^\\s*[A-Za-z][A-Za-z0-9]*" *buffer* :start *buffer-index*) 223 | (cond (match-string 224 | (incf *buffer-index* (length match-string)) 225 | (setq match-string (string-left-trim '(#\space #\tab #\newline) match-string)) 226 | (setq *token* match-string) 227 | (setq *switch* t)) 228 | (:else 229 | (setq *switch* nil))))) 230 | 231 | (definst ID () 232 | (look-for-identifier-token)) 233 | 234 | (defun look-for-number-token () 235 | (check-buffer-index) 236 | (multiple-value-bind (match-string) 237 | (cl-ppcre:scan-to-strings "^\\s*[0-9]+" *buffer* :start *buffer-index*) 238 | (cond (match-string 239 | (incf *buffer-index* (length match-string)) 240 | (setq match-string (string-left-trim '(#\space #\tab #\newline) match-string)) 241 | (setq *token* match-string) 242 | (setq *switch* t)) 243 | (:else 244 | (setq *switch* nil))))) 245 | 246 | (definst NUM () 247 | (look-for-number-token)) 248 | 249 | (defun look-for-string-token () 250 | (check-buffer-index) 251 | (multiple-value-bind (match-string) 252 | (cl-ppcre:scan-to-strings "^\\s*\"[^\"]+\"" *buffer* :start *buffer-index*) 253 | (cond (match-string 254 | (incf *buffer-index* (length match-string)) 255 | (setq match-string (string-left-trim '(#\space #\tab #\newline) match-string)) 256 | (setq *token* match-string) 257 | (setq *switch* t)) 258 | (:else 259 | (setq *switch* nil))))) 260 | 261 | (definst SR () 262 | (look-for-string-token)) 263 | 264 | ;; call subroutine 265 | (definst CLL (label) 266 | (progn 267 | ;; save the current control state 268 | (push :blank *label1*) 269 | (push :blank *label2*) 270 | (push *program-counter* *call-stack*) 271 | ;; jump to label 272 | (setq *program-counter* (get-label-index label *label-map*)))) 273 | 274 | ;; return from subroutine 275 | (definst R () 276 | (progn 277 | ;; pop stack 278 | (pop *label1*) 279 | (pop *label2*) 280 | ;; jump to instruction following previous program location 281 | (let ((addr (pop *call-stack*))) 282 | (if addr 283 | (setq *program-counter* (1+ addr)) 284 | (setq *program-counter* nil))))) 285 | 286 | ;; set switch. 287 | (definst SET () 288 | (setq *switch* t)) 289 | 290 | ;; unconditional branch 291 | (definst B (label) 292 | (setq *program-counter* (get-label-index label *label-map*))) 293 | 294 | ;; branch if false 295 | (definst BF (label) 296 | (if (not *switch*) 297 | (setq *program-counter* (get-label-index label *label-map*)) 298 | (incf *program-counter*))) 299 | 300 | ;; branch if true 301 | (definst BT (label) 302 | (if *switch* 303 | (setq *program-counter* (get-label-index label *label-map*)) 304 | (incf *program-counter*))) 305 | 306 | ;; branch to error if false 307 | (definst BE () 308 | (cond ((not *switch*) 309 | (format t "~&META-II error. Halting.") 310 | (throw 'run-loop nil)) 311 | (:else 312 | (incf *program-counter*)))) 313 | 314 | ;; copy literal to output 315 | (definst CL (string) 316 | (push string *output-buffer*)) 317 | 318 | ;; copy input to output 319 | (definst CI () 320 | (push *token* *output-buffer*)) 321 | 322 | ;; generate label1 323 | (definst GN1 () 324 | (progn 325 | (if (eq :blank (car *label1*)) 326 | (push (generate-label) *label1*)) 327 | (push (car *label1*) *output-buffer*))) 328 | 329 | ;; generate label2 330 | (definst GN2 () 331 | (progn 332 | (if (eq :blank (car *label2*)) 333 | (push (generate-label) *label2*)) 334 | (push (car *label2*) *output-buffer*))) 335 | 336 | ;; move output pointer to label field 337 | (definst LB () 338 | (setq *output-column* 0)) 339 | 340 | ;; print the current output buffer, then reset 341 | (definst OUT () 342 | (progn 343 | (loop repeat *output-column* do (princ " ")) 344 | (loop for string in (reverse *output-buffer*) 345 | do (princ string)) 346 | (terpri) 347 | (setq *output-buffer* nil) 348 | (setq *output-column* 8))) 349 | --------------------------------------------------------------------------------