├── .gitignore ├── examples ├── closures.clj ├── labels1.clj ├── hello.clj ├── blue.li.clj ├── stripes-grey.clj ├── strlen.clj ├── stripes.li.clj └── write-number.clj ├── deps.edn ├── src └── lithium │ ├── utils.clj │ ├── compiler │ ├── state.clj │ ├── repr.clj │ ├── code.clj │ ├── closure.clj │ ├── primitives.clj │ └── ast.clj │ ├── driver.clj │ ├── compiler.clj │ └── assembler.clj ├── test └── lithium │ └── compiler_test.clj └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | .nrepl* 2 | target -------------------------------------------------------------------------------- /examples/closures.clj: -------------------------------------------------------------------------------- 1 | (let [x 1 f (fn [y] (+ x y))] 2 | (f (inc x))) -------------------------------------------------------------------------------- /deps.edn: -------------------------------------------------------------------------------- 1 | {:paths ["src"] 2 | :deps {org.clojure/clojure {:mvn/version "1.12.0"}}} 3 | -------------------------------------------------------------------------------- /examples/labels1.clj: -------------------------------------------------------------------------------- 1 | (labels [f1 (code [x] [y] (+ x y))] 2 | (let [x 1 f (closure f1 [(inc x)])] 3 | (fncall f x)))) -------------------------------------------------------------------------------- /examples/hello.clj: -------------------------------------------------------------------------------- 1 | (do 2 | (write-char \H) 3 | (write-char \e) 4 | (write-char \l) 5 | (write-char \l) 6 | (write-char \o)) 7 | -------------------------------------------------------------------------------- /examples/blue.li.clj: -------------------------------------------------------------------------------- 1 | (mov :ax 0x13) 2 | (int 0x10) 3 | (mov :ax 0xa000) 4 | (mov :es :ax) 5 | (xor :di :di) 6 | (mov :al 1) 7 | (mov :cx 64000) 8 | :loop 9 | (stosb) 10 | (loop :loop) 11 | :forever 12 | (jmp :forever) 13 | -------------------------------------------------------------------------------- /examples/stripes-grey.clj: -------------------------------------------------------------------------------- 1 | (do (init-graph) 2 | (loop [x 0 y 0] 3 | (put-pixel x y (let [z (mod (+ x y) 32)] 4 | (+ 16 (if (< z 16) z (- 31 z))))) 5 | (if (= y 200) 6 | nil 7 | (if (= x 319) 8 | (recur 0 (inc y)) 9 | (recur (inc x) y))))) -------------------------------------------------------------------------------- /examples/strlen.clj: -------------------------------------------------------------------------------- 1 | (def write-string 2 | (fn [s] 3 | (let [len (strlen s)] 4 | (loop [i 0] 5 | (if (= i len) 6 | nil 7 | (do 8 | (write-char (char-at s i)) 9 | (recur (inc i)))))))) 10 | 11 | (write-string "Life ") 12 | (write-string (symbol->string :is)) 13 | (write-string " life\r\n") 14 | (write-string "NaNaNaNaNa!\r\n") 15 | -------------------------------------------------------------------------------- /src/lithium/utils.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.utils 2 | (:require [clojure.java.io :as io]) 3 | (:use [clojure.test :only [deftest is]])) 4 | 5 | (defn read-all 6 | [input] 7 | (if (or (string? input) (instance? java.io.File input)) 8 | (with-open [f (java.io.PushbackReader. (io/reader input))] 9 | (let [eof (Object.)] 10 | (doall (take-while #(not= % eof) (repeatedly #(read f false eof)))))) 11 | input)) 12 | -------------------------------------------------------------------------------- /examples/stripes.li.clj: -------------------------------------------------------------------------------- 1 | (mov :ax 0x13) 2 | (int 0x10) 3 | (mov :ax 0xa000) 4 | (mov :es :ax) 5 | (xor :di :di) 6 | (mov :cx 200) 7 | (xor :al :al) 8 | :outer 9 | (push :cx) 10 | (mov :cx 320) 11 | :inner 12 | (stosb) 13 | (inc :al) 14 | (cmp :al 40) 15 | (jne :dontzero) 16 | (xor :al :al) 17 | :dontzero 18 | (loop :inner) 19 | (inc :al) 20 | (cmp :al 40) 21 | (jne :dontzero2) 22 | (xor :al :al) 23 | :dontzero2 24 | (pop :cx) 25 | (loop :outer) 26 | (mov :ah 7) 27 | (int 0x21) 28 | (mov :ax 3) 29 | (int 0x10) 30 | (ret) -------------------------------------------------------------------------------- /test/lithium/compiler_test.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler-test 2 | (:require [lithium.compiler :as compiler] 3 | [lithium.compiler.repr :as repr] 4 | [clojure.test :refer [deftest are]])) 5 | 6 | (deftest test-compiler 7 | (are [expr expected-result] (let [{:keys [ax]} (compiler/compile-and-run! [expr] compiler/register-dump)] 8 | (= ax (repr/immediate expected-result))) 9 | '(+ 3 4) 7 10 | '(< 10 15) true 11 | '(if true 3 4) 3 12 | '(if (< 15 10) 3 4) 4 13 | '(let [x 2 y (+ x 3)] 14 | (+ x y)) 7 15 | '(let [x 2 16 | f (fn [y] 17 | (+ x y))] 18 | (f 3)) 5)) 19 | -------------------------------------------------------------------------------- /src/lithium/compiler/state.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.state 2 | (:require [lithium.compiler.repr :as repr])) 3 | 4 | (def initial-compilation-state 5 | {:code [] 6 | :stack-pointer (- repr/wordsize) 7 | :global-env-start (- 0x10000 repr/wordsize) 8 | :environment {} 9 | :recur-point nil 10 | :min-sp (- repr/wordsize)}) 11 | 12 | (defn next-loc [state] 13 | (as-> state state 14 | (update state :stack-pointer - repr/wordsize) 15 | (update state :min-sp min (:stack-pointer state)))) 16 | 17 | (defn restore-env [orig-state state] 18 | (assoc orig-state 19 | :code (:code state) 20 | :min-sp (min (:min-sp orig-state) (:min-sp state) (:stack-pointer state)))) 21 | -------------------------------------------------------------------------------- /src/lithium/compiler/repr.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.repr) 2 | 3 | (def number-tag 2r00) 4 | (def vector-tag 2r010) 5 | (def closure-tag 2r110) 6 | (def cons-tag 2r001) 7 | (def string-tag 2r011) 8 | (def symbol-tag 2r101) 9 | (def char-tag 2r00001111) 10 | (def nil-tag 2r00101111) 11 | (def boolean-tag 2r0011111) 12 | 13 | (def wordsize 2) 14 | 15 | (def type->tag {:string string-tag 16 | :keyword symbol-tag}) 17 | 18 | (defn immediate [x] 19 | (cond 20 | (integer? x) (bit-shift-left x 2) 21 | (nil? x) nil-tag 22 | (char? x) (bit-or (bit-shift-left (int x) 8) char-tag) 23 | (boolean? x) (bit-or (if x 2r10000000 0) boolean-tag))) 24 | 25 | (defn immediate? [x] 26 | (immediate x)) 27 | -------------------------------------------------------------------------------- /src/lithium/compiler/code.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.code 2 | (:require [clojure.string :as string] 3 | [lithium.compiler.state :as state])) 4 | 5 | (declare compile-expr) 6 | 7 | (defn combine [state instr] 8 | (cond (and (sequential? instr) (= (first instr) ':subexpr)) 9 | #_=> (let [[_ expr state-modifier restore-fn] instr 10 | orig-state state 11 | state (cond-> state state-modifier state-modifier) 12 | state (compile-expr expr state)] 13 | ((or restore-fn state/restore-env) orig-state state)) 14 | (fn? instr) 15 | #_=> (instr state) 16 | :else (update state :code conj instr))) 17 | 18 | (defn codeseq [state & code] 19 | (reduce combine state code)) 20 | 21 | (defn genkey [] 22 | (-> (gensym) name string/lower-case keyword)) 23 | -------------------------------------------------------------------------------- /examples/write-number.clj: -------------------------------------------------------------------------------- 1 | #_(comment 2 | (def write-char print) 3 | (def int->char char)) 4 | 5 | (def write-number 6 | (fn [x] 7 | (if (= x 0) 8 | (write-char \0) 9 | (loop [modulus 1000 n x emitting? false] 10 | (if (= modulus 0) 11 | nil 12 | (let [digit (quot n modulus) 13 | now-emitting? (if emitting? true (< 0 digit))] 14 | (if now-emitting? 15 | (write-char (int->char (+ 48 digit))) 16 | nil) 17 | (recur (quot modulus 10) 18 | (- n (* digit modulus)) 19 | now-emitting?))))))) 20 | 21 | (def print-mul 22 | (fn [a b] 23 | (let [] 24 | (write-number a) 25 | (write-char \*) 26 | (write-number b) 27 | (write-char \=) 28 | (write-number (* a b))))) 29 | 30 | (print-mul 23 79) 31 | -------------------------------------------------------------------------------- /src/lithium/driver.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.driver 2 | (:require [clojure.java.io :as io] 3 | [clojure.java.shell :refer [sh]] 4 | [clojure.string :as string])) 5 | 6 | (defn run-and-wait! [prog] 7 | (let [binary "/tmp/runwait.com"] 8 | (with-open [f (java.io.FileOutputStream. binary)] 9 | (.write f (byte-array prog)) 10 | (sh "dosbox" binary) 11 | nil))) 12 | 13 | (defn parse-registers [str] 14 | (->> (string/split str #"[\r\n]+") 15 | (map string/trim) 16 | (filter seq) 17 | (map (fn [s] 18 | (let [[reg val] (string/split s #"=")] 19 | [(keyword (string/lower-case reg)) 20 | (Long/parseLong val 16)]))) 21 | (into {}))) 22 | 23 | (defn run-and-capture-registers! [prog] 24 | (let [tmpdir "/tmp" 25 | binary (str tmpdir "/runcapt.com") 26 | batch (str tmpdir "/run.bat") 27 | output (str tmpdir "/REGS.TXT")] 28 | (with-open [f (java.io.FileOutputStream. binary)] 29 | (.write f (byte-array prog))) 30 | (spit batch "@cls\n@runcapt.com >regs.txt\n@exit\n") 31 | (sh "dosbox" batch :env {"SDL_VIDEODRIVER" "dummy"}) 32 | (parse-registers (slurp output)))) 33 | 34 | (defn run-program! [prog wait?] 35 | ((if wait? run-and-wait! run-and-capture-registers!) prog)) 36 | 37 | (defn hexdump [bytes] 38 | (string/join " " (map #(format "%02x" %) bytes))) 39 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # What is Lithium? 2 | 3 | Lithium is an attempt at several things at once: 4 | 5 | * An assembler for x86 CPUs, written in Clojure and using 6 | Clojure S-expressions as its input data. This part has been 7 | first announced in [this blog post][1]. 8 | * A compiler for a toy Lisp-like language, using that assembler as a 9 | backend. The compiler is being written following the guidelines 10 | found in a paper "[An Incremental Approach to Compiler Construction][2]." 11 | 12 | The purposes of Lithium are, first and foremost, to learn and to have fun. 13 | 14 | # Try it out 15 | 16 | In the REPL: 17 | 18 | ```clojure 19 | (require '[lithium.compiler :as compiler]) 20 | (compiler/compile-and-run! "examples/stripes-grey.clj") 21 | ``` 22 | 23 | # License 24 | 25 | Unless otherwise noted, code in this repository is copyright by 26 | Daniel Janus and released under the MIT license: 27 | 28 | ``` 29 | Copyright 2012–2021 Daniel Janus 30 | 31 | Permission is hereby granted, free of charge, to any person obtaining 32 | a copy of this software and associated documentation files (the 33 | "Software"), to deal in the Software without restriction, including 34 | without limitation the rights to use, copy, modify, merge, publish, 35 | distribute, sublicense, and/or sell copies of the Software, and to 36 | permit persons to whom the Software is furnished to do so, subject to 37 | the following conditions: 38 | 39 | The above copyright notice and this permission notice shall be 40 | included in all copies or substantial portions of the Software. 41 | 42 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 43 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 44 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 45 | NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE 46 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 47 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 48 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 49 | ``` 50 | 51 | The assembly code in `register-dump` is a s-expression version of a snippet 52 | that comes from http://www.fysnet.net/yourhelp.htm and is copyright by 53 | Forever Young Software. 54 | 55 | [1]: https://blog.danieljanus.pl/2012/05/14/lithium/ 56 | [2]: http://scheme2006.cs.uchicago.edu/11-ghuloum.pdf 57 | -------------------------------------------------------------------------------- /src/lithium/compiler/closure.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.closure 2 | (:require [lithium.compiler.ast :as ast] 3 | [lithium.compiler.code :refer [genkey]])) 4 | 5 | (defn free-variable-analysis [ast] 6 | (ast/walk 7 | (assoc ast :bound-vars []) 8 | (fn [ast] 9 | (ast/match ast 10 | ast/let-like [bound-vars bindings body] 11 | (let [bindings (loop [acc [] 12 | bound-vars bound-vars 13 | bindings bindings] 14 | (let [{:keys [symbol expr] :as binding} (first bindings)] 15 | (if binding 16 | (recur (conj acc {:symbol symbol, :expr (assoc expr :bound-vars bound-vars)}) 17 | (conj bound-vars symbol) 18 | (next bindings)) 19 | acc))) 20 | body-bound-vars (into bound-vars (map :symbol bindings))] 21 | (assoc ast :bindings bindings :body (map #(assoc % :bound-vars body-bound-vars) body))) 22 | 23 | :fn [args body bound-vars] 24 | (let [body-bound-vars (into bound-vars args)] 25 | (assoc ast :body (map #(assoc % :bound-vars body-bound-vars) body))) 26 | 27 | ;; otherwise 28 | (ast/expr-update ast assoc :bound-vars (:bound-vars ast)))) 29 | identity)) 30 | 31 | (defn long-value? 32 | "Returns true if ast represents a 'long value', i.e. one whose content can't fit into a register." 33 | [ast] 34 | (and (= (:type ast) :value) 35 | (contains? #{:string :keyword} (:value-type ast)))) 36 | 37 | (defn collect-closures [ast] 38 | (let [codes (atom []) 39 | long-values (atom {}) 40 | outer-xform (fn [ast] 41 | (ast/match ast 42 | long-value? [value value-type] 43 | #_=> (let [ref (get (swap! long-values update value #(or % {:value-type value-type, :ref (genkey)})) value)] 44 | (merge {:type :long-value-ref} ref)) 45 | :fn [args body bound-vars] 46 | #_=> (let [label (gensym "cl")] 47 | (swap! codes conj {:type :code, :label label, :args args, :bound-vars bound-vars, :body body}) 48 | {:type :closure, :label label, :vars bound-vars}))) 49 | transformed (ast/walk ast identity outer-xform)] 50 | (if (or (seq @codes) (seq @long-values)) 51 | {:type :labels, :labels @codes, :long-values @long-values, :body transformed} 52 | transformed))) 53 | -------------------------------------------------------------------------------- /src/lithium/compiler/primitives.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.primitives 2 | (:require 3 | [lithium.compiler.repr :as repr] 4 | [lithium.compiler.state :refer [next-loc] :as state] 5 | [lithium.compiler.code :refer [codeseq compile-expr genkey]])) 6 | 7 | (def primitives {}) 8 | 9 | (defmacro defprimitive [name args & code] 10 | `(def primitives 11 | (assoc primitives '~name 12 | (fn [~'state [~@args]] 13 | ~@code)))) 14 | 15 | (defprimitive + [a b] 16 | (codeseq 17 | state 18 | [:subexpr b] 19 | ['mov [:bp (:stack-pointer state)] :ax] 20 | [:subexpr a next-loc] 21 | ['add :ax [:bp (:stack-pointer state)]])) 22 | 23 | (defprimitive - [a b] 24 | (codeseq 25 | state 26 | [:subexpr b] 27 | ['mov [:bp (:stack-pointer state)] :ax] 28 | [:subexpr a next-loc] 29 | ['sub :ax [:bp (:stack-pointer state)]])) 30 | 31 | (defprimitive * [a b] 32 | (codeseq 33 | state 34 | [:subexpr b] 35 | ['sar :ax 2] 36 | ['mov [:bp (:stack-pointer state)] :ax] 37 | [:subexpr a next-loc] 38 | ['mul [:word :bp (:stack-pointer state)]])) 39 | 40 | (defprimitive mod [a b] 41 | (codeseq 42 | state 43 | [:subexpr b] 44 | ['sar :ax 2] 45 | ['mov [:bp (:stack-pointer state)] :ax] 46 | [:subexpr a next-loc] 47 | ['mov :dx 0] 48 | ['sar :ax 2] 49 | ['div [:word :bp (:stack-pointer state)]] 50 | ['mov :ax :dx] 51 | ['sal :ax 2])) 52 | 53 | (defprimitive quot [a b] 54 | (codeseq 55 | state 56 | [:subexpr b] 57 | ['sar :ax 2] 58 | ['mov [:bp (:stack-pointer state)] :ax] 59 | [:subexpr a next-loc] 60 | ['mov :dx 0] 61 | ['sar :ax 2] 62 | ['div [:word :bp (:stack-pointer state)]] 63 | ['sal :ax 2])) 64 | 65 | (defprimitive = [a b] 66 | (let [l1 (genkey) l2 (genkey)] 67 | (codeseq 68 | state 69 | [:subexpr b] 70 | ['mov [:bp (:stack-pointer state)] :ax] 71 | [:subexpr a next-loc] 72 | ['cmp :ax [:bp (:stack-pointer state)]] 73 | ['jne l1] 74 | ['mov :ax (repr/immediate true)] 75 | ['jmp l2] 76 | l1 77 | ['mov :ax (repr/immediate false)] 78 | l2))) 79 | 80 | (defprimitive < [a b] 81 | (codeseq 82 | state 83 | [:subexpr b] 84 | ['mov [:bp (:stack-pointer state)] :ax] 85 | [:subexpr a next-loc] 86 | ['xor :bx :bx] 87 | ['cmp :ax [:bp (:stack-pointer state)]] 88 | ['setb :bl] 89 | ['mov :ax :bx] 90 | ['sal :ax 7] 91 | ['or :ax repr/boolean-tag])) 92 | 93 | (defprimitive write-char [x] 94 | (codeseq 95 | state 96 | [:subexpr x] 97 | ['sar :ax 8] 98 | ['mov :ah 0x0e] 99 | ['int 0x10])) 100 | 101 | (defprimitive byte [{:keys [value]}] 102 | (codeseq 103 | state 104 | ['mov :al value])) 105 | 106 | (defprimitive inc [x] 107 | (codeseq 108 | state 109 | [:subexpr x] 110 | ['add :ax (repr/immediate 1)])) 111 | 112 | (defprimitive nil? [x] 113 | (codeseq 114 | state 115 | [:subexpr x] 116 | ['cmp :ax repr/nil-tag] 117 | ['mov :ax 0] 118 | ['sete :al] 119 | ['sal :ax 7] 120 | ['or :ax repr/boolean-tag])) 121 | 122 | (defprimitive cons [x y] 123 | (codeseq 124 | state 125 | [:subexpr x] 126 | ['mov [:si] :ax] 127 | [:subexpr y] 128 | ['mov [:si repr/wordsize] :ax] 129 | ['mov :ax :si] 130 | ['or :ax repr/cons-tag] 131 | ['add :si 8])) 132 | 133 | (defprimitive car [x] 134 | (codeseq 135 | state 136 | [:subexpr x] 137 | ['mov :bx :ax] 138 | ['mov :ax [:bx -1]])) 139 | 140 | (defprimitive recur [& exprs] 141 | (let [orig-state state] 142 | (as-> state state 143 | (reduce (fn [state expr] 144 | (codeseq 145 | state 146 | [:subexpr expr] 147 | ['mov [:bp (:stack-pointer state)] :ax] 148 | next-loc)) 149 | state 150 | exprs) 151 | (state/restore-env orig-state state) 152 | (reduce (fn [state [i symbol]] 153 | (codeseq 154 | state 155 | ['mov :bx [:bp (- (:stack-pointer state) (* i repr/wordsize))]] 156 | ['mov [:bp (get-in state [:environment symbol :offset])] :bx])) 157 | state 158 | (map-indexed vector (:loop-symbols state))) 159 | (codeseq 160 | state 161 | ['jmp (:recur-point state)])))) 162 | 163 | (defprimitive init-graph [] 164 | (codeseq 165 | state 166 | ['mov :ax 0x13] 167 | ['int 0x10] 168 | ['mov :ax 0xa000] 169 | ['mov :es :ax])) 170 | 171 | (defprimitive put-pixel [x y c] 172 | (codeseq 173 | state 174 | ['mov :cx :di] 175 | [:subexpr y] 176 | ['sal :ax 4] 177 | ['mov :di :ax] 178 | ['sal :ax 2] 179 | ['add :di :ax] 180 | [:subexpr x] 181 | ['sar :ax 2] 182 | ['add :di :ax] 183 | [:subexpr c] 184 | ['sar :ax 2] 185 | ['stosb] 186 | ['mov :di :cx])) 187 | 188 | (defprimitive cdr [x] 189 | (codeseq 190 | state 191 | [:subexpr x] 192 | ['mov :bx :ax] 193 | ['mov :ax [:bx (dec repr/wordsize)]])) 194 | 195 | (defprimitive int->char [x] 196 | (codeseq 197 | state 198 | [:subexpr x] 199 | ['sal :ax 6] 200 | ['mov :al repr/char-tag])) 201 | 202 | (defprimitive strlen [x] 203 | (codeseq 204 | state 205 | [:subexpr x] 206 | ['mov :bx :ax] 207 | ['sub :bx repr/string-tag] 208 | ['xor :ax :ax] 209 | ['mov :al [:bx]] 210 | ['sal :ax 2])) 211 | 212 | (defprimitive symbol->string [x] 213 | (codeseq 214 | state 215 | [:subexpr x] 216 | ['sub :ax (- repr/symbol-tag repr/string-tag)])) 217 | 218 | (defprimitive char-at [s i] 219 | (codeseq 220 | state 221 | [:subexpr s] 222 | ['mov :bx :ax] 223 | ['sub :bx repr/string-tag] 224 | [:subexpr i] 225 | ['sar :ax 2] 226 | ['inc :ax] 227 | ['add :bx :ax] 228 | ['mov :ah [:bx]] 229 | ['mov :al repr/char-tag])) 230 | -------------------------------------------------------------------------------- /src/lithium/compiler/ast.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler.ast 2 | "The AST is Lithium's first intermediate representation. As the name suggests, 3 | it's a uniform way to represent the Lithium language expressions as Clojure 4 | data structures – an abstract syntax tree. 5 | 6 | An AST node is a map containing the key `:type`, whose value is a keyword 7 | determining the expression type: `:if`, `:let`, etc. Depending on the type, 8 | there are additional members. A schema or spec will follow at some time, but 9 | for now, look at clojure->ast to see how it looks. 10 | 11 | This representation is inspired by edn-query-language's AST. The advantage 12 | is that it's more amenable to transformations than plain s-expressions, so 13 | it's easier to write code walkers or analysers that target it. 14 | 15 | This ns declares functions to convert Lithium code to AST and back, as well 16 | as helper macros and functions to access and transform the AST." 17 | (:require [lithium.compiler.primitives :as primitives])) 18 | 19 | (declare clojure->ast) 20 | 21 | (defn list->ast [[fst & lst :as expr]] 22 | (cond 23 | (= fst 'if) {:type :if 24 | :condition (clojure->ast (first lst)) 25 | :then-expr (clojure->ast (second lst)) 26 | :else-expr (clojure->ast (last lst))} 27 | (= fst 'do) {:type :do 28 | :exprs (mapv clojure->ast lst)} 29 | (= fst 'fn) {:type :fn 30 | :args (first lst) 31 | :body (mapv clojure->ast (next lst))} 32 | (= fst 'def) {:type :def 33 | :symbol (first lst) 34 | :definition (clojure->ast (second lst))} 35 | (contains? '#{let loop} fst) {:type (keyword (name fst)) 36 | :bindings (vec (for [[k v] (partition 2 (first lst))] 37 | {:symbol k, :expr (clojure->ast v)})) 38 | :body (mapv clojure->ast (next lst))} 39 | (contains? primitives/primitives fst) {:type :primitive-call 40 | :primitive fst 41 | :args (mapv clojure->ast lst)} 42 | :otherwise {:type :fn-call 43 | :fn-expr (clojure->ast fst) 44 | :args (mapv clojure->ast lst)})) 45 | 46 | (defn clojure->ast [expr] 47 | (cond 48 | (list? expr) (list->ast expr) 49 | (int? expr) {:type :value, :value-type :int, :value expr} 50 | (boolean? expr) {:type :value, :value-type :boolean, :value expr} 51 | (char? expr) {:type :value, :value-type :char, :value expr} 52 | (string? expr) {:type :value, :value-type :string, :value expr} 53 | (keyword? expr) {:type :value, :value-type :keyword, :value expr} 54 | (symbol? expr) {:type :env-lookup, :symbol expr} 55 | (nil? expr) {:type :value, :value-type :nil, :value nil} 56 | :otherwise {:type :unrecognized 57 | :expr expr})) 58 | 59 | (defn type-matches? 60 | "A helper for match. See below." 61 | [specifier ast] 62 | (if (keyword? specifier) 63 | (= (:type ast) specifier) 64 | (specifier ast))) 65 | 66 | (defn let-like [{:keys [type]}] 67 | (contains? #{:let :loop} type)) 68 | 69 | (defmacro match 70 | "Evaluates one of the clauses depending on the type of the given AST node. 71 | Clauses are triplets, supplied sequentially (without additional parens), 72 | as in cond. Each clause has the form: 73 | 74 | `type-specifier bindings expr` 75 | 76 | where: 77 | 78 | - `type-specifier` is either a keyword (denoting AST of that specific type) 79 | or a predicate; 80 | - `bindings` is a vector of symbols to which their particular values 81 | in the AST will be bound; 82 | - `expr` is the expression to evaluate if type-specifier matches the AST. 83 | 84 | A final clause can be supplied as an expression for the `:otherwise` case. 85 | If not supplied and no clause matches, ast is returned unchanged." 86 | [ast & clauses] 87 | (let [ast-sym (gensym) 88 | transformed-clauses (for [[name bindings body] (partition 3 clauses)] 89 | [name `(let [{:keys ~bindings} ~ast-sym] ~body)])] 90 | `(let [~ast-sym ~ast] 91 | (condp type-matches? ~ast-sym 92 | ~@(apply concat transformed-clauses) 93 | ~(or (when-not (zero? (mod (count clauses) 3)) 94 | (last clauses)) 95 | ast-sym))))) 96 | 97 | (defn ast->clojure [ast] 98 | (match ast 99 | :value [value] value 100 | :env-lookup [symbol] symbol 101 | :primitive-call [primitive args] `(~primitive ~@(map ast->clojure args)) 102 | :if [condition then-expr else-expr] `(~'if ~(ast->clojure condition) ~(ast->clojure then-expr) ~(ast->clojure else-expr)) 103 | :do [exprs] `(~'do ~@(map ast->clojure exprs)) 104 | :fn-call [fn-expr args] `(~(ast->clojure fn-expr) ~@(map ast->clojure args)) 105 | :unrecognized [expr] expr 106 | :fn [args bound-vars body] `(~'fn ~@(when bound-vars [{:bound-vars bound-vars}]) ~args ~@(map ast->clojure body)) 107 | :def [symbol definition] `(~'def ~symbol ~(ast->clojure definition)) 108 | let-like [type bindings body] `(~(symbol (name type)) 109 | ~(vec (apply concat (for [{:keys [symbol expr]} bindings] 110 | [symbol (ast->clojure expr)]))) 111 | ~@(map ast->clojure body)) 112 | ;; after closure analysis: 113 | :closure [label vars] `(~'closure ~label ~@vars) 114 | :code [args bound-vars body] `(~'code ~args ~bound-vars ~@(map ast->clojure body)) 115 | :heap-ref [ref] `(~'heap-ref ~ref) 116 | :labels [labels heap-values body] `(~'labels 117 | ~(vec (mapcat (fn [{:keys [label] :as code}] 118 | [label (ast->clojure code)]) 119 | labels)) 120 | ~heap-values 121 | ~(ast->clojure body)))) 122 | 123 | (defn expr-update 124 | "Applies f to every sub-expression of ast, returning the updated ast." 125 | ([ast f] 126 | (match ast 127 | let-like [] (-> ast 128 | (update :bindings (partial mapv (fn [binding] (update binding :expr f)))) 129 | (update :body (partial mapv f))) 130 | :if [] (-> ast 131 | (update :condition f) 132 | (update :then-expr f) 133 | (update :else-expr f)) 134 | :do [] (update ast :exprs (partial mapv f)) 135 | :def [] (update ast :definition f) 136 | :primitive-call [] (update ast :args (partial mapv f)) 137 | :fn-call [] (-> ast 138 | (update :fn-expr f) 139 | (update :args (partial mapv f))) 140 | :fn [] (update ast :body (partial mapv f)))) 141 | ([ast f arg & args] (expr-update ast #(apply f % arg args)))) 142 | 143 | (defn walk 144 | "Traverses and potentially modifies ast, applying inner recursively to each 145 | nested expression, and then outer to potentially transform the expression." 146 | [ast inner outer] 147 | (outer (expr-update (inner ast) walk inner outer))) 148 | 149 | (comment 150 | (def ast1 (clojure->ast '(let [f (fn [] 42) 151 | g (fn [x] (+ (f) x))] 152 | (g (f))))) 153 | (def ast2 (clojure->ast '(let [x 5] 154 | (fn [y] (fn [] (+ x y))))))) 155 | -------------------------------------------------------------------------------- /src/lithium/compiler.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.compiler 2 | (:require [clojure.string :as string] 3 | [lithium.assembler :as assembler] 4 | [lithium.compiler.ast :as ast] 5 | [lithium.compiler.closure :as closure] 6 | [lithium.compiler.code :refer [codeseq compile-expr genkey]] 7 | [lithium.compiler.primitives :as primitives] 8 | [lithium.compiler.repr :as repr] 9 | [lithium.compiler.state :as state] 10 | [lithium.driver :as driver] 11 | [lithium.utils :refer [read-all]])) 12 | 13 | (def prolog 14 | [['cli] 15 | ['mov :bp :sp] 16 | ['mov :si :heap-start] 17 | ['add :si 7] 18 | ['and :si 0xfff8] 19 | [:reserve-tmp-space]]) ;; reserve some space between bp and sp for temporary values 20 | 21 | (def endless-loop-epilog 22 | [:forever 23 | ['jmp :forever]]) 24 | 25 | (def dos-exit-epilog 26 | [['mov :ax 0x4c00] 27 | ['int 0x21]]) 28 | 29 | (def register-dump 30 | ;; taken from http://www.fysnet.net/yourhelp.htm 31 | [['pushf] 32 | ['pusha] 33 | ['push :cs] 34 | ['mov :di :buff] 35 | ['mov :si :msg1] 36 | ['mov :cx 10] 37 | :loop1 38 | ['movsw] 39 | ['mov :al (int \=)] 40 | ['stosb] 41 | ['pop :ax] 42 | ['mov :bx 4] 43 | :ploop 44 | ['rol :ax 4] 45 | ['push :ax] 46 | ['and :al 0x0f] 47 | ['daa] 48 | ['add :al 0xf0] 49 | ['adc :al 0x40] 50 | ['stosb] 51 | ['pop :ax] 52 | ['dec :bx] 53 | ['jnz :ploop] 54 | ['mov :ax 0x0d0a] 55 | ['stosw] 56 | ['loop :loop1] 57 | ['mov :al 0x24] 58 | ['stosb] 59 | ['mov :dx :buff] 60 | ['mov :ah 9] 61 | ['int 0x21] 62 | ['mov :ah 0x4c] 63 | ['int 0x21] 64 | :msg1 65 | ['string "CSDISIBPSPBXDXCXAXFL"] 66 | :buff 67 | ['bytes (repeat 100 0)]]) 68 | 69 | (defn make-environment-element 70 | [symbol type offset] 71 | [symbol {:type type, :offset offset}]) 72 | 73 | (defn compile-let 74 | [bindings body loop? orig-state] 75 | (as-> orig-state state 76 | (reduce 77 | (fn [{:keys [stack-pointer] :as state} {:keys [symbol expr]}] 78 | (-> state 79 | (codeseq 80 | [:subexpr expr] 81 | ['mov [:bp stack-pointer] :ax] 82 | state/next-loc) 83 | (update :environment conj (make-environment-element symbol :bound stack-pointer)))) 84 | state 85 | bindings) 86 | (if loop? 87 | (assoc (codeseq state (:recur-point state)) :loop-symbols (mapv :symbol bindings)) 88 | state) 89 | (reduce #(codeseq %1 [:subexpr %2]) state body) 90 | (state/restore-env orig-state state))) 91 | 92 | (defn compile-if 93 | [test-expr then-expr else-expr state] 94 | (let [l0 (genkey) l1 (genkey)] 95 | (codeseq 96 | state 97 | [:subexpr test-expr] 98 | ['cmp :ax (repr/immediate false)] 99 | ['je l0] 100 | ['cmp :ax (repr/immediate nil)] 101 | ['je l0] 102 | [:subexpr then-expr] 103 | ['jmp l1] 104 | l0 105 | [:subexpr else-expr] 106 | l1))) 107 | 108 | (defn compile-call 109 | [expr args orig-state] 110 | (as-> orig-state state 111 | (codeseq 112 | state 113 | [:subexpr expr] 114 | ['push :di] 115 | ['mov :di :ax]) 116 | (reduce 117 | #(codeseq 118 | (compile-expr %2 %1) 119 | ['push :ax]) 120 | state 121 | (reverse args)) 122 | (codeseq 123 | state 124 | ['mov :bx :di] 125 | ['call [:bx (- repr/closure-tag)]] 126 | ['add :sp (* repr/wordsize (count args))] 127 | ['pop :di]) 128 | (state/restore-env orig-state state))) 129 | 130 | (defn compile-long-value 131 | [state [val {:keys [ref value-type]}]] 132 | (as-> state state 133 | (codeseq state ['align 8] ref) 134 | (case value-type 135 | :keyword (let [s (pr-str val)] 136 | (codeseq state ['bytes [(count s)]] ['string s])) 137 | :string (codeseq state ['bytes [(count val)]] ['string val])))) 138 | 139 | (defn compile-labels 140 | [labels long-values code orig-state] 141 | (let [body-label (genkey)] 142 | (as-> orig-state state 143 | (codeseq 144 | state 145 | ['jmp body-label]) 146 | (reduce compile-long-value 147 | state 148 | long-values) 149 | (reduce (fn [state {:keys [label args bound-vars body]}] 150 | (codeseq 151 | state 152 | (keyword (name label)) 153 | ['push :bp] 154 | ['mov :bp :sp] 155 | [:reserve-tmp-space] 156 | (let [arg-env (map-indexed (fn [i x] 157 | (make-environment-element x :bound (* repr/wordsize (+ i 2)))) args) 158 | fvar-env (map-indexed (fn [i x] 159 | (make-environment-element x :free (* repr/wordsize (inc i)))) bound-vars)] 160 | [:subexpr 161 | (first body) ; FIXME needs multi-expr support 162 | #(-> % 163 | (assoc :stack-pointer (- repr/wordsize) 164 | :min-sp (- repr/wordsize)) 165 | (update :environment into (concat arg-env fvar-env)))]) 166 | ['mov :sp :bp] 167 | ['pop :bp] 168 | #(update % :code conj [:set-tmp-space (- (- repr/wordsize) (:min-sp %))]) 169 | ['ret])) 170 | state 171 | labels) 172 | (assoc state :min-sp (:min-sp orig-state)) 173 | (codeseq 174 | state 175 | body-label 176 | [:subexpr code 177 | identity 178 | (fn [orig-state state] 179 | (assoc (state/restore-env orig-state state) 180 | :environment (:environment state) 181 | :global-env-start (:global-env-start state)))])))) 182 | 183 | (defn compile-var-access 184 | [symbol {:keys [environment] :as state}] 185 | (let [{:keys [type offset]} (environment symbol)] 186 | (condp = type 187 | :bound (codeseq state ['mov :ax [:bp offset]]) 188 | :free (codeseq state 189 | ['mov :bx :di] 190 | ['mov :ax [:bx (- offset repr/closure-tag)]]) 191 | :var (codeseq state ['mov :ax [offset]]) 192 | (throw (Exception. (str "Unbound variable: " symbol)))))) 193 | 194 | (defn compile-def 195 | [symbol expr {:keys [stack-pointer global-env-start] :as state}] 196 | (-> (codeseq 197 | state 198 | [:subexpr expr] 199 | ['mov [global-env-start] :ax] 200 | ['sub :sp 2] 201 | ['sub :bp 2]) 202 | (update :environment conj (make-environment-element symbol :var global-env-start)) 203 | (update :global-env-start - repr/wordsize))) 204 | 205 | (defn compile-closure 206 | [label free-vars orig-state] 207 | (as-> orig-state state 208 | (codeseq state 209 | ['mov [:si] (-> label name keyword)] 210 | ['add :si repr/wordsize]) 211 | (reduce (fn [state fvar] 212 | (codeseq 213 | state 214 | [:subexpr {:type :env-lookup, :symbol fvar}] 215 | ['mov [:si] :ax] 216 | ['add :si repr/wordsize])) 217 | state free-vars) 218 | (codeseq 219 | state 220 | ['mov :ax :si] 221 | ['sub :ax (* repr/wordsize (inc (count free-vars)))] 222 | ['or :ax repr/closure-tag] 223 | ['add :si 7] 224 | ['and :si 0xfff8]))) 225 | 226 | (defn compile-value [{:keys [value]} state] 227 | (codeseq state 228 | ['mov :ax (repr/immediate value)])) 229 | 230 | (defn compile-long-value-ref [ref value-type state] 231 | (codeseq state ['mov :ax [:+ ref (repr/type->tag value-type)]])) 232 | 233 | (defn compile-expr* 234 | [ast state] 235 | (let [state (codeseq state ['comment (ast/ast->clojure ast)])] 236 | (ast/match ast 237 | :value [] (compile-value ast state) 238 | :env-lookup [symbol] (compile-var-access symbol state) 239 | :primitive-call [primitive args] ((primitives/primitives primitive) state args) 240 | :def [symbol definition] (compile-def symbol definition state) 241 | :let [bindings body] (compile-let bindings body false state) 242 | :loop [bindings body] (compile-let bindings body true (assoc state :recur-point (genkey))) 243 | :if [condition then-expr else-expr] (compile-if condition then-expr else-expr state) 244 | :do [exprs] (reduce #(compile-expr %2 %1) state exprs) 245 | :fn-call [fn-expr args] (compile-call fn-expr args state) 246 | :labels [labels long-values body] (compile-labels labels long-values body state) 247 | :closure [label vars] (compile-closure label vars state) 248 | :long-value-ref [ref value-type] (compile-long-value-ref ref value-type state) 249 | (throw (ex-info "Unable to compile" ast))))) 250 | 251 | (alter-var-root #'compile-expr (constantly compile-expr*)) 252 | 253 | (defn prepare-sexp [sexp] 254 | (-> sexp 255 | ast/clojure->ast 256 | closure/free-variable-analysis 257 | closure/collect-closures)) 258 | 259 | (defn compile-sexp 260 | [state sexp] 261 | (-> sexp 262 | prepare-sexp 263 | (compile-expr state))) 264 | 265 | (defn compile-program* 266 | [sexps] 267 | (reduce 268 | compile-sexp 269 | state/initial-compilation-state 270 | sexps)) 271 | 272 | (defn add-tmp-spaces 273 | [asm] 274 | (let [spaces (map second (filter #(and (sequential? %) (= (first %) :set-tmp-space)) asm)) 275 | spaces (into [(last spaces)] (drop-last spaces))] 276 | (first 277 | (reduce (fn [[code spaces] instr] 278 | (cond 279 | (and (sequential? instr) (= (first instr) :set-tmp-space)) [code spaces] 280 | (= instr [:reserve-tmp-space]) [(conj code ['sub :sp (first spaces)]) (next spaces)] 281 | :else [(conj code instr) spaces])) 282 | [[] spaces] 283 | asm)))) 284 | 285 | (defn compile-program 286 | [prog & [{:keys [epilog] 287 | :or {epilog endless-loop-epilog}}]] 288 | (let [state (compile-program* (read-all prog)) 289 | asm (concat prolog 290 | (:code state) 291 | [[:set-tmp-space (- (- repr/wordsize) (:min-sp state))]] 292 | epilog 293 | [:heap-start])] 294 | (add-tmp-spaces asm))) 295 | 296 | (defn compile-and-run! 297 | ([f] (compile-and-run! f dos-exit-epilog)) 298 | ([f epilog] 299 | (-> f 300 | (compile-program {:epilog epilog}) 301 | assembler/assemble 302 | (driver/run-program! (not= epilog register-dump))))) 303 | -------------------------------------------------------------------------------- /src/lithium/assembler.clj: -------------------------------------------------------------------------------- 1 | (ns lithium.assembler 2 | (:require [clojure.string :as string]) 3 | (:use [clojure.java.shell :only [sh]])) 4 | 5 | (defmacro deftable [name headers & data] 6 | `(def ~name 7 | (into {} 8 | (for [~(vec headers) ~(vec (map vec (partition (count headers) data)))] 9 | {~(first headers) (zipmap ~(vec (map keyword (rest headers))) ~(vec (rest headers)))})))) 10 | 11 | (deftable +registers+ 12 | [reg size value type] 13 | :ax 16 0 :general 14 | :bx 16 3 :general 15 | :cx 16 1 :general 16 | :dx 16 2 :general 17 | :sp 16 4 :general 18 | :bp 16 5 :general 19 | :si 16 6 :general 20 | :di 16 7 :general 21 | :cs 16 1 :segment 22 | :ds 16 3 :segment 23 | :es 16 0 :segment 24 | :ss 16 2 :segment 25 | :al 8 0 :general 26 | :ah 8 4 :general 27 | :bl 8 3 :general 28 | :bh 8 7 :general 29 | :cl 8 1 :general 30 | :ch 8 5 :general 31 | :dl 8 2 :general 32 | :dh 8 6 :general) 33 | 34 | (def +condition-codes+ 35 | {:o 0 :no 1 :b 2 :c 2 :nae 2 :ae 3 :nb 3 :nc 3 :e 4 :z 4 :ne 5 :nz 5 :be 6 :na 6 :a 7 :nbe 7 36 | :s 8 :ns 9 :p 10 :pe 10 :np 11 :po 11 :l 12 :nge 12 :ge 13 :nl 13 :le 14 :ng 14 :g 15 :nle 15}) 37 | 38 | (def +memory-widths+ #{:byte :word}) 39 | 40 | (defn modrm 41 | [mod spare rm] 42 | (+ (bit-shift-left mod 6) 43 | (bit-shift-left spare 3) 44 | rm)) 45 | 46 | (defn reg8 [x] (let [info (+registers+ x)] (and info (= (:type info) :general) (= (:size info) 8)))) 47 | (defn reg16 [x] (let [info (+registers+ x)] (and info (= (:type info) :general) (= (:size info) 16)))) 48 | (defn sreg [x] (let [info (+registers+ x)] (and info (= (:type info) :segment)))) 49 | (defn imm8 [x] (and (integer? x) (<= 0 x 255))) 50 | (defn imm16 [x] (or (and (integer? x) (<= 0 x 65535)) (keyword? x) (and (vector? x) (= (first x) :+)))) 51 | (defn mem [x] (and (vector? x) (not= (first x) :+))) 52 | (defn width [x] (first (filter +memory-widths+ x))) 53 | (defn mem8 [x] (and (mem x) (let [w (width x)] (or (nil? w) (= w :byte))))) 54 | (defn mem16 [x] (and (mem x) (let [w (width x)] (or (nil? w) (= w :word))))) 55 | (defn rm8 [x] (or (reg8 x) (mem8 x))) 56 | (defn rm16 [x] (or (reg16 x) (mem16 x))) 57 | (defn label [x] (keyword? x)) 58 | 59 | (def assembly-table 60 | [[:mov rm8 reg8] [0x88 :r] 61 | [:mov rm16 reg16] [0x89 :r] 62 | [:mov reg8 rm8] [0x8a :r] 63 | [:mov reg16 rm16] [0x8b :r] 64 | [:mov reg8 imm8] [[:r+ 0xb0] :ib] 65 | [:mov reg16 imm16] [[:r+ 0xb8] :iw] 66 | [:mov sreg rm16] [0x8e :r] 67 | [:mov rm8 imm8] [0xc6 :0 :ib] 68 | [:mov rm16 imm16] [0xc7 :0 :iw] 69 | [:xor rm8 reg8] [0x30 :r] 70 | [:xor rm16 reg16] [0x31 :r] 71 | [:push reg16] [[:r+ 0x50]] 72 | [:pop reg16] [[:r+ 0x58]] 73 | [:push :cs] [0x0e] 74 | [:pushf] [0x9c] 75 | [:pusha] [0x60] 76 | [:stosb] [0xaa] 77 | [:stosw] [0xab] 78 | [:movsw] [0xa5] 79 | [:rol rm16 imm8] [0xc1 :0 :ib] 80 | [:daa] [0x27] 81 | [:ret] [0xc3] 82 | [:cli] [0xfa] 83 | [:inc reg16] [[:r+ 0x40]] 84 | [:inc rm8] [0xfe :0] 85 | [:dec reg16] [[:r+ 0x48]] 86 | [:dec rm8] [0xfe :1] 87 | [:cmp :al imm8] [0x3c :ib] 88 | [:cmp :ax imm16] [0x3d :iw] 89 | [:cmp rm8 imm8] [0x80 :7 :ib] 90 | [:cmp rm16 imm16] [0x81 :7 :iw] 91 | [:cmp reg8 rm8] [0x3a :r] 92 | [:cmp reg16 rm16] [0x3b :r] 93 | [:adc :al imm8] [0x14 :ib] 94 | [:add rm8 reg8] [0x00 :r] 95 | [:add rm16 reg16] [0x01 :r] 96 | [:add reg8 rm8] [0x02 :r] 97 | [:add reg16 rm16] [0x03 :r] 98 | [:add :al imm8] [0x04 :ib] 99 | [:add :ax imm16] [0x05 :iw] 100 | [:add rm8 imm8] [0x80 :0 :ib] 101 | [:add rm16 imm16] [0x81 :0 :iw] 102 | [:sub rm8 imm8] [0x80 :5 :ib] 103 | [:sub rm16 imm8] [0x83 :5 :ib] 104 | [:sub rm16 imm16] [0x81 :5 :iw] 105 | [:sub reg8 rm8] [0x2a :r] 106 | [:sub reg16 rm16] [0x2b :r] 107 | [:and rm8 imm8] [0x80 :4 :ib] 108 | [:and rm16 imm8] [0x83 :4 :ib] 109 | [:and rm16 imm16] [0x81 :4 :iw] 110 | [:mul rm16] [0xf7 :4] 111 | [:mul rm8] [0xf6 :4] 112 | [:div rm16] [0xf7 :6] 113 | [:div rm8] [0xf6 :6] 114 | [:sal rm8 1] [0xd0 :4] 115 | [:sal rm8 imm8] [0xc0 :4 :ib] 116 | [:sal rm16 1] [0xd1 :4] 117 | [:sal rm16 imm8] [0xc1 :4 :ib] 118 | [:sar rm8 1] [0xd0 :7] 119 | [:sar rm8 imm8] [0xc0 :7 :ib] 120 | [:sar rm16 1] [0xd1 :7] 121 | [:sar rm16 imm8] [0xc1 :7 :ib] 122 | [:stosb] [0xaa] 123 | [:or rm8 imm8] [0x80 :1 :ib] 124 | [:or rm16 imm16] [0x81 :1 :iw] 125 | [:jCC label] [[:cc+ 0x70] :rb] 126 | [:setCC rm8] [0x0f [:cc+ 0x90] :2] 127 | [:loop label] [0xe2 :rb] 128 | [:jmp rm16] [0xff :4] 129 | [:jmp label] [0xe9 :rw] 130 | [:call rm16] [0xff :2] 131 | [:call label] [0xe8 :rw] 132 | [:int 3] [0xcc] 133 | [:int imm8] [0xcd :ib]]) 134 | 135 | (defn extract-cc [instr template] 136 | (let [re (re-pattern (string/replace (name template) "CC" "(.+)"))] 137 | (when-let [cc-s (second (re-find re (name instr)))] 138 | (keyword cc-s)))) 139 | 140 | (defn part-of-spec-matches? [datum template] 141 | (if (fn? template) (template datum) (= datum template))) 142 | 143 | (defn instruction-matches? [instr [template _]] 144 | (let [f1 (first instr) 145 | f2 (first template)] 146 | (and (or (= (name f1) (name f2)) 147 | ((set (keys +condition-codes+)) (extract-cc f1 f2))) 148 | (= (count instr) (count template)) 149 | (reduce #(and %1 %2) true (map part-of-spec-matches? (rest instr) (rest template)))))) 150 | 151 | (defn find-template [instr] 152 | (first (filter (partial instruction-matches? instr) 153 | (partition 2 assembly-table)))) 154 | 155 | (defn make-label [label type] 156 | {:label label, :type type}) 157 | 158 | (defn label? 159 | ([x] (and (map? x) (:label x))) 160 | ([x type] (and (label? x) (= (:type x) type)))) 161 | 162 | (defn word-to-bytes [[size w]] 163 | (let [w (if (and (integer? w) (neg? w)) (+ w (bit-shift-left 1 size)) w)] 164 | (case size 165 | 0 [] 166 | 8 [w] 167 | 16 (cond (keyword? w) [:placeholder (make-label w :abs)] 168 | (vector? w) [:placeholder {:label (second w), :type :abs, :displacement (last w)}] 169 | :else [(bit-and w 0xff) (bit-shift-right w 8)])))) 170 | 171 | (defn lenient-parse-int [x] 172 | (try 173 | (Integer/parseInt x) 174 | (catch NumberFormatException _ nil))) 175 | 176 | (defn make-modrm [rm-desc spare] 177 | (if (keyword? rm-desc) 178 | [(modrm 3 spare (-> rm-desc +registers+ :value))] 179 | (let [rm-desc (remove +memory-widths+ rm-desc) 180 | registers (vec (sort-by name (filter keyword? rm-desc))) 181 | displacement (reduce + 0 (filter integer? rm-desc)) 182 | rm-map {[:bx :si] 0 [:bx :di] 1 [:bp :si] 2 [:bp :di] 3 [:si] 4 [:di] 5 [:bp] 6 [] 6 [:bx] 7} 183 | mod (cond 184 | (or (and (zero? displacement) (not= registers [:bp])) (empty? registers)) 0 185 | (or (and (zero? displacement) (= registers [:bp])) (<= -128 displacement 127)) 1 186 | (<= -32768 displacement 32767) 2) 187 | rm (rm-map registers)] 188 | (when-not rm 189 | (throw (Exception. (format "Incorrect memory reference: %s" rm-desc)))) 190 | (into [(modrm mod spare rm)] (word-to-bytes [(* 8 (if (empty? registers) 2 mod)) displacement]))))) 191 | 192 | (defn parse-byte [[instr op1 op2] [instr-template op1-template op2-template] byte-desc] 193 | (let [imm (cond (#{imm8 imm16} op1-template) op1 (#{imm8 imm16} op2-template) op2) 194 | rm (cond (#{rm8 rm16} op1-template) op1 (#{rm8 rm16} op2-template) op2) 195 | not-rm (if (= rm op1) op2 op1)] 196 | (cond 197 | (integer? byte-desc) [byte-desc] 198 | (= byte-desc :ib) (word-to-bytes [8 imm]) 199 | (= byte-desc :iw) (word-to-bytes [16 imm]) 200 | (= byte-desc :rb) [(make-label op1 :byte)] 201 | (= byte-desc :rw) [:placeholder (make-label op1 :word)] 202 | (and (keyword? byte-desc) (lenient-parse-int (name byte-desc))) 203 | (make-modrm rm (lenient-parse-int (name byte-desc))) 204 | (= byte-desc :r) 205 | (make-modrm rm (-> not-rm +registers+ :value)) 206 | (and (sequential? byte-desc) (= (first byte-desc) :r+)) 207 | [(+ (second byte-desc) (-> op1 +registers+ :value))] 208 | (and (sequential? byte-desc) (= (first byte-desc) :cc+)) 209 | [(+ (second byte-desc) (-> instr (extract-cc instr-template) +condition-codes+))]))) 210 | 211 | (defn assemble-instruction [instr pc] 212 | (cond 213 | (= (first instr) 'string) (map int (second instr)) 214 | (= (first instr) 'bytes) (second instr) 215 | (= (first instr) 'align) (let [boundary (second instr) 216 | used (mod pc boundary)] 217 | (if (zero? used) 218 | [] 219 | (repeat (- boundary used) 0))) 220 | :otherwise 221 | (let [[template parts] (find-template instr)] 222 | (when-not template (throw (Exception. (str "Could not assemble instruction: " (pr-str instr))))) 223 | (let [assembled-parts (map (partial parse-byte instr template) parts)] 224 | (apply concat assembled-parts))))) 225 | 226 | ;; This is the value added to absolute addresses of labels, telling 227 | ;; the assembler where the code starts from. Defaults to 0x100 for 228 | ;; compatibility with COM format. 229 | (def ^:dynamic *origin* 0x100) 230 | 231 | (defn resolve-labels [code labels] 232 | (loop [result [] code code pos 0] 233 | (if-let [fb (first code)] 234 | (recur 235 | (cond (= fb :placeholder) result 236 | (label? fb :byte) (into result (word-to-bytes [8 (dec (- (-> fb :label labels) pos))])) 237 | (label? fb :word) (into result (word-to-bytes [16 (dec (- (-> fb :label labels) pos))])) 238 | (label? fb :abs) (into result (word-to-bytes [16 (+ *origin* (-> fb :label labels) (:displacement fb 0))])) 239 | :otherwise (conj result fb)) 240 | (next code) (inc pos)) 241 | result))) 242 | 243 | (defn strip-comments [prog] 244 | (remove #(and (vector? %) (= (first %) 'comment)) prog)) 245 | 246 | (defn asm [prog] 247 | (loop [prog (strip-comments prog) code [] pc 0 labels {}] 248 | (if-not (seq prog) 249 | (resolve-labels code labels) 250 | (let [ins (first prog)] 251 | (if (keyword? ins) 252 | (recur (next prog) code pc (assoc labels ins pc)) 253 | (let [assembled (assemble-instruction ins pc) 254 | cnt (count assembled)] 255 | (recur (next prog) (into code assembled) (+ pc cnt) labels))))))) 256 | 257 | (defn assemble [prog] 258 | (asm (if (string? prog) 259 | (read-string (str "[" (slurp prog) "]")) 260 | prog))) 261 | --------------------------------------------------------------------------------