├── Makefile ├── amd64-asm.asd ├── assembler.lisp ├── binaries.lisp ├── driver.lisp ├── encoders.lisp ├── mach-o-binaries.lisp ├── package.lisp ├── readme.txt ├── testsuite.lisp └── utilities.lisp /Makefile: -------------------------------------------------------------------------------- 1 | clean: 2 | rm -rf *.fasl 3 | rm -rf *~ 4 | 5 | -------------------------------------------------------------------------------- /amd64-asm.asd: -------------------------------------------------------------------------------- 1 | (defsystem "amd64-asm" 2 | :depends-on (:iterate) 3 | :components ((:file "package") 4 | (:file "utilities") 5 | (:file "encoders") 6 | (:file "assembler") 7 | (:file "binaries") 8 | (:file "mach-o-binaries") 9 | (:file "driver") 10 | (:file "testsuite")) 11 | :serial t) 12 | -------------------------------------------------------------------------------- /assembler.lisp: -------------------------------------------------------------------------------- 1 | ; amd64-asm.lisp 2 | ; Assembler for AMD64 code. Contains assembler, linker, stitcher. 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (defstruct asmrel 7 | symbol 8 | offset 9 | width 10 | type) 11 | 12 | (defstruct asmlbl 13 | symbol 14 | offset) 15 | 16 | (defstruct asmfrag 17 | buffer 18 | relocs 19 | labels 20 | buffer-chkpt 21 | relocs-chkpt 22 | sdi-source) 23 | 24 | (defstruct asmbin 25 | buffer 26 | relocs) 27 | 28 | (defstruct asmfun 29 | frags) 30 | 31 | (defun new-asmfrag () 32 | (make-asmfrag :buffer (make-array 0 :fill-pointer t) 33 | :relocs (make-array 0 :fill-pointer t) 34 | :labels (make-array 0 :fill-pointer t))) 35 | 36 | (defun new-asmbin () 37 | (make-asmbin :buffer (make-array 0 :fill-pointer t) 38 | :relocs (make-array 0 :fill-pointer t))) 39 | 40 | (defun new-asmfun () 41 | (make-asmfun :frags (make-array 0 :fill-pointer t))) 42 | 43 | (defun reset-asmfrag (ln) 44 | (setf (fill-pointer (asmfrag-relocs ln)) 0) 45 | (setf (fill-pointer (asmfrag-buffer ln)) 0)) 46 | 47 | (defun checkpoint-asmfrag (ln) 48 | (setf (asmfrag-buffer-chkpt ln) 49 | (fill-pointer (asmfrag-buffer ln))) 50 | (setf (asmfrag-relocs-chkpt ln) 51 | (fill-pointer (asmfrag-relocs ln)))) 52 | 53 | (defun restore-asmfrag (ln) 54 | (setf (fill-pointer (asmfrag-buffer ln)) 55 | (asmfrag-buffer-chkpt ln)) 56 | (setf (fill-pointer (asmfrag-relocs ln)) 57 | (asmfrag-relocs-chkpt ln))) 58 | 59 | (defun asmfrag-empty? (ln) 60 | (eql (fill-pointer (asmfrag-buffer ln)) 0)) 61 | 62 | (defun add-asmfrag (frag fun) 63 | (vector-push-extend frag (asmfun-frags fun))) 64 | 65 | (defun emit-byte (ln b) 66 | (vector-push-extend b (asmfrag-buffer ln))) 67 | 68 | (defun emit-bytes (ln w n) 69 | (iter (for i from 0 below n) 70 | (emit-byte ln (logand #xFF (ash w (- (* i 8))))))) 71 | 72 | (defun emit-half (ln w) 73 | (emit-bytes ln w 4)) 74 | 75 | (defun emit-word (ln w) 76 | (emit-bytes ln w 8)) 77 | 78 | (defun emit-wide (ln w) 79 | (emit-bytes ln w 16)) 80 | 81 | (defun emit-ascii (ln str &optional field) 82 | (iter (for char in-vector str) 83 | (emit-byte ln (char-code char))) 84 | (when field 85 | (iter (for i from (length str) below field) 86 | (emit-byte ln 0)))) 87 | 88 | (defun emit-byte-vector (ln vec) 89 | (iter (for byte in-vector vec) 90 | (emit-byte ln byte))) 91 | 92 | (defun emit-reloc (ln sym width type) 93 | (vector-push-extend (make-asmrel :symbol sym 94 | :offset (fill-pointer (asmfrag-buffer ln)) 95 | :width width 96 | :type type) 97 | (asmfrag-relocs ln))) 98 | 99 | (defun record-label (ln sym) 100 | (vector-push-extend (make-asmlbl :symbol sym 101 | :offset (fill-pointer (asmfrag-buffer ln))) 102 | (asmfrag-labels ln))) 103 | 104 | (defun save-sdi-source (ln line) 105 | (setf (asmfrag-sdi-source ln) line)) 106 | 107 | (defun label-line? (ln) 108 | (symbolp ln)) 109 | 110 | (defun translate-label-ref (insn) 111 | (list (first insn) (list :byte (second insn) 0))) 112 | 113 | (defun encode (source) 114 | (let ((fun (new-asmfun)) 115 | (frag (new-asmfrag))) 116 | (iter (for line in source) 117 | (cond 118 | ((label-line? line) (record-label frag line)) 119 | ((sdi? line) 120 | (checkpoint-asmfrag frag) 121 | (let ((tx-source (translate-label-ref line) frag)) 122 | (save-sdi-source frag tx-source) 123 | (encode-insn tx-source frag)) 124 | (add-asmfrag frag fun) 125 | (setf frag (new-asmfrag))) 126 | (t (encode-insn line frag)))) 127 | (unless (asmfrag-empty? frag) 128 | (add-asmfrag frag fun)) 129 | fun)) 130 | 131 | (defun compute-labels (fun) 132 | (let ((labels (make-hash-table)) 133 | (ip 0)) 134 | (iter (for frag in-vector (asmfun-frags fun)) 135 | (iter (for label in-vector (asmfrag-labels frag)) 136 | (setf (gethash (asmlbl-symbol label) labels) 137 | (+ ip (asmlbl-offset label)))) 138 | (setf ip (+ ip (length (asmfrag-buffer frag))))) 139 | labels)) 140 | 141 | (defun sdi-target (insn) 142 | (second insn)) 143 | 144 | (defun sdi-target-specifier (insn) 145 | (first (second insn))) 146 | 147 | (defun sdi-target-symbol (insn) 148 | (second (second insn))) 149 | 150 | (defun submaximal-sdi? (insn) 151 | (and (sdi? insn) 152 | (not (eql (sdi-target-specifier insn) :half)))) 153 | 154 | (defun jump-in-range (insn delta) 155 | (<= (signed-width delta) 156 | (specifier-width (sdi-target-specifier insn)))) 157 | 158 | (defun widen-sdi (sdi) 159 | (list (first sdi) (list (specifier-next (sdi-target-specifier sdi)) 160 | (sdi-target-symbol sdi) 0))) 161 | 162 | (defun resolve-sdi (sdi labels ip) 163 | (list (first sdi) (- (gethash (sdi-target-symbol sdi) labels) ip))) 164 | 165 | (defun link-jumps (fun labels) 166 | (let ((ip 0) 167 | (relaxed)) 168 | (iter (for frag in-vector (asmfun-frags fun)) 169 | (setf ip (+ ip (length (asmfrag-buffer frag)))) 170 | (when (not (asmfrag-sdi-source frag)) (next-iteration)) 171 | (let* ((sdi (asmfrag-sdi-source frag)) 172 | (del (- ip (gethash (sdi-target-symbol sdi) labels)))) 173 | (if (jump-in-range sdi del) 174 | (progn (setf (asmfrag-sdi-source frag) 175 | (resolve-sdi sdi labels ip)) 176 | (restore-asmfrag frag) 177 | (encode-insn (asmfrag-sdi-source frag) frag)) 178 | (progn (setf (asmfrag-sdi-source frag) (widen-sdi sdi)) 179 | (restore-asmfrag frag) 180 | (encode-insn (asmfrag-sdi-source frag) frag) 181 | (setf relaxed t))))) 182 | relaxed)) 183 | 184 | (defun link (fun) 185 | (to-fixpoint 186 | (when (link-jumps fun (compute-labels fun)) 187 | (mark-changed))) 188 | fun) 189 | 190 | (defun update-reloc-offsets (bin frag) 191 | (let ((base (fill-pointer (asmbin-buffer bin)))) 192 | (iter (for reloc in-vector (asmfrag-relocs frag)) 193 | (incf (asmrel-offset reloc) base)))) 194 | 195 | (defun stitch (fun) 196 | (let ((bin (new-asmbin))) 197 | (iter (for frag in-vector (asmfun-frags fun)) 198 | (update-reloc-offsets bin frag) 199 | (extend-vector-with-vector (asmfrag-buffer frag) 200 | (asmbin-buffer bin)) 201 | (extend-vector-with-vector (asmfrag-relocs frag) 202 | (asmbin-relocs bin))) 203 | bin)) 204 | 205 | (defun assemble-code (source) 206 | (handler-case 207 | (stitch (link (encode source))) 208 | (assertion-failed (as) (format t "Assertion failed: ~A~%" 209 | (assertion-failed-check as))) 210 | (encoding-error (ee) (format t "Error encoding form: ~A~%" 211 | (encoding-error-form ee))))) 212 | 213 | (defun encode-known-data (datum) 214 | (let ((frag (new-asmfrag))) 215 | (destructuring-bind (spec value) datum 216 | (emit-bytes frag value (specifier-width spec))) 217 | frag)) 218 | 219 | (defun encode-unknown-data (datum) 220 | (let ((frag (new-asmfrag))) 221 | (destructuring-bind (spec sym addn) datum 222 | (emit-reloc frag sym (specifier-width spec) :abs) 223 | (emit-bytes frag addn (specifier-width spec))) 224 | frag)) 225 | 226 | (defun aggregate (source) 227 | (let ((fun (new-asmfun))) 228 | (iter (for datum in source) 229 | (ecase (length datum) 230 | (2 (add-asmfrag (encode-known-data datum) fun)) 231 | (3 (add-asmfrag (encode-unknown-data datum) fun)))) 232 | fun)) 233 | 234 | (defun assemble-data (source) 235 | (handler-case 236 | (stitch (aggregate source)) 237 | (assertion-failed (as) (format t "Assertion failed: ~A~%" 238 | (assertion-failed-check as))) 239 | (encoding-error (ee) (format t "Error aggregating form: ~A~%" 240 | (encoding-error-form ee))))) 241 | 242 | -------------------------------------------------------------------------------- /binaries.lisp: -------------------------------------------------------------------------------- 1 | ; binaries.lisp 2 | ; Support for generating binary object files 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (defparameter *scopes* '(:int :ext :und)) 7 | 8 | (defstruct asmdef 9 | name 10 | scope 11 | bin) 12 | 13 | (defstruct asmobj 14 | cdefs 15 | ddefs) 16 | 17 | (defun new-asmdef (name sc bin) 18 | (make-asmdef :name name :scope sc :bin bin)) 19 | 20 | (defun new-asmobj () 21 | (make-asmobj :cdefs (make-array 0 :fill-pointer t) 22 | :ddefs (make-array 0 :fill-pointer t))) 23 | 24 | 25 | (defun emit-code-def (obj name sc bin) 26 | (vector-push-extend (new-asmdef name sc bin) 27 | (asmobj-cdefs obj))) 28 | 29 | (defun emit-data-def (obj name sc bin) 30 | (vector-push-extend (new-asmdef name sc bin) 31 | (asmobj-ddefs obj))) 32 | 33 | (defstruct strtab 34 | vec 35 | table) 36 | 37 | (defun new-strtab () 38 | (make-strtab :vec (make-array 1 :fill-pointer t :initial-element 0) 39 | :table (make-hash-table))) 40 | 41 | (defun strtab-intern (tab sym) 42 | (if (gethash sym (strtab-table tab)) 43 | (gethash sym (strtab-table tab)) 44 | (let ((ndx (fill-pointer (strtab-vec tab)))) 45 | (iter (for char in-vector (symbol-name sym)) 46 | (vector-push-extend (char-code char) (strtab-vec tab))) 47 | (vector-push-extend 0 (strtab-vec tab)) 48 | (setf (gethash sym (strtab-table tab)) ndx) 49 | ndx))) 50 | 51 | (defun strtab-member? (tab sym) 52 | (gethash sym (strtab-table tab))) 53 | 54 | (defun strtab-size (tab) 55 | (length (strtab-vec tab))) 56 | 57 | (defun emit-strtab (frag strtab) 58 | (emit-byte-vector frag (strtab-vec strtab))) 59 | 60 | (defgeneric emit-c-struct (struct frag)) 61 | 62 | (defgeneric sizeof-c-struct (struct)) 63 | 64 | (defmacro define-c-struct (name &body slots) 65 | (labels ((slot-accessor (name slot-name) 66 | (catsym- name slot-name)) 67 | (emitter-for-spec (spec) 68 | (ecase spec 69 | (:byte 'emit-byte) 70 | (:half 'emit-half) 71 | (:word 'emit-word) 72 | (:wide 'emit-wide))) 73 | (emitter-for-slot (struct frag slot) 74 | (ecase (length slot) 75 | (2 `(,(emitter-for-spec (second slot)) 76 | ,frag 77 | (,(slot-accessor name (first slot)) ,struct))) 78 | (3 `(let ((vec (,(slot-accessor name (first slot)) ,struct))) 79 | (assert (<= (length vec) ,(third slot))) 80 | (iter (for e in-vector vec) 81 | (,(emitter-for-spec (second slot)) ,frag e)) 82 | (iter (for i from (length vec) below ,(third slot)) 83 | (,(emitter-for-spec (second slot)) ,frag 0))))))) 84 | `(progn 85 | (defstruct ,name 86 | ,@(iter (for slot in slots) 87 | (collect (car slot)))) 88 | (defmethod emit-c-struct ((xstruct ,name) xfrag) 89 | ,@(iter (for slot in slots) 90 | (collect (emitter-for-slot 'xstruct 'xfrag slot)))) 91 | (defmethod sizeof-c-struct ((xstruct ,name)) 92 | ,(iter (for slot in slots) 93 | (ecase (length slot) 94 | (2 (sum (specifier-width (second slot)))) 95 | (3 (sum (* (third slot) 96 | (specifier-width (second slot))))))))))) 97 | -------------------------------------------------------------------------------- /driver.lisp: -------------------------------------------------------------------------------- 1 | ; driver.lisp 2 | ; Driver for assembler 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (defun assemble (source) 7 | (let ((obj (new-asmobj))) 8 | (iter (for def in source) 9 | (ecase (first def) 10 | (:proc (emit-code-def obj 11 | (third def) 12 | (second def) 13 | (assemble-code (cdddr def)))) 14 | (:var (emit-data-def obj 15 | (third def) 16 | (second def) 17 | (assemble-data (cdddr def)))))) 18 | obj)) 19 | 20 | (defun assemble-and-output (source type filename) 21 | (let* ((obj (assemble source)) 22 | (buf (ecase type 23 | (:mach-o (generate-mach-o-obj obj))))) 24 | (store-vector-into-file buf filename) 25 | filename)) 26 | -------------------------------------------------------------------------------- /encoders.lisp: -------------------------------------------------------------------------------- 1 | ; encoders.lisp 2 | ; Encoders for AMD64 instruction set. 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | ; maybe move condition definitions somewhere else 7 | (define-condition assembler-error (error) 8 | (())) 9 | 10 | (define-condition encoding-error (assembler-error) 11 | ((form :initarg :form :reader encoding-error-form))) 12 | 13 | (define-condition assertion-failed (encoding-error) 14 | ((check :initarg :check :reader assertion-failed-check))) 15 | 16 | ; make this more sophisticated for error handling later 17 | (defmacro with-checks (pred &body body) 18 | `(if ,pred 19 | (progn ,@body) 20 | (error 'assertion-failed :check ',pred))) 21 | 22 | (defparameter *byte-regs* '(:al :bl :cl :dl :sil :dil :bpl :spl 23 | :r8b :r9b :r10b :r11b :r12b :r13b :r14b :r15b)) 24 | 25 | (defparameter *half-regs* '(:eax :ebx :ecx :edx :esi :edi :ebp :esp 26 | :r8d :r9d :r10d :r11d :r12d :r13d :r14d :r15d)) 27 | 28 | (defparameter *word-regs* '(:rax :rbx :rcx :rdx :rsi :rdi :rbp :rsp 29 | :r8 :r9 :r10 :r11 :r12 :r13 :r14 :r15)) 30 | 31 | (defparameter *vec-regs* '(:xmm0 :xmm3 :xmm1 :xmm2 :xmm6 :xmm7 :xmm5 :xmm4 32 | :xmm8 :xmm9 :xmm10 :xmm11 :xmm12 :xmm13 :xmm14 33 | :xmm15)) 34 | 35 | (defparameter *sdis* '(:jo :jno :jb :jnb :jz :jnz :jbe :jnbe 36 | :js :jns :jp :jnp :jl :jge :jle :jg :jmp)) 37 | 38 | (eval-when (:compile-toplevel :load-toplevel :execute) 39 | (defparameter *prefixes* '(#x66 #x67 #x64 #x65 #xF0 #xF3 #xF2))) 40 | 41 | (eval-when (:compile-toplevel :load-toplevel :execute) 42 | (defparameter *encoders* nil)) 43 | 44 | ; info about operands of an instruction 45 | (defstruct oprinfo 46 | oc.ext 47 | modrm.mod 48 | modrm.reg 49 | modrm.rm 50 | sib.scale 51 | sib.index 52 | sib.base 53 | disp 54 | imm 55 | imm.bytes 56 | imm.rel-type 57 | imm.rel-addn 58 | disp.bytes 59 | disp.rel-type 60 | disp.rel-addn) 61 | 62 | ; info about the opcodes of an instruction 63 | (defstruct ocinfo 64 | override? 65 | prefixes 66 | opcodes) 67 | 68 | (defun new-ocinfo () 69 | (make-ocinfo :opcodes (make-array 0 :fill-pointer t) 70 | :prefixes (make-array 0 :fill-pointer t))) 71 | 72 | (defun specifier-width (spec) 73 | (case spec 74 | (:byte 1) 75 | (:half 4) 76 | (:word 8) 77 | (:wide 16))) 78 | 79 | (defun specifier-next (spec) 80 | (case spec 81 | (:byte :half) 82 | (:half :word) 83 | (:word :word))) 84 | 85 | (defun register-number (reg) 86 | (let ((rnums '(0 3 1 2 6 7 5 4 8 9 10 11 12 13 14 15))) 87 | (let ((idx (or (position reg *byte-regs*) 88 | (position reg *half-regs*) 89 | (position reg *word-regs*) 90 | (position reg *vec-regs*)))) 91 | (when idx (nth idx rnums))))) 92 | 93 | (defun reg? (operand) 94 | (register-number operand)) 95 | 96 | (defun byte-reg? (reg) 97 | (member reg *byte-regs*)) 98 | 99 | (defun half-reg? (reg) 100 | (member reg *half-regs*)) 101 | 102 | (defun word-reg? (reg) 103 | (member reg *word-regs*)) 104 | 105 | (defun xmm-reg? (reg) 106 | (member reg *vec-regs*)) 107 | 108 | (defun same-reg? (rega regb) 109 | (eql (register-number rega) (register-number regb))) 110 | 111 | (defun immediate? (operand) 112 | (or (integerp operand) 113 | (and (listp operand) 114 | (or (and (eql (length operand) 2) 115 | (symbolp (first operand)) 116 | (symbolp (second operand))) 117 | (and (eql (length operand) 3) 118 | (symbolp (first operand)) 119 | (symbolp (second operand)) 120 | (integerp (third operand)) 121 | (or (<= (signed-width (third operand)) 122 | (specifier-width (first operand))) 123 | (<= (unsigned-width (third operand)) 124 | (specifier-width (first operand))))))))) 125 | 126 | (defun immediate-width (operand) 127 | (if (integerp operand) 128 | (signed-width operand) 129 | (specifier-width (first operand)))) 130 | 131 | (defun byte-immediate? (operand) 132 | (and (immediate? operand) (<= (immediate-width operand) 1))) 133 | 134 | (defun short-immediate? (operand) 135 | (and (immediate? operand) (<= (immediate-width operand) 2))) 136 | 137 | (defun half-immediate? (operand) 138 | (and (immediate? operand) (<= (immediate-width operand) 4))) 139 | 140 | (defun word-immediate? (operand) 141 | (and (immediate? operand) (<= (immediate-width operand) 8))) 142 | 143 | (defun mem? (operand) 144 | (and (listp operand) 145 | (eql (length operand) 5) 146 | (symbolp (first operand)) 147 | (symbolp (second operand)) 148 | (symbolp (third operand)) 149 | (integerp (fourth operand)) 150 | (immediate? (fifth operand)))) 151 | 152 | (defun byte-mem? (operand) 153 | (and (mem? operand) (eql (first operand) :byte))) 154 | 155 | (defun half-mem? (operand) 156 | (and (mem? operand) (eql (first operand) :half))) 157 | 158 | (defun word-mem? (operand) 159 | (and (mem? operand) (eql (first operand) :word))) 160 | 161 | (defun wide-mem? (operand) 162 | (and (mem? operand) (eql (first operand) :wide))) 163 | 164 | (defun sdi? (insn) 165 | (and (member (first insn) *sdis*) 166 | (symbolp (second insn)))) 167 | 168 | (defun compose-rex (w r x b) 169 | (with-checks (and (< w 2) (< r 2) (< x 2) (< b 2)) 170 | (+ #x40 b (ash x 1) (ash r 2) (ash w 3)))) 171 | 172 | (defun decode-rex (r) 173 | (with-checks (integerp r) 174 | (list :w (ash (logand r #x8) -3) 175 | :r (ash (logand r #x4) -2) 176 | :x (ash (logand r #x2) -1) 177 | :b (logand r #x1)))) 178 | 179 | (defun compose-modrm (mod reg rm) 180 | (with-checks (and (< mod 4) (< reg 8) (< rm 8)) 181 | (+ rm (ash reg 3) (ash mod 6)))) 182 | 183 | (defun decode-modrm (m) 184 | (with-checks (integerp m) 185 | (list :mod (logand (ash m -6) #x3) 186 | :reg (logand (ash m -3) #x7) 187 | :rm (logand m #x7)))) 188 | 189 | (defun compose-sib (scale index base) 190 | (with-checks (and (< scale 8) (< index 8) (< base 8)) 191 | (+ base (ash index 3) (ash scale 6)))) 192 | 193 | (defun decode-sib (s) 194 | (with-checks (integerp s) 195 | (list :scale (logand (ash s -6) #x3) 196 | :index (logand (ash s -3) #x7) 197 | :base (logand s #x7)))) 198 | 199 | (defun add-reg-operand (insn reg where) 200 | (with-checks (reg? reg) 201 | (let ((num (register-number reg))) 202 | (ecase where 203 | (reg (setf (oprinfo-modrm.reg insn) num)) 204 | (rm (setf (oprinfo-modrm.mod insn) #x3) 205 | (setf (oprinfo-modrm.rm insn) num)) 206 | (op (setf (oprinfo-oc.ext insn) num)))))) 207 | 208 | (defun add-immediate-operand (insn imm width type) 209 | (with-checks (immediate? imm) 210 | (if (integerp imm) 211 | (progn 212 | (setf (oprinfo-imm insn) imm) 213 | (setf (oprinfo-imm.bytes insn) width)) 214 | (progn 215 | (setf (oprinfo-imm insn) (second imm)) 216 | (setf (oprinfo-imm.bytes insn) width) 217 | (setf (oprinfo-imm.rel-type insn) type) 218 | (setf (oprinfo-imm.rel-addn insn) (or (third imm) 0)))))) 219 | 220 | (defun add-opcode-extension (insn subcode) 221 | (with-checks (integerp subcode) 222 | (setf (oprinfo-modrm.reg insn) subcode))) 223 | 224 | (defun modrm.mod-for-disp (disp) 225 | (cond 226 | ((eql disp 0) 0) 227 | ((integerp disp) 228 | (ecase (signed-width disp) 229 | (1 1) 230 | ((2 4) 2))) 231 | ((listp disp) 232 | (ecase (first disp) 233 | (:byte 1) 234 | (:half 2))))) 235 | 236 | (defun sib.scale-for-scale (scale) 237 | (ecase scale 238 | (1 0) 239 | (2 1) 240 | (4 2) 241 | (8 3))) 242 | 243 | (defun add-sib.index (insn index scale) 244 | (setf (oprinfo-sib.scale insn) (sib.scale-for-scale scale)) 245 | (if index 246 | (setf (oprinfo-sib.index insn) (register-number index)) 247 | (setf (oprinfo-sib.index insn) #x04))) 248 | 249 | (defun compute-disp.bytes (disp bytes) 250 | (or bytes 251 | (let ((sz (immediate-width disp))) 252 | (if (eql sz 2) 4 sz)))) 253 | 254 | (defun add-disp (insn disp type &optional bytes) 255 | (if (or (not (eql disp 0)) bytes) 256 | (let ((sz (compute-disp.bytes disp bytes))) 257 | (if (integerp disp) 258 | (progn 259 | (setf (oprinfo-disp insn) disp) 260 | (setf (oprinfo-disp.bytes insn) sz)) 261 | (progn 262 | (setf (oprinfo-disp insn) (second disp)) 263 | (setf (oprinfo-disp.bytes insn) sz) 264 | (setf (oprinfo-disp.rel-type insn) type) 265 | (setf (oprinfo-disp.rel-addn insn) (or (third disp) 0))))))) 266 | 267 | (defun add-mem-rest (insn base index scale) 268 | (if (or index (same-reg? base :rsp) (same-reg? base :r12)) 269 | (progn (setf (oprinfo-modrm.rm insn) #x04) 270 | (setf (oprinfo-sib.base insn) 271 | (or (register-number base) #x5)) 272 | (add-sib.index insn index scale)) 273 | (setf (oprinfo-modrm.rm insn) (register-number base)))) 274 | 275 | (defun add-modrm.mod-and-modrm.rm (insn mod rm) 276 | (setf (oprinfo-modrm.mod insn) mod) 277 | (setf (oprinfo-modrm.rm insn) rm)) 278 | 279 | (defun add-modrm.mod-only (insn mod) 280 | (setf (oprinfo-modrm.mod insn) mod)) 281 | 282 | (defun add-mem-operand (insn mem) 283 | (with-checks (mem? mem) 284 | (destructuring-bind (sz base index scale disp) mem 285 | (declare (ignore sz)) 286 | (unless (or (member base '(:rip :abs)) (register-number base)) 287 | (error 'encoding-error :form mem)) 288 | (cond 289 | ((eql base :rip) 290 | (add-modrm.mod-and-modrm.rm insn #x0 #x05) 291 | (add-disp insn disp :rel #x04)) 292 | ((eql base :abs) 293 | (add-modrm.mod-and-modrm.rm insn #x0 #x04) 294 | (setf (oprinfo-sib.base insn) #x05) 295 | (add-sib.index insn index scale) 296 | (add-disp insn disp :rel #x04)) 297 | ((and (or (same-reg? base :rbp) 298 | (same-reg? base :r13)) 299 | (eql disp 0)) 300 | (add-modrm.mod-only insn #x01) 301 | (add-disp insn disp :rel #x01) 302 | (add-mem-rest insn base index scale)) 303 | (t 304 | (add-modrm.mod-only insn (modrm.mod-for-disp disp)) 305 | (add-disp insn disp :rel) 306 | (add-mem-rest insn base index scale)))))) 307 | 308 | ; Syntax for defining instruction encoders. 309 | ; Encoder consists of sequences of clauses, each 310 | ; with two parts: a pattern, and a production. 311 | ; The pattern is a sequence of one or more of 312 | ; the following symbols: 313 | ; r8 rm8 r32 rm32 r64 rm64 imm8 imm32 imm64 s32 s64 314 | ; x xm32 xm64 xm128 315 | ; rX stands for a register of width X bits 316 | ; x stands for a vector register 317 | ; xmX stands for a vector register or memory 318 | ; operand of width X bits 319 | ; rmX stands for a register or memory operand 320 | ; of width X bits 321 | ; immX stands for an immediate of width X bits. 322 | ; sX stands for a symbolic immediate of width X bits. 323 | ; The product is a sequence of either integers, 324 | ; representing opcodes, or one or more of the 325 | ; following symbols: 326 | ; ib id iq cb cd /0 /1 /2 /3 /4 /5 /6 /7 /r /rm +r * 327 | ; * means that the instruction defaults to 64-bit, and needs 328 | ; no override prefix. It must be specified at the beginning. 329 | ; ib id and iq mean to follow the instruction 330 | ; with a 1, 4, or 8 byte immediate, respectively. 331 | ; /0 through /7 mean to specify that digit in modrm.reg 332 | ; /r means to use a regular modrm form, with modrm.reg as dest 333 | ; /rm means to use a regular modrm form, with modrm.rm as dest 334 | ; +r means to use a short form, adding the dest register to opcode 335 | ; The instruction width is determined by the form of the destination. 336 | ; The /0 through /7 /r /rm and +r terms are necessary to match 337 | ; the syntax of the processor reference manual, but are somewhat 338 | ; awkward to use programatically because they have multiple 339 | ; implications. These terms are transformed as follows: 340 | ; /0 through /7 -> /n /rm 341 | ; ib through iw -> ix 342 | ; /r -> /r /rm 343 | ; /rm -> /rm /r 344 | ; These terms are used as follows, mapped to corresponding operand 345 | ; /n -> set modrm.reg to subcode 346 | ; /rm -> add reg or mem operand to modrm.rm 347 | ; /r -> add reg parameter to modrm.reg 348 | ; ix -> add immediate operand 349 | ; cx -> add immediate operand (RIP-relative) 350 | 351 | (defun operand-matches? (opr constraint) 352 | (if (or (reg? constraint) (immediate? constraint)) 353 | (eql opr constraint) 354 | (ecase constraint 355 | (rm8 (or (byte-reg? opr) (byte-mem? opr))) 356 | (rm32 (or (half-reg? opr) (half-mem? opr))) 357 | (rm64 (or (word-reg? opr) (word-mem? opr))) 358 | (m8 (byte-mem? opr)) 359 | (m32 (half-mem? opr)) 360 | (m64 (word-mem? opr)) 361 | (m128 (wide-mem? opr)) 362 | (r8 (byte-reg? opr)) 363 | (r32 (half-reg? opr)) 364 | (r64 (word-reg? opr)) 365 | (x (xmm-reg? opr)) 366 | (xm32 (or (xmm-reg? opr) (half-mem? opr))) 367 | (xm64 (or (xmm-reg? opr) (word-mem? opr))) 368 | (xm128 (or (xmm-reg? opr) (wide-mem? opr))) 369 | (imm8 (byte-immediate? opr)) 370 | (imm16 (short-immediate? opr)) 371 | (imm32 (half-immediate? opr)) 372 | (imm64 (word-immediate? opr))))) 373 | 374 | (eval-when (:compile-toplevel :load-toplevel :execute) 375 | (defun operand-needs-override? (opr) 376 | (member opr '(rm64 r64 imm64))) 377 | 378 | (defun subcode-for-subcode-command (cmd) 379 | (case cmd 380 | (/0 0) 381 | (/1 1) 382 | (/2 2) 383 | (/3 3) 384 | (/4 4) 385 | (/5 5) 386 | (/6 6) 387 | (/7 7) 388 | (t nil))) 389 | 390 | (defun subcode-command? (cmd) 391 | (member cmd '(/0 /1 /2 /3 /4 /5 /6 /7))) 392 | 393 | (defun width-for-immediate-command (cmd) 394 | (case cmd 395 | ((ib cb) 1) 396 | (iw 2) 397 | ((id cd) 4) 398 | (iq 8) 399 | (t nil))) 400 | 401 | (defun rel-for-immediate-command (cmd) 402 | (case cmd 403 | ((ib iw id iq) :abs) 404 | ((cb cd) :bra))) 405 | 406 | (defun immediate-command? (cmd) 407 | (member cmd '(ib iw id iq cb cd))) 408 | 409 | (defun regularize-commands (cmds) 410 | (iter (for cmd in cmds) 411 | (cond 412 | ((subcode-command? cmd) 413 | (collect cmd) 414 | (collect '/rm)) 415 | ((immediate-command? cmd) 416 | (collect cmd)) 417 | ((eql cmd '/r) 418 | (collect '/r) 419 | (collect '/rm)) 420 | ((eql cmd '/rm) 421 | (collect '/rm) 422 | (collect '/r)) 423 | (t 424 | (collect cmd))))) 425 | 426 | (defun generate-operand-handlers (ocinfo oinfo cmds operands) 427 | (if (and cmds operands) 428 | (let ((cmd (car cmds)) 429 | (opr (car operands))) 430 | (flet ((advance () (generate-operand-handlers ocinfo oinfo (cdr cmds) 431 | (cdr operands))) 432 | (ignore () (generate-operand-handlers ocinfo oinfo (cdr cmds) 433 | operands))) 434 | (cond 435 | ((subcode-command? cmd) 436 | (cons `(add-opcode-extension ,oinfo 437 | ,(subcode-for-subcode-command cmd)) 438 | (ignore))) 439 | ((immediate-command? cmd) 440 | (cons `(add-immediate-operand ,oinfo ,opr 441 | ,(width-for-immediate-command cmd) 442 | ',(rel-for-immediate-command cmd)) 443 | (advance))) 444 | ((eql cmd '+r) 445 | (cons `(add-reg-operand ,oinfo ,opr 'op) (advance))) 446 | ((eql cmd '/r) 447 | (cons `(add-reg-operand ,oinfo ,opr 'reg) (advance))) 448 | ((eql cmd '/rm) 449 | (cons `(cond ((reg? ,opr) 450 | (add-reg-operand ,oinfo ,opr 'rm)) 451 | ((mem? ,opr) 452 | (add-mem-operand ,oinfo ,opr))) 453 | (advance))) 454 | ((eql cmd '*) 455 | (cons `(setf (ocinfo-override? ,ocinfo) nil) 456 | (ignore))) 457 | (t (ignore))))))) 458 | 459 | (defun find-first-non-prefix (ocs) 460 | (position (find-if-not #'(lambda (elt) 461 | (member elt *prefixes*)) 462 | ocs) 463 | ocs)) 464 | 465 | (defun collect-prefixes (ocs) 466 | (subseq ocs 0 (find-first-non-prefix ocs))) 467 | 468 | (defun collect-opcodes (ocs) 469 | (subseq ocs (find-first-non-prefix ocs) nil)) 470 | 471 | (defun generate-opcode-handlers (ocinfo cmds) 472 | (let* ((ocs (remove-if-not #'integerp cmds)) 473 | (pfxs (collect-prefixes ocs)) 474 | (opcodes (collect-opcodes ocs))) 475 | `(,@(mapcar #'(lambda (pfx) 476 | `(vector-push-extend ,pfx (ocinfo-prefixes ,ocinfo))) 477 | pfxs) 478 | ,@(mapcar #'(lambda (oc) 479 | `(vector-push-extend ,oc (ocinfo-opcodes ,ocinfo))) 480 | opcodes)))) 481 | 482 | ; note that this may latter be undone in the command handlers 483 | (defun maybe-generate-override-setter (ocinfo constraints) 484 | (if (some #'operand-needs-override? constraints) 485 | `(setf (ocinfo-override? ,ocinfo) t) 486 | `(progn))) 487 | 488 | (defun transform-production (pattern production operands) 489 | (let ((cmds (regularize-commands production)) 490 | (oprinfo (gensym)) 491 | (ocinfo (gensym))) 492 | `(let ((,oprinfo (make-oprinfo)) 493 | (,ocinfo (new-ocinfo))) 494 | ,(maybe-generate-override-setter ocinfo pattern) 495 | ,@(generate-operand-handlers ocinfo oprinfo cmds operands) 496 | ,@(generate-opcode-handlers ocinfo cmds) 497 | (values ,ocinfo ,oprinfo)))) 498 | 499 | (defun transform-constraint (constraint operand) 500 | `(operand-matches? ,operand ',constraint)) 501 | 502 | (defun transform-clause (clause operands) 503 | (let ((pattern (car clause)) 504 | (production (cadr clause))) 505 | `((and ,@(mapcar #'transform-constraint pattern operands)) 506 | ,(transform-production pattern production operands))))) 507 | 508 | (defmacro define-encoder (insn operands &body body) 509 | (let ((name (prefixsym "ENCODE-" insn))) 510 | (push (list (intern (symbol-name insn) "KEYWORD") body) *encoders*) 511 | `(defun ,name ,operands 512 | (cond ,@(mapcar #'(lambda (clause) 513 | (transform-clause clause operands)) 514 | body))))) 515 | 516 | (defun register-low-part (reg) 517 | (if (integerp reg) (logand reg #x7))) 518 | 519 | (defun req-rex-bit (&rest regs) 520 | (let ((vals (iter (for reg in regs) 521 | (if (and (integerp reg) (> reg 7)) 522 | (collect 1) 523 | (collect 0))))) 524 | (apply #'max vals))) 525 | 526 | (defun maybe-emit-rex (ln ocinfo oprinfo) 527 | (with-slots ((reg modrm.reg) 528 | (rm modrm.rm) 529 | (index sib.index) 530 | (base sib.base) 531 | (ext oc.ext)) oprinfo 532 | (let ((rex (compose-rex (if (ocinfo-override? ocinfo) 1 0) 533 | (req-rex-bit reg) 534 | (req-rex-bit index) 535 | (req-rex-bit base rm ext)))) 536 | (if (not (eql rex #x40)) 537 | (emit-byte ln rex))))) 538 | 539 | (defun maybe-emit-prefixes (ln ocinfo) 540 | (iter (for pfx in-vector (ocinfo-prefixes ocinfo)) 541 | (emit-byte ln pfx))) 542 | 543 | (defun emit-opcode-maybe-extended (ln opc oprinfo) 544 | (emit-byte ln (+ opc (register-low-part (or (oprinfo-oc.ext oprinfo) 0))))) 545 | 546 | (defun emit-opcodes (ln ocinfo oprinfo) 547 | (if (eql (elt (ocinfo-opcodes ocinfo) 0) #x0F) 548 | (progn 549 | (emit-byte ln #x0F) 550 | (emit-opcode-maybe-extended ln (elt (ocinfo-opcodes ocinfo) 1) oprinfo)) 551 | (emit-opcode-maybe-extended ln (elt (ocinfo-opcodes ocinfo) 0) oprinfo))) 552 | 553 | (defun maybe-emit-modrm (ln oprinfo) 554 | (with-slots ((mod modrm.mod) (reg modrm.reg) (rm modrm.rm)) oprinfo 555 | (and mod reg rm 556 | (emit-byte ln (compose-modrm mod 557 | (register-low-part reg) 558 | (register-low-part rm)))))) 559 | 560 | (defun maybe-emit-sib (ln oprinfo) 561 | (with-slots ((scale sib.scale) (index sib.index) (base sib.base)) oprinfo 562 | (and scale index base 563 | (emit-byte ln (compose-sib scale 564 | (register-low-part index) 565 | (register-low-part base)))))) 566 | 567 | (defun do-emit-disp-or-imm (ln disp-or-imm bytes type addn) 568 | (when (and disp-or-imm bytes) 569 | (if (integerp disp-or-imm) 570 | (emit-bytes ln disp-or-imm bytes) 571 | (progn 572 | (emit-reloc ln disp-or-imm bytes type) 573 | (emit-bytes ln addn bytes))))) 574 | 575 | (defun maybe-emit-disp (ln oprinfo) 576 | (with-slots ((disp disp) (bytes disp.bytes)) oprinfo 577 | (do-emit-disp-or-imm ln disp bytes (oprinfo-disp.rel-type oprinfo) 578 | (oprinfo-disp.rel-addn oprinfo)))) 579 | 580 | (defun maybe-emit-imm (ln oprinfo) 581 | (with-slots ((imm imm) (bytes imm.bytes)) oprinfo 582 | (do-emit-disp-or-imm ln imm bytes (oprinfo-imm.rel-type oprinfo) 583 | (oprinfo-imm.rel-addn oprinfo)))) 584 | 585 | (defun encode-instruction (ln ocinfo oprinfo) 586 | (maybe-emit-prefixes ln ocinfo) 587 | (maybe-emit-rex ln ocinfo oprinfo) 588 | (emit-opcodes ln ocinfo oprinfo) 589 | (maybe-emit-modrm ln oprinfo) 590 | (maybe-emit-sib ln oprinfo) 591 | (maybe-emit-disp ln oprinfo) 592 | (maybe-emit-imm ln oprinfo)) 593 | 594 | (defun do-encode (ln fun args) 595 | (multiple-value-bind (ocinfo oprinfo) 596 | (apply fun args) 597 | (encode-instruction ln ocinfo oprinfo))) 598 | 599 | (defun encode-insn (insn ln) 600 | (handler-case 601 | (let ((fun (prefixsym "ENCODE-" (car insn) "AMD64-ASM"))) 602 | (do-encode ln (symbol-function fun) (cdr insn))) 603 | (assertion-failed (as) (error 'assertion-failed :form insn 604 | :check (assertion-failed-check as))) 605 | (condition (condition) (declare (ignore condition)) 606 | (error 'encoding-error :form insn)))) 607 | 608 | ; Encoders for general 8/32/64-bit integer instructions 609 | 610 | ; instructions not encoded 611 | ; aaa, aad, aam, aas, bound, call (far), cbw, cwde, 612 | ; cdqe, cwd, cdq, cqo, cmov, cmps, cmps, cmpsw, cmpsd, cmpsq, 613 | ; daa, das, enter, in, ins, insb, insw, insd, into, jcx, jecx, jrcx, 614 | ; lahf, lds, les, lfs, lgs, lss, lfence, lods, lodsb, lodsw, lodsd, 615 | ; lodsq, loop, loope, loopne, loopnz, loopz, mfence, movs, movsb, 616 | ; movsw, movsd, movsq, outs, outsb, outsw, outsd, popa, popad, popf, 617 | ; popa, popad, popf, popfd, popfq, prefetch, prefetchw, pusha, pushad, 618 | ; pushf, pushfd, ret (far), sahf, scas, scasb, scasw, scasd, scasq, 619 | ; sfence, shld, shrd, std, stos, stosb, stosw, stosd, stosq, xlat, xlatb 620 | 621 | (defmacro define-type0-encoder (name base subcode) 622 | (let ((base1 base) 623 | (base2 (+ base 1)) 624 | (base3 (+ base 2)) 625 | (base4 (+ base 3))) 626 | `(define-encoder ,name (dest source) 627 | ((rm8 imm8) (#x80 ,subcode ib)) 628 | ((rm32 imm8) (#x83 ,subcode ib)) 629 | ((rm64 imm8) (#x83 ,subcode ib)) 630 | ((rm32 imm32) (#x81 ,subcode id)) 631 | ((rm64 imm32) (#x81 ,subcode id)) 632 | ((rm8 r8) (,base1 /rm)) 633 | ((rm32 r32) (,base2 /rm)) 634 | ((rm64 r64) (,base2 /rm)) 635 | ((r8 rm8) (,base3 /r)) 636 | ((r32 rm32) (,base4 /r)) 637 | ((r64 rm64) (,base4 /r))))) 638 | 639 | (defmacro define-type1-encoder (name base code) 640 | (let ((base1 base) 641 | (base2 (+ base 1))) 642 | `(define-encoder ,name (dest) 643 | ((rm8) (,base1 ,code)) 644 | ((rm32) (,base2 ,code)) 645 | ((rm64) (,base2 ,code))))) 646 | 647 | (defmacro define-type2-encoder (name subcode) 648 | `(define-encoder ,name (dest source) 649 | ((rm8 1) (#xD0 ,subcode)) 650 | ((rm32 1) (#xD1 ,subcode)) 651 | ((rm64 1) (#xD1 ,subcode)) 652 | ((rm8 imm8) (#xC0 ,subcode ib)) 653 | ((rm32 imm8) (#xC1 ,subcode ib)) 654 | ((rm64 imm8) (#xC1 ,subcode ib)))) 655 | 656 | (defmacro define-type3-encoder (name &rest opcodes) 657 | `(define-encoder ,name () 658 | (() (,@opcodes)))) 659 | 660 | (defmacro define-type4-encoder (name base1 base2 code) 661 | `(define-encoder ,name (dest source) 662 | ((rm32 r32) (#x0F ,base1 /rm)) 663 | ((rm64 r64) (#x0F ,base1 /rm)) 664 | ((rm32 imm8) (#x0F ,base2 ,code ib)) 665 | ((rm64 imm8) (#x0F ,base2 ,code ib)))) 666 | 667 | (defmacro define-type5-encoder (name code) 668 | `(define-encoder ,name (dest count) 669 | ((rm8 1) (#xD0 ,code)) 670 | ((rm8 :cl) (#xD2 ,code)) 671 | ((rm8 imm8) (#xC0 ,code ib)) 672 | ((rm32 1) (#xD1 ,code)) 673 | ((rm32 :cl) (#xD3 ,code)) 674 | ((rm32 imm8) (#xC1 ,code ib)) 675 | ((rm64 1) (#xD1 ,code)) 676 | ((rm64 :cl) (#xD3 ,code)) 677 | ((rm64 imm8) (#xC1 ,code ib)))) 678 | 679 | (define-type0-encoder add #x00 /0) 680 | (define-type0-encoder adc #x10 /2) 681 | (define-type0-encoder and #x20 /4) 682 | (define-type0-encoder xor #x30 /6) 683 | (define-type0-encoder or #x08 /1) 684 | (define-type0-encoder sbb #x18 /3) 685 | (define-type0-encoder sub #x28 /5) 686 | (define-type0-encoder cmp #x38 /7) 687 | 688 | (define-encoder bsf (dest source) 689 | ((r32 rm32) (#x0F #xBC /r)) 690 | ((r64 rm64) (#x0F #xBC /r))) 691 | 692 | (define-encoder bsr (dest source) 693 | ((r32 rm32) (#x0F #xBD /r)) 694 | ((r64 rm64) (#x0F #xBD /r))) 695 | 696 | (define-encoder bswap (dest) 697 | ((r32) (#x0F #xC8 +r)) 698 | ((r64) (#x0F #xC8 +r))) 699 | 700 | (define-type4-encoder bt #xA3 #xBA /4) 701 | (define-type4-encoder btc #xBB #xBA /7) 702 | (define-type4-encoder btr #xB3 #xBA /6) 703 | (define-type4-encoder bts #xAB #xBA /5) 704 | 705 | (define-encoder call (target) 706 | ((imm32) (#xE8 cd)) 707 | ((rm64) (* #xFF /2))) 708 | 709 | (define-type3-encoder clc #xF8) 710 | 711 | (define-encoder clflush (addr) 712 | ((m8) (#x0F #xAE /7))) 713 | 714 | (define-type3-encoder cmc #xF5) 715 | 716 | (defmacro define-cmovcc-encoders () 717 | `(progn ,@(iter (for oc from #x40 to #x4F) 718 | (for insn in '(cmovo cmovno cmovb cmovnb 719 | cmovz cmovnz cmovbe cmovnbe 720 | cmovs cmovns cmovp cmovnp 721 | cmovl cmovge cmovle cmovg)) 722 | (collect 723 | `(define-encoder ,insn (dest source) 724 | ((r32 rm32) (#x0F ,oc /r)) 725 | ((r64 rm64) (#x0F ,oc /r))))))) 726 | 727 | (define-cmovcc-encoders) 728 | 729 | (define-encoder cmpxchg (dest source) 730 | ((rm8 r8) (#x0F #xB0 /rm)) 731 | ((rm32 r32) (#x0F #xB1 /rm)) 732 | ((rm64 r64) (#x0F #xB1 /rm))) 733 | 734 | (define-type3-encoder cpuid #x0F #xA2) 735 | 736 | (define-type1-encoder dec #xFE /1) 737 | (define-type1-encoder div #xF6 /6) 738 | (define-type1-encoder idiv #xF6 /7) 739 | (define-type1-encoder inc #xFE /0) 740 | (define-type1-encoder mul #xF6 /4) 741 | (define-type1-encoder neg #xF6 /3) 742 | (define-type1-encoder not #xF6 /2) 743 | 744 | (define-encoder imul (dest source) 745 | ((r32 rm32) (#x0F #xAF /r)) 746 | ((r64 rm64) (#x0F #xAF /r))) 747 | 748 | (define-encoder imul3 (dest source scale) 749 | ((r32 rm32 imm8) (#x6B /r ib)) 750 | ((r64 rm64 imm8) (#x6B /r ib)) 751 | ((r32 rm32 imm32) (#x69 /r id)) 752 | ((r64 rm64 imm32) (#x69 /r id))) 753 | 754 | (define-encoder int (idx) 755 | ((imm8) (#xCD ib))) 756 | 757 | (defmacro define-jcc-encoders () 758 | `(progn ,@(iter (for oc from #x70 to #x7F) 759 | (for oc2 from #x80 to #x8F) 760 | (for insn in '(jo jno jb jnb 761 | jz jnz jbe jnbe 762 | js jns jp jnp 763 | jl jge jle jg)) 764 | (collect 765 | `(define-encoder ,insn (offset) 766 | ((imm8) (,oc cb)) 767 | ((imm32) (#x0F ,oc2 cd))))))) 768 | 769 | (define-jcc-encoders) 770 | 771 | (define-encoder jmp (target) 772 | ((imm8) (#xEB cb)) 773 | ((imm32) (#xE9 cd)) 774 | ((rm64) (* #xFF /4))) 775 | 776 | (define-type3-encoder leave #xC9) 777 | 778 | (define-encoder mov (dest source) 779 | ((rm8 r8) (#x88 /rm)) 780 | ((rm32 r32) (#x89 /rm)) 781 | ((rm64 r64) (#x89 /rm)) 782 | ((r8 rm8) (#x8A /r)) 783 | ((r32 rm32) (#x8B /r)) 784 | ((r64 rm64) (#x8B /r)) 785 | ((r8 imm8) (#xB0 +r ib)) 786 | ((r32 imm32) (#xB8 +r id)) 787 | ((rm64 imm32) (#xC7 /0 id)) 788 | ((r64 imm64) (#xB8 +r iq)) 789 | ((rm8 imm8) (#xC6 /0 ib)) 790 | ((rm32 imm32) (#xC7 /0 id))) 791 | 792 | (define-encoder movnti (dest source) 793 | ((m32 r32) (#x0F #xC3 /rm)) 794 | ((m64 r64) (#x0F #xC3 /rm))) 795 | 796 | (define-encoder movsx (dest source) 797 | ((r32 rm8) (#x0F #xBE /r)) 798 | ((r64 rm8) (#x0F #xBE /r))) 799 | 800 | (define-encoder movsxd (dest source) 801 | ((r64 rm32) (#x63 /r))) 802 | 803 | (define-encoder movzx (dest source) 804 | ((r32 rm8) (#x0F #xB6 /r)) 805 | ((r64 rm8) (#x0F #xB6 /r))) 806 | 807 | (define-type3-encoder nop #x90) 808 | (define-type3-encoder pause #xF3 #x90) 809 | 810 | (define-encoder pop (dest) 811 | ((r64) (* #x58 +r)) 812 | ((rm64) (* #x8F /0))) 813 | 814 | (define-encoder push (source) 815 | ((r64) (* #x50 +r)) 816 | ((rm64) (* #xFF /6)) 817 | ((imm8) (#x6A ib)) 818 | ((imm32) (#x68 id))) 819 | 820 | (define-type5-encoder rcl /2) 821 | (define-type5-encoder rcr /3) 822 | 823 | (define-type3-encoder ret #xC3) 824 | 825 | (define-type5-encoder rol /0) 826 | (define-type5-encoder ror /1) 827 | 828 | (define-encoder retn (bytes) 829 | ((imm16) (#xC2 iw))) 830 | 831 | (defmacro define-setcc-encoders () 832 | `(progn ,@(iter (for oc from #x90 to #x9F) 833 | (for insn in '(seto setno setb setnb 834 | setz setnz setbe setnbe 835 | sets setns setp setnp 836 | setl setge setle setg)) 837 | (collect 838 | `(define-encoder ,insn (offset) 839 | ((rm8) (#x0F ,oc /2))))))) 840 | 841 | (define-setcc-encoders) 842 | 843 | (define-type2-encoder sal /4) 844 | (define-type2-encoder sar /7) 845 | (define-type2-encoder shr /5) 846 | 847 | (define-type3-encoder stc #xF9) 848 | 849 | (define-encoder test (dest source) 850 | ((rm8 imm8) (#xF6 /0 ib)) 851 | ((rm32 imm32) (#xF7 /0 id)) 852 | ((rm64 imm32) (#xF7 /0 id)) 853 | ((rm8 r8) (#x84 /rm)) 854 | ((rm32 r32) (#x85 /rm)) 855 | ((rm64 r64) (#x85 /rm))) 856 | 857 | (define-encoder xchg (dest source) 858 | ((rm8 r8) (#x86 /rm)) 859 | ((r8 rm8) (#x86 /r)) 860 | ((rm32 r32) (#x87 /rm)) 861 | ((r32 rm32) (#x87 /r)) 862 | ((rm64 r64) (#x87 /rm)) 863 | ((r64 rm64) (#x87 /r))) 864 | 865 | (define-encoder xadd (dest source) 866 | ((rm8 r8) (#x0F #xC0 /rm)) 867 | ((rm32 r32) (#x0F #xC1 /rm)) 868 | ((rm64 r64) (#x0F #xC1 /rm))) 869 | 870 | ; Man there are a lot of SSE instructions 871 | ; The choice of xm32/xm64/xm128 is seemingly random. 872 | ; The specifier chosen is the one that makes yasm happy. 873 | 874 | (defmacro define-x-encoder (name &rest opcodes) 875 | `(define-encoder ,name (dest source) 876 | ((x x) (,@opcodes /r)))) 877 | 878 | (defmacro define-xm128-encoder (name &rest opcodes) 879 | `(define-encoder ,name (dest source) 880 | ((x xm128) (,@opcodes /r)))) 881 | 882 | (defmacro define-xm64-encoder (name &rest opcodes) 883 | `(define-encoder ,name (dest source) 884 | ((x xm64) (,@opcodes /r)))) 885 | 886 | (defmacro define-xm32-encoder (name &rest opcodes) 887 | `(define-encoder ,name (dest source) 888 | ((x xm32) (,@opcodes /r)))) 889 | 890 | (defmacro define-cmp-encoder (name &rest opcodes) 891 | `(define-encoder ,name (dest source cmp) 892 | ((x xm128 imm8) (,@opcodes /r ib)))) 893 | 894 | (defmacro define-rx64-encoder (name &rest opcodes) 895 | `(define-encoder ,name (dest source) 896 | ((r32 xm64) (,@opcodes /r)) 897 | ((r64 xm64) (,@opcodes /r)))) 898 | 899 | (defmacro define-rx32-encoder (name &rest opcodes) 900 | `(define-encoder ,name (dest source) 901 | ((r32 xm32) (,@opcodes /r)) 902 | ((r64 xm32) (,@opcodes /r)))) 903 | 904 | (defmacro define-rx-encoder (name &rest opcodes) 905 | `(define-encoder ,name (dest source) 906 | ((r32 xm32) (,@opcodes /r)) 907 | ((r64 xm64) (,@opcodes /r)))) 908 | 909 | (defmacro define-xr-encoder (name &rest opcodes) 910 | `(define-encoder ,name (dest source) 911 | ((x rm32) (,@opcodes /r)) 912 | ((x rm64) (,@opcodes /r)))) 913 | 914 | (defmacro define-shift0-encoder (name code1 code2 sub) 915 | `(define-encoder ,name (dest shift) 916 | ((x xm128) (#x66 #x0F ,code1 /r)) 917 | ((x imm8) (#x66 #x0F ,code2 ,sub ib)))) 918 | 919 | (defmacro define-shift1-encoder (name code sub) 920 | `(define-encoder ,name (dest shift) 921 | ((x imm8) (#x66 #x0F ,code ,sub ib)))) 922 | 923 | (defmacro define-mov1-encoder (name opcodes1 opcodes2) 924 | `(define-encoder ,name (dest source) 925 | ((x m64) (,@opcodes1 /r)) 926 | ((m64 x) (,@opcodes2 /rm)))) 927 | 928 | (defmacro define-mov2-encoder (name &rest opcodes) 929 | `(define-encoder ,name (dest source) 930 | ((r32 x) (,@opcodes /r)))) 931 | 932 | (defmacro define-mov3-encoder (name &rest opcodes) 933 | `(define-encoder ,name (dest source) 934 | ((m128 x) (,@opcodes /rm)))) 935 | 936 | (defmacro define-mov0-128-encoder (name opcodes1 opcodes2) 937 | `(define-encoder ,name (dest source) 938 | ((x xm128) (,@opcodes1 /r)) 939 | ((xm128 x) (,@opcodes2 /rm)))) 940 | 941 | (defmacro define-mov0-64-encoder (name opcodes1 opcodes2) 942 | `(define-encoder ,name (dest source) 943 | ((x xm64) (,@opcodes1 /r)) 944 | ((xm64 x) (,@opcodes2 /rm)))) 945 | 946 | (defmacro define-mov0-32-encoder (name opcodes1 opcodes2) 947 | `(define-encoder ,name (dest source) 948 | ((x xm32) (,@opcodes1 /r)) 949 | ((xm32 x) (,@opcodes2 /rm)))) 950 | 951 | (define-xm128-encoder addpd #x66 #x0F #x58) 952 | (define-xm128-encoder addps #x0F #x58) 953 | (define-xm128-encoder addsd #xF2 #x0F #x58) 954 | (define-xm128-encoder addss #xF3 #x0F #x58) 955 | 956 | (define-xm128-encoder addsubpd #x66 #x0F #xD0) 957 | (define-xm128-encoder addsubps #xF2 #x0F #xD0) 958 | 959 | (define-xm128-encoder andnpd #x66 #x0F #x55) 960 | (define-xm128-encoder andnps #x0F #x55) 961 | (define-xm128-encoder andpd #x66 #x0F #x54) 962 | (define-xm128-encoder andps #x0F #x54) 963 | 964 | (define-cmp-encoder cmppd #x66 #x0F #xC2) 965 | (define-cmp-encoder cmpps #x0F #xC2) 966 | (define-cmp-encoder cmpsd #xF2 #x0F #xC2) 967 | (define-cmp-encoder cmpss #xF3 #x0F #xC2) 968 | 969 | (define-xm128-encoder comisd #x66 #x0F #x2F) 970 | (define-xm128-encoder comiss #x0F #x2F) 971 | 972 | (define-xm64-encoder cvtdq2pd #xF3 #x0F #xE6) 973 | (define-xm128-encoder cvtdq2ps #x0F #x5B) 974 | (define-xm128-encoder cvtpd2dq #xF2 #x0F #xE6) 975 | 976 | ; cvtpd2pi 977 | 978 | (define-xm128-encoder cvtpd2ps #x66 #x0F #x5A) 979 | 980 | ; cvtpi2pd 981 | ; cvtpi2ps 982 | 983 | (define-xm128-encoder cvtps2dq #x66 #x0F #x5B) 984 | (define-xm64-encoder cvtps2pd #x0F #x5A) 985 | 986 | ; cvtps2pi 987 | 988 | (define-rx64-encoder cvtsd2si #xF2 #x0F #x2D) 989 | (define-xm64-encoder cvtsd2ss #xF2 #x0F #x5A) 990 | (define-xr-encoder cvtsi2sd #xF2 #x0F #x2A) 991 | (define-xr-encoder cvtsi2ss #xF3 #x0F #x2A) 992 | (define-xm32-encoder cvtss2sd #xF3 #x0F #x5A) 993 | (define-rx32-encoder cvtss2si #xF3 #x0F #x2D) 994 | (define-xm128-encoder cvttpd2dq #x66 #x0F #xE6) 995 | 996 | ; cvtpd2pi 997 | 998 | (define-xm128-encoder cvttps2dq #xF3 #x0F #x5b) 999 | 1000 | ; cvttpd2pi 1001 | 1002 | (define-rx64-encoder cvttsd2si #xF2 #x0F #x2C) 1003 | (define-rx32-encoder cvttss2si #xF3 #x0F #x2C) 1004 | 1005 | (define-xm128-encoder divpd #x66 #x0F #x5E) 1006 | (define-xm128-encoder divps #x0F #x5E) 1007 | (define-xm128-encoder divsd #xF2 #x0F #x5E) 1008 | (define-xm128-encoder divss #xF3 #x0F #x5E) 1009 | 1010 | ; fxrstor 1011 | ; fxsave 1012 | 1013 | (define-xm128-encoder haddpd #x66 #x0F #x7C) 1014 | (define-xm128-encoder haddps #xF2 #x0F #x7C) 1015 | 1016 | (define-xm128-encoder hsubpd #x66 #x0F #x7D) 1017 | (define-xm128-encoder hsubps #xF2 #x0F #x7D) 1018 | 1019 | ; lddqu 1020 | 1021 | (define-encoder ldmxcsr (source) 1022 | ((m32) (#x0F #xAE /2))) 1023 | 1024 | (define-x-encoder maskmovdqu #x66 #x0F #xF7) 1025 | 1026 | (define-xm128-encoder maxpd #x66 #x0F #x5F) 1027 | (define-xm128-encoder maxps #x0F #x5F) 1028 | (define-xm128-encoder maxsd #xF2 #x0F #x5F) 1029 | (define-xm128-encoder maxss #xF3 #x0F #x5F) 1030 | 1031 | (define-xm128-encoder minpd #x66 #x0F #x5D) 1032 | (define-xm128-encoder minps #x0F #x5D) 1033 | (define-xm128-encoder minsd #xF2 #x0F #x5D) 1034 | (define-xm128-encoder minss #xF3 #x0F #x5D) 1035 | 1036 | (define-mov0-128-encoder movapd (#x66 #x0F #x28) (#x66 #x0F #x29)) 1037 | (define-mov0-128-encoder movaps (#x0F #x28) (#x0F #x29)) 1038 | 1039 | (define-encoder movd (dest source) 1040 | ((x rm32) (#x66 #x0F #x6E /r)) 1041 | ((x rm64) (#x66 #x0F #x6E /r)) 1042 | ((rm32 x) (#x66 #x0F #x7E /rm)) 1043 | ((rm64 x) (#x66 #x0F #x7E /rm))) 1044 | 1045 | (define-xm64-encoder movddup #xF2 #x0F #x12) 1046 | 1047 | ; movdq2q 1048 | 1049 | (define-mov0-128-encoder movdqa (#x66 #x0F #x6F) (#x66 #x0F #x7F)) 1050 | (define-mov0-128-encoder movdqu (#xF3 #x0F #x6F) (#xF3 #x0F #x7F)) 1051 | (define-x-encoder movhlps #x0F #x12) 1052 | (define-mov1-encoder movhpd (#x66 #x0F #x16) (#x66 #x0F #x17)) 1053 | (define-mov1-encoder movhps (#x0F #x16) (#x0F #x17)) 1054 | (define-x-encoder movlhps #x0F #x16) 1055 | (define-mov1-encoder movlpd (#x66 #x0F #x12) (#x66 #x0F #x13)) 1056 | (define-mov1-encoder movlps (#x0F #x12) (#x0F #x13)) 1057 | 1058 | (define-mov2-encoder movmskpd #x66 #x0F #x50) 1059 | (define-mov2-encoder movmskps #x0F #x50) 1060 | 1061 | (define-mov3-encoder movntdq #x66 #x0F #xE7) 1062 | (define-mov3-encoder movntpd #x66 #x0F #x2B) 1063 | (define-mov3-encoder movntps #x0F #x2B) 1064 | 1065 | (define-mov0-64-encoder movq (#xF3 #x0F #x7E) (#x66 #x0F #xD6)) 1066 | 1067 | ; movq2dq 1068 | 1069 | (define-mov0-64-encoder movsd (#xF2 #x0F #x10) (#xF2 #x0F #x11)) 1070 | (define-xm128-encoder movshdup #xF3 #x0F #x16) 1071 | (define-xm128-encoder movsldup #xF3 #x0F #x12) 1072 | (define-mov0-32-encoder movss (#xF3 #x0F #x10) (#xF3 #x0F #x11)) 1073 | (define-mov0-128-encoder movupd (#x66 #x0F #x10) (#x66 #x0F #x11)) 1074 | (define-mov0-128-encoder movups (#x0F #x10) (#x0F #x11)) 1075 | 1076 | (define-xm128-encoder mulpd #x66 #x0F #x59) 1077 | (define-xm128-encoder mulps #x0F #x59) 1078 | (define-xm128-encoder mulsd #xF2 #x0F #x59) 1079 | (define-xm128-encoder mulss #xF3 #x0F #x59) 1080 | 1081 | (define-xm128-encoder orpd #x66 #x0F #x56) 1082 | (define-xm128-encoder orps #x0F #x56) 1083 | 1084 | (define-xm128-encoder packssdw #x66 #x0F #x6B) 1085 | (define-xm128-encoder packsswb #x66 #x0F #x63) 1086 | (define-xm128-encoder packuswb #x66 #x0F #x67) 1087 | 1088 | (define-xm128-encoder paddb #x66 #x0F #xFC) 1089 | (define-xm128-encoder paddd #x66 #x0F #xFE) 1090 | (define-xm128-encoder paddq #x66 #x0F #xD4) 1091 | (define-xm128-encoder paddsb #x66 #x0F #xEC) 1092 | (define-xm128-encoder paddsw #x66 #x0F #xED) 1093 | (define-xm128-encoder paddusb #x66 #x0F #xDC) 1094 | (define-xm128-encoder paddusw #x66 #x0F #xDD) 1095 | (define-xm128-encoder paddw #x66 #x0F #xFD) 1096 | 1097 | (define-xm128-encoder pand #x66 #x0F #xDB) 1098 | (define-xm128-encoder pandn #x66 #x0F #xDF) 1099 | 1100 | (define-xm128-encoder pavgb #x66 #x0F #xE0) 1101 | (define-xm128-encoder pavgw #x66 #x0F #xE3) 1102 | 1103 | (define-xm128-encoder pcmpeqb #x66 #x0F #x74) 1104 | (define-xm128-encoder pcmpeqd #x66 #x0F #x76) 1105 | (define-xm128-encoder pcmpeqw #x66 #x0F #x75) 1106 | (define-xm128-encoder pcmpgtb #x66 #x0F #x64) 1107 | (define-xm128-encoder pcmpgtd #x66 #x0F #x66) 1108 | (define-xm128-encoder pcmpgtw #x66 #x0F #x65) 1109 | 1110 | (define-encoder pextrw (dest source sel) 1111 | ((r32 x imm8) (#x66 #x0F #xC5 /r ib))) 1112 | 1113 | (define-encoder pinsrw (dest source sel) 1114 | ((x rm32 imm8) (#x66 #x0F #xC4 /r ib))) 1115 | 1116 | (define-xm128-encoder pmaddwd #x66 #x0F #xF5) 1117 | 1118 | (define-xm128-encoder pmaxsw #x66 #x0F #xEE) 1119 | (define-xm128-encoder pmaxub #x66 #x0F #xDE) 1120 | 1121 | (define-xm128-encoder pminsw #x66 #x0F #xEA) 1122 | (define-xm128-encoder pminub #x66 #x0F #xDA) 1123 | 1124 | (define-mov2-encoder pmovmskb #x66 #x0F #xD7) 1125 | 1126 | (define-xm128-encoder pmulhuw #x66 #x0F #xE4) 1127 | (define-xm128-encoder pmulhw #x66 #x0F #xE5) 1128 | (define-xm128-encoder pmullw #x66 #x0F #xD5) 1129 | (define-xm128-encoder pmuludq #x66 #x0F #xF4) 1130 | 1131 | (define-xm128-encoder por #x66 #x0F #xEB) 1132 | 1133 | (define-xm128-encoder psadbw #x66 #x0F #xF6) 1134 | 1135 | (define-cmp-encoder pshufd #x66 #x0F #x70) 1136 | (define-cmp-encoder pshufhw #xF3 #x0F #x70) 1137 | (define-cmp-encoder pshuflw #xF2 #x0F #x70) 1138 | 1139 | (define-shift0-encoder pslld #xF2 #x72 /6) 1140 | (define-shift1-encoder pslldq #x73 /7) 1141 | (define-shift0-encoder psllq #xF3 #x73 /6) 1142 | (define-shift0-encoder psllw #xF1 #x71 /6) 1143 | (define-shift0-encoder psrad #xE2 #x72 /4) 1144 | (define-shift0-encoder psraw #xE1 #x71 /4) 1145 | (define-shift0-encoder psrld #xD2 #x72 /2) 1146 | (define-shift1-encoder psrldq #x73 /3) 1147 | (define-shift0-encoder psrlq #xD3 #x73 /2) 1148 | (define-shift0-encoder psrlw #xD1 #x71 /2) 1149 | 1150 | (define-xm128-encoder psubb #x66 #x0F #xF8) 1151 | (define-xm128-encoder psubd #x66 #x0F #xFA) 1152 | (define-xm128-encoder psubq #x66 #x0F #xFB) 1153 | (define-xm128-encoder psubsb #x66 #x0F #xE8) 1154 | (define-xm128-encoder psubsw #x66 #x0F #xE9) 1155 | (define-xm128-encoder psubusb #x66 #x0F #xD8) 1156 | (define-xm128-encoder psubusw #x66 #x0F #xD9) 1157 | (define-xm128-encoder psubw #x66 #x0F #xF9) 1158 | 1159 | (define-xm128-encoder punpckhbw #x66 #x0F #x68) 1160 | (define-xm128-encoder punpckhdq #x66 #x0F #x6A) 1161 | (define-xm128-encoder punpckhqdq #x66 #x0F #x6D) 1162 | (define-xm128-encoder punpckhwd #x66 #x0F #x69) 1163 | (define-xm128-encoder punpcklbw #x66 #x0F #x60) 1164 | (define-xm128-encoder punpckldq #x66 #x0F #x62) 1165 | (define-xm128-encoder punpcklqdq #x66 #x0F #x6C) 1166 | (define-xm128-encoder punpcklwd #x66 #x0F #x61) 1167 | 1168 | (define-xm128-encoder pxor #x66 #x0F #xEF) 1169 | 1170 | (define-xm128-encoder rcpps #x0F #x53) 1171 | (define-xm128-encoder rcpss #xF3 #x0F #x53) 1172 | (define-xm128-encoder rsqrtps #x0F #x52) 1173 | (define-xm128-encoder rsqrtss #xF3 #x0F #x52) 1174 | 1175 | (define-cmp-encoder shufpd #x66 #x0F #xC6) 1176 | (define-cmp-encoder shufps #x0F #xC6) 1177 | 1178 | (define-xm128-encoder sqrtpd #x66 #x0F #x51) 1179 | (define-xm128-encoder sqrtps #x0F #x51) 1180 | (define-xm128-encoder sqrtsd #xF2 #x0F #x51) 1181 | (define-xm128-encoder sqrtss #xF3 #x0F #x51) 1182 | 1183 | (define-encoder stmxcsr (dest) 1184 | ((m32) (#x0F #xAE /3))) 1185 | 1186 | (define-xm128-encoder subpd #x66 #x0F #x5C) 1187 | (define-xm128-encoder subps #x0F #x5C) 1188 | (define-xm128-encoder subsd #xF2 #x0F #x5C) 1189 | (define-xm128-encoder subss #xF3 #x0F #x5C) 1190 | 1191 | (define-xm128-encoder ucomisd #x66 #x0F #x2E) 1192 | (define-xm128-encoder ucomiss #x0F #x2E) 1193 | 1194 | (define-xm128-encoder unpckhpd #x66 #x0F #x15) 1195 | (define-xm128-encoder unpckhps #x0F #x15) 1196 | (define-xm128-encoder unpcklpd #x66 #x0F #x14) 1197 | (define-xm128-encoder unpcklps #x0F #x14) 1198 | 1199 | (define-xm128-encoder xorpd #x66 #x0F #x57) 1200 | (define-xm128-encoder xorps #x0F #x57) 1201 | -------------------------------------------------------------------------------- /mach-o-binaries.lisp: -------------------------------------------------------------------------------- 1 | ; macho-binaries.lisp 2 | ; Support for generating mach-o object files 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (defconstant +mh-magic-64+ #xFEEDFACF) 7 | (defconstant +cpu-type-x86-64+ #x01000007) 8 | (defconstant +cpu-subtype-all+ 3) 9 | (defconstant +mh-object+ 1) 10 | (defconstant +mh-def-flags+ #x0) 11 | (defconstant +lc-segment-64+ #x19) 12 | (defconstant +mh-def-prot+ #x7) 13 | (defconstant +mh-def-text-flags+ #x80000000) 14 | (defconstant +mh-def-data-flags+ #x0) 15 | (defconstant +lc-symtab+ #x2) 16 | (defconstant +n-ext+ #x1) 17 | (defconstant +n-undef+ #x0) 18 | (defconstant +n-abs+ #x2) 19 | (defconstant +n-sect+ #xe) 20 | (defconstant +no-sect+ 0) 21 | (defconstant +cs-num+ 1) 22 | (defconstant +ds-num+ 2) 23 | (defconstant +x86-64-reloc-unsigned+ 0) 24 | (defconstant +x86-64-reloc-signed+ 1) 25 | (defconstant +x86-64-reloc-branch+ 2) 26 | (defconstant +r-pcrel-shift+ 24) 27 | (defconstant +r-length-shift+ 25) 28 | (defconstant +r-extern-shift+ 27) 29 | (defconstant +r-type-shift+ 28) 30 | 31 | (define-c-struct mach-header-64 32 | (magic :half) 33 | (cputype :half) 34 | (cpusubtype :half) 35 | (filetype :half) 36 | (ncmds :half) 37 | (sizeofcmds :half) 38 | (flags :half) 39 | (reserved :half)) 40 | 41 | (define-c-struct segment-command-64 42 | (cmd :half) 43 | (cmdsize :half) 44 | (segname :byte 16) 45 | (vmaddr :word) 46 | (vmsize :word) 47 | (fileoff :word) 48 | (filesize :word) 49 | (maxprot :half) 50 | (initprot :half) 51 | (nsects :half) 52 | (flags :half)) 53 | 54 | (define-c-struct section-64 55 | (sectname :byte 16) 56 | (segname :byte 16) 57 | (addr :word) 58 | (size :word) 59 | (offset :half) 60 | (align :half) 61 | (reloff :half) 62 | (nreloc :half) 63 | (flags :half) 64 | (reserved1 :half) 65 | (reserved2 :half) 66 | (reserved3 :half)) 67 | 68 | (define-c-struct symtab-command 69 | (cmd :half) 70 | (cmdsize :half) 71 | (symoff :half) 72 | (nsyms :half) 73 | (stroff :half) 74 | (strsize :half)) 75 | 76 | (define-c-struct nlist-64 77 | (nstrx :half) 78 | (ntype :byte) 79 | (nsect :byte) 80 | (ndesc :byte 2) 81 | (nvalue :word)) 82 | 83 | (define-c-struct relocation-info 84 | (r-address :half) 85 | (r-symbolnum-and-flags :half)) 86 | 87 | (defstruct mach-o-symtab 88 | strtab 89 | ntab 90 | nvec) 91 | 92 | (defun new-mach-o-symtab () 93 | (make-mach-o-symtab :strtab (new-strtab) 94 | :ntab (make-hash-table) 95 | :nvec (make-array 0 :fill-pointer t))) 96 | 97 | (defun add-mach-o-sym (st name sc sect addr) 98 | (assert (not (gethash name (mach-o-symtab-ntab st)))) 99 | (let* ((nl (make-nlist-64 :nstrx (strtab-intern (mach-o-symtab-strtab st) 100 | name) 101 | :ntype (ecase sc 102 | (:int +n-sect+) 103 | (:ext (+ +n-sect+ +n-ext+)) 104 | (:und (+ +n-undef+ +n-ext+))) 105 | :nsect sect 106 | :ndesc #(0 0) 107 | :nvalue addr))) 108 | (setf (gethash name (mach-o-symtab-ntab st)) 109 | (length (mach-o-symtab-nvec st))) 110 | (vector-push-extend nl (mach-o-symtab-nvec st)))) 111 | 112 | (defun mach-o-symtab-member? (st name) 113 | (gethash name (mach-o-symtab-ntab st))) 114 | 115 | (defun mach-o-symtab-strsize (st) 116 | (strtab-size (mach-o-symtab-strtab st))) 117 | 118 | (defun mach-o-symtab-count (st) 119 | (length (mach-o-symtab-nvec st))) 120 | 121 | (defun mach-o-sym-index (st sym) 122 | (gethash sym (mach-o-symtab-ntab st))) 123 | 124 | (defun generate-mach-o-symtab (obj) 125 | (let ((tab (new-mach-o-symtab)) 126 | (fp 0)) 127 | (iter (for def in-vector (asmobj-cdefs obj)) 128 | (add-mach-o-sym tab 129 | (asmdef-name def) 130 | (asmdef-scope def) 131 | +cs-num+ 132 | fp) 133 | (incf fp (length (asmbin-buffer (asmdef-bin def))))) 134 | (iter (for def in-vector (asmobj-ddefs obj)) 135 | (add-mach-o-sym tab 136 | (asmdef-name def) 137 | (asmdef-scope def) 138 | +ds-num+ 139 | fp) 140 | (incf fp (length (asmbin-buffer (asmdef-bin def))))) 141 | (iter (for def in-vector (asmobj-cdefs obj)) 142 | (iter (for rel in-vector (asmbin-relocs (asmdef-bin def))) 143 | (unless (mach-o-symtab-member? tab (asmrel-symbol rel)) 144 | (add-mach-o-sym tab 145 | (asmrel-symbol rel) 146 | :und 147 | +no-sect+ 148 | 0)))) 149 | (iter (for def in-vector (asmobj-ddefs obj)) 150 | (iter (for rel in-vector (asmbin-relocs (asmdef-bin def))) 151 | (unless (mach-o-symtab-member? tab (asmrel-symbol rel)) 152 | (add-mach-o-sym tab 153 | (asmrel-symbol rel) 154 | :und 155 | +no-sect+ 156 | 0)))) 157 | tab)) 158 | 159 | (defun compose-mach-o-rel-num-and-flags (snum pcrel length ext type) 160 | (assert (and (<= pcrel 1) (<= length 3) (<= ext 1) (<= type 15))) 161 | (+ snum 162 | (ash pcrel +r-pcrel-shift+) 163 | (ash length +r-length-shift+) 164 | (ash ext +r-extern-shift+) 165 | (ash type +r-type-shift+))) 166 | 167 | (defun add-mach-o-rel (rel fp st relvec) 168 | (let* ((addr (+ fp (asmrel-offset rel))) 169 | (sn (mach-o-sym-index st (asmrel-symbol rel))) 170 | (pcrel (ecase (asmrel-type rel) 171 | (:abs 0) 172 | (:rel 1) 173 | (:bra 1))) 174 | (len (floor (log (asmrel-width rel) 2))) 175 | (ext 1) 176 | (type (ecase (asmrel-type rel) 177 | (:abs +x86-64-reloc-unsigned+) 178 | (:rel +x86-64-reloc-signed+) 179 | (:bra +x86-64-reloc-branch+))) 180 | (snf (compose-mach-o-rel-num-and-flags sn pcrel len ext type))) 181 | (vector-push-extend (make-relocation-info :r-address addr 182 | :r-symbolnum-and-flags snf) 183 | relvec))) 184 | 185 | (defun add-mach-o-rels-for-defs (defs st relvec) 186 | (let ((fp 0)) 187 | (iter (for def in-vector defs) 188 | (iter (for rel in-vector (asmbin-relocs (asmdef-bin def))) 189 | (add-mach-o-rel rel fp st relvec)) 190 | (incf fp (length (asmbin-buffer (asmdef-bin def))))))) 191 | 192 | (defun generate-mach-o-relvec (obj st) 193 | (let ((relvec (make-array 0 :fill-pointer t))) 194 | (add-mach-o-rels-for-defs (asmobj-cdefs obj) st relvec) 195 | (add-mach-o-rels-for-defs (asmobj-ddefs obj) st relvec) 196 | relvec)) 197 | 198 | (defstruct mach-o-metrics 199 | header-off 200 | lc-off 201 | cs-off 202 | ds-off 203 | crel-off 204 | drel-off 205 | symtab-off 206 | strtab-off 207 | strtab-sz 208 | crel-count 209 | drel-count 210 | sym-count) 211 | 212 | (defun code-section-size (obj) 213 | (iter (for def in-vector (asmobj-cdefs obj)) 214 | (sum (length (asmbin-buffer (asmdef-bin def)))))) 215 | 216 | (defun data-section-size (obj) 217 | (iter (for def in-vector (asmobj-ddefs obj)) 218 | (sum (length (asmbin-buffer (asmdef-bin def)))))) 219 | 220 | (defun count-code-relocations (obj) 221 | (iter (for def in-vector (asmobj-cdefs obj)) 222 | (sum (length (asmbin-relocs (asmdef-bin def)))))) 223 | 224 | (defun count-data-relocations (obj) 225 | (iter (for def in-vector (asmobj-ddefs obj)) 226 | (sum (length (asmbin-relocs (asmdef-bin def)))))) 227 | 228 | ; mach-o file map 229 | ; mach header (32 bytes) 230 | ; segment command (72 bytes) 231 | ; code section descriptor (80 bytes) 232 | ; data section descriptor (80 bytes) 233 | ; symtab command (24 bytes) 234 | ; code section data 235 | ; data section data 236 | ; relocation entries 237 | ; symbol table entries 238 | ; string table 239 | 240 | (defun compute-mach-o-metrics (obj st) 241 | (let* ((mh-size (sizeof-c-struct (make-mach-header-64))) 242 | (sc-size (sizeof-c-struct (make-segment-command-64))) 243 | (sectc-size (sizeof-c-struct (make-section-64))) 244 | (symc-size (sizeof-c-struct (make-symtab-command))) 245 | (nlist-size (sizeof-c-struct (make-nlist-64))) 246 | (rel-size (sizeof-c-struct (make-relocation-info))) 247 | (cs-size (code-section-size obj)) 248 | (ds-size (data-section-size obj)) 249 | (crel-count (count-code-relocations obj)) 250 | (drel-count (count-data-relocations obj)) 251 | (crel-size (* rel-size crel-count)) 252 | (drel-size (* rel-size drel-count)) 253 | (sym-count (mach-o-symtab-count st)) 254 | (symt-size (* nlist-size sym-count)) 255 | (header-off 0) 256 | (lc-off mh-size) 257 | (lc-size (+ sc-size (* 2 sectc-size) symc-size)) 258 | (cs-off (+ lc-off lc-size)) 259 | (ds-off (+ cs-off cs-size)) 260 | (crel-off (+ ds-off ds-size)) 261 | (drel-off (+ crel-off crel-size)) 262 | (symtab-off (+ drel-off drel-size)) 263 | (strtab-off (+ symtab-off symt-size)) 264 | (strtab-sz (mach-o-symtab-strsize st))) 265 | (make-mach-o-metrics :header-off header-off 266 | :lc-off lc-off 267 | :cs-off cs-off 268 | :ds-off ds-off 269 | :crel-off crel-off 270 | :drel-off drel-off 271 | :symtab-off symtab-off 272 | :strtab-off strtab-off 273 | :strtab-sz strtab-sz 274 | :crel-count crel-count 275 | :drel-count drel-count 276 | :sym-count sym-count))) 277 | 278 | 279 | (defun emit-mach-o-header (met frag) 280 | (let* ((cmdsize (- (mach-o-metrics-cs-off met) (mach-o-metrics-lc-off met))) 281 | (mh (make-mach-header-64 :magic +mh-magic-64+ 282 | :cputype +cpu-type-x86-64+ 283 | :cpusubtype +cpu-subtype-all+ 284 | :filetype +mh-object+ 285 | :ncmds 2 286 | :sizeofcmds cmdsize 287 | :flags +mh-def-flags+ 288 | :reserved 0))) 289 | (emit-c-struct mh frag))) 290 | 291 | (defun emit-mach-o-seg-lc (met frag) 292 | (let* ((sc-size (sizeof-c-struct (make-segment-command-64))) 293 | (sct-size (sizeof-c-struct (make-section-64))) 294 | (lc-size (+ sc-size (* 2 sct-size))) 295 | (seg-size (- (mach-o-metrics-crel-off met) 296 | (mach-o-metrics-cs-off met))) 297 | (cs-size (- (mach-o-metrics-ds-off met) (mach-o-metrics-cs-off met))) 298 | (ds-size (- (mach-o-metrics-crel-off met) (mach-o-metrics-ds-off met))) 299 | (cs-addr 0) 300 | (ds-addr cs-size) 301 | (crel-count (mach-o-metrics-crel-count met)) 302 | (drel-count (mach-o-metrics-drel-count met)) 303 | (crel-off (if (> crel-count 0) (mach-o-metrics-crel-off met) 0)) 304 | (drel-off (if (> drel-count 0) (mach-o-metrics-drel-off met) 0)) 305 | (seg-lc (make-segment-command-64 :cmd +lc-segment-64+ 306 | :cmdsize lc-size 307 | :segname "" 308 | :vmaddr 0 309 | :vmsize seg-size 310 | :fileoff (mach-o-metrics-cs-off met) 311 | :filesize seg-size 312 | :maxprot +mh-def-prot+ 313 | :initprot +mh-def-prot+ 314 | :nsects 2 315 | :flags 0)) 316 | (cs-lc (make-section-64 :sectname (asciify-string "__text") 317 | :segname (asciify-string "__TEXT") 318 | :addr cs-addr 319 | :size cs-size 320 | :offset (mach-o-metrics-cs-off met) 321 | :align 0 322 | :reloff crel-off 323 | :nreloc crel-count 324 | :flags +mh-def-text-flags+ 325 | :reserved1 0 326 | :reserved2 0 327 | :reserved3 0)) 328 | (ds-lc (make-section-64 :sectname (asciify-string "__data") 329 | :segname (asciify-string "__DATA") 330 | :addr ds-addr 331 | :size ds-size 332 | :offset (mach-o-metrics-ds-off met) 333 | :align 0 334 | :reloff drel-off 335 | :nreloc drel-count 336 | :flags +mh-def-data-flags+ 337 | :reserved1 0 338 | :reserved2 0 339 | :reserved3 0))) 340 | (emit-c-struct seg-lc frag) 341 | (emit-c-struct cs-lc frag) 342 | (emit-c-struct ds-lc frag))) 343 | 344 | (defun emit-mach-o-symtab-lc (met frag) 345 | (let* ((symc-size (sizeof-c-struct (make-symtab-command))) 346 | (symt-lc 347 | (make-symtab-command :cmd +lc-symtab+ 348 | :cmdsize symc-size 349 | :symoff (mach-o-metrics-symtab-off met) 350 | :nsyms (mach-o-metrics-sym-count met) 351 | :stroff (mach-o-metrics-strtab-off met) 352 | :strsize (mach-o-metrics-strtab-sz met)))) 353 | (emit-c-struct symt-lc frag))) 354 | 355 | (defun emit-mach-o-def-vec (defs frag) 356 | (iter (for def in-vector defs) 357 | (emit-byte-vector frag (asmbin-buffer (asmdef-bin def))))) 358 | 359 | (defun emit-mach-o-sects (obj frag) 360 | (emit-mach-o-def-vec (asmobj-cdefs obj) frag) 361 | (emit-mach-o-def-vec (asmobj-ddefs obj) frag)) 362 | 363 | (defun emit-mach-o-symtab (tab frag) 364 | (iter (for nl in-vector (mach-o-symtab-nvec tab)) 365 | (emit-c-struct nl frag)) 366 | (emit-byte-vector frag (strtab-vec (mach-o-symtab-strtab tab)))) 367 | 368 | (defun emit-mach-o-relocs (rv frag) 369 | (iter (for rel in-vector rv) 370 | (emit-c-struct rel frag))) 371 | 372 | (defun generate-mach-o-obj (obj) 373 | (let* ((frag (new-asmfrag)) 374 | (st (generate-mach-o-symtab obj)) 375 | (rv (generate-mach-o-relvec obj st)) 376 | (met (compute-mach-o-metrics obj st))) 377 | (emit-mach-o-header met frag) 378 | (emit-mach-o-seg-lc met frag) 379 | (emit-mach-o-symtab-lc met frag) 380 | (emit-mach-o-sects obj frag) 381 | (emit-mach-o-relocs rv frag) 382 | (emit-mach-o-symtab st frag) 383 | (asmfrag-buffer frag))) 384 | -------------------------------------------------------------------------------- /package.lisp: -------------------------------------------------------------------------------- 1 | ; package.lisp 2 | ; Package definition for the assembler. 3 | 4 | (defpackage "AMD64-ASM" 5 | (:nicknames "ASM") 6 | (:use "COMMON-LISP" "ITERATE") 7 | (:export "RUN-TESTS" "ASSEMBLE-AND-OUTPUT")) 8 | -------------------------------------------------------------------------------- /readme.txt: -------------------------------------------------------------------------------- 1 | amd64-asm is a Lisp library for generating AMD64 machine code. 2 | 3 | It features: 4 | 5 | - A custom, sexpr-based assembler syntax 6 | - Support for much of the "general purpose instructions" subset 7 | - Nearly complete support for the "128-bit media instructions" subset 8 | - A table-driven encoder for easy definition of new instructions 9 | - Support for undefined references and data relocations 10 | - A test-suite built around comparing the library's output to that of yasm 11 | - A random tester, guided by the encoding tables to ensure coverage 12 | - The expected set of encoding optimizations, including jump relaxation 13 | - Direct generation of Mach-O object files 14 | 15 | Notable omissions: 16 | 17 | - Support for 16-bit or 32-bit modes 18 | - Instruction aliases (for jxx, cmovxx, setxx) 19 | - Many "antiquated" general purpose instructions 20 | - Instructions that operate on segment, debug, or condition registers 21 | - Segment register overrides for memory operations 22 | - The "system instructions" subset 23 | - The "64-bit media instructions" subset 24 | - The "x87 floating-point" instructions subset 25 | 26 | Future work: 27 | 28 | - Support for specifying segment overrides in memory references 29 | - A subset of system instructions useful to user-mode code 30 | - Better checking and error messages for incorrect source code 31 | 32 | Installation notes: 33 | 34 | The library is distributed as a standard ASDF system. It requires cl-iterate. 35 | It has only been tested on SBCL/Darwin. It should, however, work on any CL. 36 | For the testsuite to work, yasm must be installed. The location of the binary 37 | is given by the variable *yasmbin*, which defaults to /usr/local/bin/yasm. 38 | 39 | Assembler syntax: 40 | 41 | This section gives a pseudo-grammar for assembly fragments. Note that in the 42 | description {FOO} means "one or more FOO", while [FOO] means "zero or one FOO". 43 | 44 | MODULE: ({DEFINITION}) 45 | DEFINITION: (DECL SCOPE NAME {STATEMENT}) 46 | DECL: :proc | :var 47 | SCOPE: :int | :ext 48 | NAME: a symbol specifying the name of the definition 49 | 50 | For data definitions (DECL = :var), the syntax for statements is: 51 | 52 | STATEMENT: (WIDTH-SPECIFIER VALUE) | SYMCONST 53 | WIDTH-SPECIFIER: one of :byte, :half, :word, or :wide 54 | VALUE: an appropriately-sized integer 55 | SYMCONST: (WIDTH-SPECIFIER NAME [ADDEND]) 56 | NAME: a symbol naming an external value 57 | ADDEND: a signed integer offset from the named symbol 58 | 59 | For code definitions (DECL = :proc), the syntax for statements is: 60 | 61 | STATEMENT: LABEL | INSTRUCTION 62 | LABEL: a symbol naming the label 63 | INSTRUCTION: (MNEMONIC {OPERAND}) 64 | MNEMONIC: an AMD64 instruction name, as a keyword 65 | OPERAND: REGISTER | IMMEDIATE | MEM-REF 66 | REGISTER: an AMD64 byte, dword, qword, or xmm register, as a keyword 67 | IMMEDIATE: an appropriately-sized integer | SYMCONST 68 | MEM-REF: (WIDTH-SPECIFIER BASE-SPECIFIER INDEX-SPECIFIER SCALE IMMEDIATE) 69 | BASE-SPECIFIER: REGISTER | :rip | :abs 70 | INDEX-SPECIFIER: REGISTER | nil 71 | SCALE: one of 1, 2, 4, or 8 72 | 73 | For memory operands, a width specifier is required, and that specifier must be 74 | compatible with the other operand to the instruction. Addressing is 64-bit, so 75 | BASE-SPECIFIER and INDEX-SPECIFIER should be 64-bit registers. The special 76 | keywords :rip and :abs as BASE-SPECIFIER's signify RIP-relative and absolute 77 | addressing, respectively. Stack instructions are always 64-bit in AMD64, so 78 | pushing less than a full register is not possible. Scale and displacement must 79 | be integers (specifying nil to signify no scale or displacement is not allowed). 80 | 81 | Since instruction aliases aren't supported, only the following cc codes are 82 | recognized for jxx/cmovxx/setxx instructions: 83 | o no b nb z nz be nbe s ns p np l ge le g 84 | 85 | Since the assembler does not default the width of symbolic constants even when 86 | doing so would be unambiguous, the call syntax is slightly awkward: 87 | 88 | (call (:half foo)) instead of (call foo) 89 | 90 | Note that the binary emitter doesn't try to translate Lisp symbol names. When 91 | interfacing with C code, it is generally necessary to use the case-preserving 92 | syntax for symbols and to avoid special characters in names. 93 | 94 | The following bit of code demonstrates an assembly source fragment. 95 | 96 | '((:proc :ext |_memcpy16b| 97 | ; arg dst in rdi 98 | ; arg src in rsi 99 | ; arg count, in bytes, in rdx 100 | ; temp loop-count in rcx 101 | (:xor :rcx :rcx) 102 | (:cmp :rcx :rdx) 103 | (:jz loopexit) 104 | loophead 105 | (:movdqa :xmm0 (:wide :rsi :rcx 1 0)) 106 | (:movdqa (:wide :rdi :rcx 1 0) :xmm0) 107 | (:add :rcx 16) 108 | (:cmp :rcx :rdx) 109 | (:jnz loophead) 110 | loopexit 111 | (:ret)))) 112 | 113 | Assembler interface: 114 | 115 | The assembler has a very simple interface. The library is contained in a package 116 | AMD64-ASM, which has the nickname ASM. It exposes one main function: 117 | 118 | (assemble-and-output source type file) 119 | 120 | This function assembles a source module and writes it to a binary object file. 121 | 122 | 'source' is a source fragment 123 | 'type' is the object type --- the only supported one right now is :mach-o 124 | 'file' is the name of the output file 125 | 126 | The test suite is exposed via another function: 127 | 128 | (run-tests) 129 | 130 | This function runs all the tests defined in the test suite. 131 | 132 | Assembler internals: 133 | 134 | The code is small and simple. Read it ;) Adding a new instruction is usually as 135 | easy as adding a new pattern to encoders.lisp. There is a large comment in that 136 | file that gives the syntax for defining new encoders. 137 | 138 | Licensing: 139 | 140 | This work is copyright 2007 by Rayiner Hashem. Others may use this code 141 | freely under the terms of the GNU Lesser General Public License (LGPL) with 142 | Franz Inc's clarified preamble for Lisp libraries. 143 | See: http://opensource.franz.com/preamble.html 144 | -------------------------------------------------------------------------------- /testsuite.lisp: -------------------------------------------------------------------------------- 1 | ; testsuite.lisp 2 | ; Testsuite for the assembler. 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (eval-when (:compile-toplevel) 7 | (defparameter *tests* nil)) 8 | 9 | (defparameter *yasmbin* "/usr/local/bin/yasm") 10 | (defparameter *tempfile* "test.nasm") 11 | (defparameter *tempbin* "test") 12 | 13 | (defparameter *skip-tests* (append '(:call :pinsrw) *sdis*)) 14 | 15 | (defparameter *insn-mapping* 16 | '((:imul3 . :imul))) 17 | 18 | (defmacro defasmtest (name &body body) 19 | (if (not (member name *tests*)) 20 | (push name *tests*)) 21 | `(defun ,name () 22 | (let ((asm ',body)) 23 | (do-test ',name asm)))) 24 | 25 | (defun do-test (name source) 26 | (declare (special *passes*) 27 | (special *fails*)) 28 | (let ((bin (assemble-code source)) 29 | (ref (yasm-assemble source))) 30 | (format t "Running ~A... " name) 31 | (if (equalp (asmbin-buffer bin) ref) 32 | (progn 33 | (incf *passes*) 34 | (format t "pass~%")) 35 | (progn 36 | (incf *fails*) 37 | (format t "fail~%") 38 | (print-tempfile *tempfile*) 39 | (format t "~A~%" source) 40 | (print-codevector ref) 41 | (print-codevector (asmbin-buffer bin)))))) 42 | 43 | (defun run-tests (&optional inc exc) 44 | (let ((*passes* 0) 45 | (*fails* 0)) 46 | (declare (special *passes*) 47 | (special *fails*)) 48 | (when (not (or inc exc)) 49 | (iter (for name in (reverse *tests*)) 50 | (funcall name))) 51 | (randomly-test-all-encoders *encoders* inc (or exc *skip-tests*)) 52 | (map nil #'delete-file (list *tempfile* *tempbin*)) 53 | (format t "Passed ~A, Failed ~A~%" *passes* *fails*))) 54 | 55 | (defun random-reg-operand (width) 56 | (nth (random 5) (ecase width 57 | (:byte '(:bl :cl :r8b :r12b :r13b)) 58 | (:half '(:ebx :ecx :r8d :r12d :r13d)) 59 | (:word '(:rbx :rcx :r8 :r12 :r13))))) 60 | 61 | (defun random-xreg-operand () 62 | (nth (random 5) '(:xmm0 :xmm1 :xmm9 :xmm10 :xmm11))) 63 | 64 | (defun random-imm-operand (width) 65 | (let ((range (ecase width 66 | (:byte 100) 67 | (:short 1000) 68 | (:half 1000000) 69 | (:word 100000000000)))) 70 | (- (random (* 2 range)) range))) 71 | 72 | (defun random-mem-operand (width) 73 | (nth (random 8) 74 | `((,width :rax nil 1 0) 75 | (,width :rbp nil 1 0) 76 | (,width :rcx nil 1 100) 77 | (,width :rcx :rax 4 1000) 78 | (,width :rsp nil 1 100) 79 | (,width :rip nil 1 1000) 80 | (,width :abs nil 1 100) 81 | (,width :abs :rcx 8 1000)))) 82 | 83 | (defun random-reg-or-mem-operand (width) 84 | (if (eql (random 2) 0) 85 | (random-reg-operand width) 86 | (random-mem-operand width))) 87 | 88 | (defun random-xreg-or-mem-operand (width) 89 | (if (eql (random 2) 0) 90 | (random-xreg-operand) 91 | (random-mem-operand width))) 92 | 93 | (defun random-operand (constraint) 94 | (if (or (reg? constraint) (immediate? constraint)) 95 | constraint 96 | (ecase constraint 97 | (rm8 (random-reg-or-mem-operand :byte)) 98 | (rm32 (random-reg-or-mem-operand :half)) 99 | (rm64 (random-reg-or-mem-operand :word)) 100 | (m8 (random-mem-operand :byte)) 101 | (m32 (random-mem-operand :half)) 102 | (m64 (random-mem-operand :word)) 103 | (m128 (random-mem-operand :wide)) 104 | (r8 (random-reg-operand :byte)) 105 | (r32 (random-reg-operand :half)) 106 | (r64 (random-reg-operand :word)) 107 | (x (random-xreg-operand)) 108 | (xm32 (random-xreg-or-mem-operand :half)) 109 | (xm64 (random-xreg-or-mem-operand :word)) 110 | (xm128 (random-xreg-or-mem-operand :wide)) 111 | (imm8 (random-imm-operand :byte)) 112 | (imm16 (random-imm-operand :short)) 113 | (imm32 (random-imm-operand :half)) 114 | (imm64 (random-imm-operand :word))))) 115 | 116 | (defun generate-random-test (insn pat) 117 | (cons insn (mapcar #'random-operand pat))) 118 | 119 | (defun generate-test-name (insn i) 120 | (intern (concatenate 'string "TEST-" (symbol-name insn) 121 | (format nil "-~D" i)))) 122 | 123 | (defun randomly-test-clause (num insn clause) 124 | (let ((src (iter (for i from 0 below 4) 125 | (collect (generate-random-test insn clause))))) 126 | (do-test (generate-test-name insn num) src))) 127 | 128 | (defun randomly-test-encoder (enc) 129 | (let ((insn (first enc))) 130 | (iter (for clause in (second enc)) 131 | (for num from 0 below (length (second enc))) 132 | (randomly-test-clause num insn (first clause))))) 133 | 134 | (defun should-run? (insn inc exc) 135 | (or (and (not inc) (not exc)) 136 | (and inc (not exc) (member insn inc)) 137 | (and (not inc) exc (not (member insn exc))) 138 | (and inc exc (member insn inc) (not (member insn exc))))) 139 | 140 | (defun randomly-test-all-encoders (encs inc exc) 141 | (iter (for enc in encs) 142 | (when (should-run? (first enc) inc exc) 143 | (randomly-test-encoder enc)))) 144 | 145 | (defun nasmize-symbol (sym) 146 | (string-downcase (symbol-name sym))) 147 | 148 | (defun nasmize-specifier (spec) 149 | (case spec 150 | (:byte "byte") 151 | (:half "dword") 152 | (:word "qword") 153 | (:wide "dqword"))) 154 | 155 | (defun nasmize-mem (mem) 156 | (with-output-to-string (str) 157 | (format str "~A " (nasmize-specifier (first mem))) 158 | (format str "[ ") 159 | (cond 160 | ((eql (second mem) :rip) 161 | (format str "rip ") 162 | (unless (eql (fifth mem) 0) 163 | (format str "+ ~A " (fifth mem)))) 164 | ((eql (second mem) :abs) 165 | (format str "~A" (fifth mem)) 166 | (when (third mem) 167 | (format str " + ~A * ~A " (nasmize-symbol (third mem)) (fourth mem)))) 168 | (t 169 | (format str "~A " (nasmize-symbol (second mem))) 170 | (when (third mem) 171 | (format str " + ~A * ~A " (nasmize-symbol (third mem)) (fourth mem))) 172 | (unless (eql (fifth mem) 0) 173 | (format str " + ~A " (fifth mem))))) 174 | (format str " ]"))) 175 | 176 | (defun nasmize-label-ref (elt) 177 | (string-downcase (symbol-name elt))) 178 | 179 | (defun nasmize-element (str elt) 180 | (cond 181 | ((reg? elt) 182 | (format str "~A " (nasmize-symbol elt))) 183 | ((immediate? elt) 184 | (format str "~D " elt)) 185 | ((mem? elt) 186 | (format str "~A " (nasmize-mem elt))) 187 | ((symbolp elt) 188 | (format str "~A" (nasmize-label-ref elt))))) 189 | 190 | (defun nasmize-opcode (str opc) 191 | (format str "~A " (nasmize-symbol (or (cdr (assoc opc *insn-mapping*)) opc)))) 192 | 193 | (defun nasmize-instruction (insn) 194 | (with-output-to-string (str) 195 | (nasmize-opcode str (first insn)) 196 | (iter (for elt in (butlast (rest insn))) 197 | (nasmize-element str elt) 198 | (format str ", ")) 199 | (when (rest insn) 200 | (nasmize-element str (first (last insn)))))) 201 | 202 | (defun nasmize-label (label) 203 | (string-downcase (concatenate 'string (symbol-name label) ":"))) 204 | 205 | (defun nasmize-source (source file) 206 | (with-open-file (file file :direction :output :if-exists :supersede) 207 | (format file "bits 64~%") 208 | (iter (for line in source) 209 | (if (label-line? line) 210 | (format file "~A~%" (nasmize-label line)) 211 | (format file "~A~%" (nasmize-instruction line)))))) 212 | 213 | (defun run-yasm (filename) 214 | (sb-ext:run-program *yasmbin* (list "-fbin" filename) 215 | :input nil :output *trace-output*)) 216 | 217 | (defun print-tempfile (filename) 218 | (sb-ext:run-program "/bin/cat" (list filename) 219 | :input nil :output *trace-output*)) 220 | 221 | (defun yasm-assemble (source) 222 | (nasmize-source source *tempfile*) 223 | (run-yasm *tempfile*) 224 | (load-file-into-vector *tempbin*)) 225 | 226 | ; Note that we can't specify tests that might invoke 227 | ; special encodings for eax as the destination, since 228 | ; yasm does those optimizations and we don't. Also 229 | ; remember that we only emit 64-bit code. We don't 230 | ; ever generate address-size overrides, so use only 231 | ; 64-bit registers in mem forms. 232 | 233 | (defasmtest reg-test 234 | (:add :al :bl) 235 | (:add :al :r10b) 236 | (:add :eax :ebx) 237 | (:add :eax :r10d) 238 | (:add :r10d :ebx) 239 | (:add :rax :r10) 240 | (:add :r10 :rax)) 241 | 242 | (defasmtest reg-test-2 243 | (:add :ebx 1) 244 | (:add :ecx 1) 245 | (:add :edx 1) 246 | (:add :esi 1) 247 | (:add :edi 1) 248 | (:add :esp 1) 249 | (:add :ebp 1)) 250 | 251 | (defasmtest reg-test-3 252 | (:add :r8d 1) 253 | (:add :r9d 1) 254 | (:add :r10d 1) 255 | (:add :r11d 1)) 256 | 257 | (defasmtest reg-test-4 258 | (:add :r12d 1) 259 | (:add :r13d 1) 260 | (:add :r14d 1) 261 | (:add :r15d 1)) 262 | 263 | (defasmtest rip-test 264 | (:mov :al (:byte :rip nil 1 25)) 265 | (:mov :eax (:half :rip nil 1 35)) 266 | (:mov :rbx (:word :rip nil 1 45))) 267 | 268 | (defasmtest mem-test 269 | (:mov :al (:byte :rbp nil 1 0)) 270 | (:mov :r10d (:half :rbp nil 1 10)) 271 | (:mov :r10d (:half :rbp :rcx 4 10)) 272 | (:mov :rax (:word :rbp :rcx 8 130))) 273 | 274 | (defasmtest mem-test-2 275 | (:mov (:byte :rbp nil 1 0) :al) 276 | (:mov (:half :rbx nil 1 10) :eax) 277 | (:mov (:word :rbx :rbp 1 150) :r10)) 278 | 279 | (defasmtest mem-test-3 280 | (:add (:half :rbp nil 1 0) :ebx) 281 | (:add (:word :rbp nil 1 0) :rbx) 282 | (:add (:half :rbp :rbx 1 10) :ecx)) 283 | 284 | (defasmtest abs-test 285 | (:mov :al (:byte :abs nil 1 10)) 286 | (:mov :eax (:half :abs nil 1 10)) 287 | (:mov :rax (:word :abs nil 1 1000))) 288 | 289 | (defasmtest abs-test-2 290 | (:mov :al (:byte :abs :rbp 4 10)) 291 | (:mov :eax (:half :abs :rbp 8 25)) 292 | (:mov :rbx (:word :abs :rcx 8 1025))) 293 | 294 | (defasmtest test-imm 295 | (:add :bl 10) 296 | (:add :ebx 10) 297 | (:sub :ebx 1000) 298 | (:sub :rbx 100)) 299 | 300 | (defasmtest test-sdis 301 | (:push :rbp) 302 | (:mov :rbp :rsp) 303 | (:mov :eax (:half :rbp nil 1 8)) 304 | (:mov :ebx (:half :rbp nil 1 16)) 305 | loophead 306 | (:inc :ebx) 307 | (:cmp :ebx :eax) 308 | (:jnz loophead) 309 | (:leave) 310 | (:retn 16)) 311 | 312 | (defasmtest test-sdis-2 313 | (:push :rbp) 314 | (:mov :rbp :rsp) 315 | (:mov :eax (:half :rbp nil 1 8)) 316 | (:mov :ebx (:half :rbp nil 1 16)) 317 | loophead 318 | (:cmp :ebx :eax) 319 | (:jnz loopexit) 320 | (:inc :ebx) 321 | (:jmp loophead) 322 | loopexit 323 | (:leave) 324 | (:retn 16)) 325 | 326 | (defmacro define-big-function (name) 327 | `(defasmtest ,name 328 | (:xor :rax :rax) 329 | loophead 330 | ,@(iter (for i from 0 below 50) (collect '(:mov :rbx 1))) 331 | (:jmp loophead))) 332 | 333 | (define-big-function test-sdis-3) 334 | 335 | (defparameter *source1* 336 | '((:var :ext |_foo| (:half 12)) 337 | (:var :int |_bar| (:half 13)) 338 | (:proc :ext |_main| 339 | (:mov :edi (:half :rip nil 1 (:half |_bar|))) 340 | (:call (:half |_xdouble|)) 341 | (:mov :edi :eax) 342 | (:call (:half |_xtriple|)) 343 | (:ret)) 344 | (:proc :ext |_xdouble| 345 | (:add :rdi :rdi) 346 | (:mov :rax :rdi) 347 | (:ret)))) 348 | 349 | (defparameter *source2* 350 | '((:proc :ext |_xtriple| 351 | (:mov :rbx 3) 352 | (:imul :rdi :rbx) 353 | (:mov :rax :rdi) 354 | (:ret)))) 355 | 356 | ; x86-64 arg registers: rdi, rsi, rdx, rcx, r8, r9 357 | (defparameter *source3* 358 | '((:proc :ext |_memcpy16b| 359 | ; arg dst in rdi 360 | ; arg src in rsi 361 | ; arg count, in bytes, in rdx 362 | ; temp loop-count in rcx 363 | (:xor :rcx :rcx) 364 | (:cmp :rcx :rdx) 365 | (:jz loopexit) 366 | loophead 367 | (:movdqa :xmm0 (:wide :rsi :rcx 1 0)) 368 | (:movdqa (:wide :rdi :rcx 1 0) :xmm0) 369 | (:add :rcx 16) 370 | (:cmp :rcx :rdx) 371 | (:jnz loophead) 372 | loopexit 373 | (:ret)))) 374 | -------------------------------------------------------------------------------- /utilities.lisp: -------------------------------------------------------------------------------- 1 | ; utilities.lisp 2 | ; Utility routines used by assembler. 3 | 4 | (in-package "AMD64-ASM") 5 | 6 | (defun prefixsym (pfx s1 &optional package) 7 | (let ((str (concatenate 'string pfx (symbol-name s1)))) 8 | (if package 9 | (intern str package) 10 | (intern str)))) 11 | 12 | (defun catsym (s1 s2) 13 | (intern (concatenate 'string (symbol-name s1) (symbol-name s2)))) 14 | 15 | (defun catsym- (s1 s2) 16 | (intern (concatenate 'string (symbol-name s1) "-" (symbol-name s2)))) 17 | 18 | (defun bounded-by? (value lower upper) 19 | (and (>= value lower) (<= value upper))) 20 | 21 | (defun signed-width (value) 22 | (cond 23 | ((bounded-by? value -128 127) 1) 24 | ((bounded-by? value -32768 32767) 2) 25 | ((bounded-by? value -2147483648 2147483647) 4) 26 | ((bounded-by? value -9223372036854775808 9223372036854775807) 8) 27 | (t nil))) 28 | 29 | (defun unsigned-width (value) 30 | (cond 31 | ((bounded-by? value 0 255) 1) 32 | ((bounded-by? value 0 65535) 2) 33 | ((bounded-by? value 0 4294967295) 4) 34 | ((bounded-by? value 0 18446744073709551615) 8) 35 | (t nil))) 36 | 37 | (defmacro to-fixpoint (&body body) 38 | (let ((chg (gensym))) 39 | `(let ((,chg t)) 40 | (flet ((mark-changed () (setf ,chg t))) 41 | (tagbody 42 | header 43 | (setf ,chg nil) 44 | ,@body 45 | (if ,chg (go header))))))) 46 | 47 | (defun extend-vector-with-vector (source dest) 48 | (iter (for elt in-vector source) 49 | (vector-push-extend elt dest))) 50 | 51 | (defun print-codevector (cv) 52 | (iter (for byte in-vector cv) 53 | (format t "~2,'0X " byte)) 54 | (format t "~%")) 55 | 56 | (defun load-file-into-vector (file) 57 | (with-open-file (stream file :direction :input 58 | :element-type 'unsigned-byte) 59 | (let ((out (make-array (file-length stream)))) 60 | (read-sequence out stream) 61 | out))) 62 | 63 | (defun store-vector-into-file (vec file) 64 | (with-open-file (stream file 65 | :direction :output 66 | :if-exists :supersede 67 | :element-type 'unsigned-byte) 68 | (write-sequence vec stream))) 69 | 70 | (defun asciify-string (str) 71 | (map 'vector #'char-code str)) 72 | --------------------------------------------------------------------------------