├── .gitignore ├── .travis.yml ├── LICENSE.txt ├── README.md ├── disassemble ├── .gitignore ├── fcdisasm.rkt ├── main.rkt ├── nasm.rkt ├── pb.rkt ├── test.rkt ├── test0.rkt ├── test2.rkt ├── test_fp.rkt └── vm.rkt ├── info.rkt └── machine-code ├── disassembler.rkt └── disassembler ├── arm-a64.sls ├── arm-aarch64.sls ├── arm-private.sls ├── private.rkt ├── x86-opcodes.rkt └── x86.rkt /.gitignore: -------------------------------------------------------------------------------- 1 | compiled/ 2 | *~ 3 | 4 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | 3 | sudo: false 4 | 5 | env: 6 | global: 7 | # Supply a global RACKET_DIR environment variable. This is where 8 | # Racket will be installed. A good idea is to use ~/racket because 9 | # that doesn't require sudo to install and is therefore compatible 10 | # with Travis CI's newer container infrastructure. 11 | - RACKET_DIR=~/racket 12 | matrix: 13 | # Supply at least one RACKET_VERSION environment variable. This is 14 | # used by the install-racket.sh script (run at before_install, 15 | # below) to select the version of Racket to download and install. 16 | # 17 | # Supply more than one RACKET_VERSION (as in the example below) to 18 | # create a Travis-CI build matrix to test against multiple Racket 19 | # versions. 20 | # 21 | # The RELEASE snapshot is only available during the release process. 22 | 23 | 24 | 25 | 26 | - RACKET_VERSION=7.5 27 | - RACKET_VERSION=HEAD 28 | 29 | # You may want to test against certain versions of Racket, without 30 | # having them count against the overall success/failure. 31 | matrix: 32 | # Fast finish: Overall build result is determined as soon as any of 33 | # its rows have failed, or, all of its rows that aren't allowed to 34 | # fail have succeeded. 35 | fast_finish: true 36 | 37 | 38 | before_install: 39 | - git clone https://github.com/greghendershott/travis-racket.git 40 | - cat travis-racket/install-racket.sh | bash # pipe to bash not sh! 41 | - export PATH="${RACKET_DIR}/bin:${PATH}" #install-racket.sh can't set for us 42 | 43 | install: 44 | raco pkg install --deps search-auto 45 | 46 | before_script: 47 | 48 | # Here supply steps such as raco make, raco test, etc. 49 | # 50 | # Tip: Use `raco pkg install --deps search-auto ` to install any 51 | # required packages without getting stuck on a confirmation prompt. 52 | script: 53 | raco test -x -p disassemble 54 | 55 | # NOTE: If your repo is a Racket package with an info.rkt that 56 | # includes some `deps`, the following is more elegant: 57 | # 58 | # script: 59 | # - cd .. # Travis did a cd into the dir. Back up, for the next: 60 | # - raco pkg install --deps search-auto --link 61 | # - raco test -x -p 62 | 63 | after_script: 64 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2014 Sam Tobin-Hochstadt 2 | Copyright (C) 2008-2013 Göran Weinholt 3 | Copyright (C) 2009 Andreas Rottmann 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a 6 | copy of this software and associated documentation files (the "Software"), 7 | to deal in the Software without restriction, including without limitation 8 | the rights to use, copy, modify, merge, publish, distribute, sublicense, 9 | and/or sell copies of the Software, and to permit persons to whom the 10 | Software is furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in 13 | all copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 | THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 | FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 | DEALINGS IN THE SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![Build Status](https://api.travis-ci.org/samth/disassemble.svg)](https://travis-ci.org/samth/disassemble) 2 | 3 | A disassembler for JITed functions in Racket. 4 | 5 | To install: 6 | 7 | % raco pkg install disassemble 8 | 9 | To use it, try something like this: 10 | 11 | ``` 12 | [samth@punge:~/sw/disassemble (master) plt] racket 13 | Welcome to Racket v6.0.1.10. 14 | > (require disassemble) 15 | > (define (f x) 1) 16 | > (disassemble f) 17 | 0: 488943f8 (mov (mem64+ rbx #x-8) rax) 18 | 4: 4883c3f8 (add rbx #xfffffffffffffff8) 19 | 8: b803000000 (mov eax #x3) 20 | d: 4c8b75c8 (mov r14 (mem64+ rbp #x-38)) 21 | 11: 4883c428 (add rsp #x28) 22 | 15: 415d (pop r13) 23 | 17: 415c (pop r12) 24 | 19: 5b (pop rbx) 25 | 1a: 5d (pop rbp) 26 | 1b: c3 (ret) 27 | > 28 | ``` 29 | 30 | If you have `ndisasm` installed (and in your `PATH`) you can also try: 31 | 32 | ``` 33 | > (disassemble f #:program 'nasm) 34 | 00000000 488943F8 mov [rbx-0x8],rax 35 | 00000004 4883C3F8 add rbx,byte -0x8 36 | 00000008 B803000000 mov eax,0x3 37 | 0000000D 4C8B75C8 mov r14,[rbp-0x38] 38 | 00000011 4883C428 add rsp,byte +0x28 39 | 00000015 415D pop r13 40 | 00000017 415C pop r12 41 | 00000019 5B pop rbx 42 | 0000001A 5D pop rbp 43 | 0000001B C3 ret 44 | ``` 45 | 46 | This works only on x86 or x86-64. 47 | 48 | Also, the `dump` function writes the bytes of the machine code to a 49 | file: 50 | 51 | ``` 52 | > (dump const "file.bin") 53 | ``` 54 | 55 | Patches, uses, complaints, and suggestions are all welcome. 56 | 57 | The disassembly code (when not using NASM) is taken from Göran 58 | Weinholt's [_Machine Code_ library](https://gitlab.com/weinholt/machine-code). 59 | -------------------------------------------------------------------------------- /disassemble/.gitignore: -------------------------------------------------------------------------------- 1 | *~ 2 | -------------------------------------------------------------------------------- /disassemble/fcdisasm.rkt: -------------------------------------------------------------------------------- 1 | 2 | ;; -*- mode: scheme; coding: utf-8 -*- !# 3 | ;; fcdisasm - The Full-Color Disassembler 4 | ;; Copyright © 2008, 2009, 2010, 2011, 2016, 2017, 2018 Göran Weinholt 5 | ;; SPDX-License-Identifier: MIT 6 | #lang racket 7 | 8 | ;; This program is an example of how to use (industria disassembler) 9 | ;; and a novelty: first disassembler to go *full color* for the hex 10 | ;; dump! 11 | 12 | (require rnrs 13 | (only-in machine-code/disassembler 14 | invalid-opcode? 15 | available-disassemblers get-disassembler 16 | disassembler-max-instruction-size 17 | disassembler-instruction-getter)) 18 | (provide disassemble) 19 | 20 | ;; Print an instruction with hexadecimal numbers. 21 | (define (print-instr/sexpr i) 22 | (cond ((pair? i) 23 | (display "(") 24 | (let lp ((i i)) 25 | (unless (null? i) 26 | (print-instr/sexpr (car i)) 27 | (unless (null? (cdr i)) 28 | (display #\space) 29 | (lp (cdr i))))) 30 | (display ")")) 31 | ((and (number? i) (exact? i) (integer? i)) 32 | (display "#x") 33 | (display (number->string i 16))) 34 | (else 35 | (display i)))) 36 | 37 | #| 38 | 39 | The generated code for a function call in Chez Scheme is roughly 40 | 41 | (lea rcx (mem+ rip )) 42 | (mov rcx) 43 | (jmp ) 44 | ... 45 | 46 | 47 | where the describes the frame size, live variables, etc., and 48 | generally doesn't decode nicely. Make the dissasembler here reject an 49 | instruction that spans an address that was produced by a RIP-relative 50 | calculation, so it will sync back up with the return point (even if 51 | some amount of the is decoded as nonsense). 52 | 53 | The `ndisasm` executable has a similar built-in inference option, but 54 | since is only infers sync points based on jump targets, it doesn't 55 | manage to handle code generated by Chez Scheme. 56 | 57 | |# 58 | 59 | (define (disassemble p disassembler color end-position pc symbols relocations 60 | is-ip? is-jump? pc-rel-delta) 61 | (define (next-symbol symbols pc) 62 | (cond ((null? symbols) symbols) 63 | ((null? (cdr symbols)) symbols) 64 | ((or (> pc (cadar symbols)) 65 | (= pc (caadr symbols))) 66 | (next-symbol (cdr symbols) pc)) 67 | (else symbols))) 68 | (define get-instruction (disassembler-instruction-getter disassembler)) 69 | (define hex-width (+ 1 (* 2 (disassembler-max-instruction-size disassembler)))) 70 | (let ([jump-targets (get-jump-targets p get-instruction is-ip? is-jump? pc-rel-delta end-position pc)] 71 | [end-pc (and end-position (+ pc end-position))]) 72 | (let lp ((pos (port-position p)) 73 | (pc pc) 74 | (symbols (next-symbol symbols pc)) 75 | (data-mode? #f)) 76 | (let* ((tagged-bytes '()) 77 | (i (cond 78 | [(and data-mode? 79 | (not (jump-target? pc jump-targets))) 80 | (let loop ([len 0]) 81 | (cond 82 | [(= len 8) '(data)] 83 | [(or (jump-target? (+ pc len) jump-targets) 84 | (and (positive? len) 85 | (extract-relocation relocations (+ pc len) 1))) 86 | `(data)] 87 | [else 88 | (let ([b (read-byte p)]) 89 | (cond 90 | [(eof-object? b) 91 | (if (zero? len) 92 | eof 93 | '(data))] 94 | [else 95 | (set! tagged-bytes (cons (list 'data b) tagged-bytes)) 96 | (loop (add1 len))]))]))] 97 | [else 98 | (guard (con 99 | ((invalid-opcode? con) 100 | (list 'bad: 101 | (condition-message con)))) 102 | (get-instruction p 103 | (lambda x 104 | (set! tagged-bytes (cons x tagged-bytes)))))])) 105 | (new-pos (port-position p)) 106 | (new-pc (+ pc (- new-pos pos)))) 107 | ;; Print info from the symbol table 108 | (unless (null? symbols) 109 | (when (= pc (caar symbols)) 110 | (let ((sym (car symbols))) 111 | (newline) 112 | (when color (display "\x1b;[4m")) 113 | (display (number->string (car sym) 16)) 114 | (display #\-) 115 | (display (number->string (cadr sym) 16)) 116 | (when color (display "\x1b;[0m")) 117 | (display #\space) 118 | (display (caddr sym)) 119 | (newline)))) 120 | ;; Print instructions 121 | (unless (or (eof-object? i) 122 | (and end-position (> new-pos end-position))) 123 | (cond 124 | [(spanned-sync-point? pc jump-targets new-pc) 125 | ;; Emit data byte and retry at next byte 126 | (file-position p pos) 127 | (let ([b (read-byte p)]) 128 | (display-instruction pc (list (cons 'data (list b))) `(data ,b) '() color 1 relocations '()) 129 | (lp (add1 pos) (add1 pc) symbols #t))] 130 | [else 131 | (let ([new-sync-points (extract-sync-points new-pc i end-pc is-ip? pc-rel-delta)]) 132 | (display-instruction pc tagged-bytes i new-sync-points color (- new-pos pos) relocations jump-targets) 133 | (lp new-pos new-pc (next-symbol symbols new-pc) (or (is-jump? i) (eq? (car i) 'data))))])))))) 134 | 135 | (define (display-instruction pc tagged-bytes i derived color len relocations jump-targets) 136 | (define (display-addr addr) 137 | (let ((x (number->string addr 16))) 138 | (if (< (string-length x) 8) 139 | (display (make-string (- 8 (string-length x)) #\space))) 140 | (display x))) 141 | (display-addr pc) 142 | (display ": ") 143 | (for-each (lambda (x) 144 | (let ((tag (car x)) 145 | (bytes (cdr x))) 146 | (cond ((eq? tag '/is4) 147 | (when color 148 | (display "\x1b;[1;34m")) 149 | (display (number->string (bitwise-bit-field (car bytes) 4 8) 16)) 150 | (when color 151 | (display "\x1b;[1;37m")) 152 | (display (number->string (bitwise-bit-field (car bytes) 0 4) 16))) 153 | (else 154 | (when color 155 | (case tag 156 | ((modr/m sib tfr/exg/sex) (display "\x1b;[1;34m")) 157 | ((opcode) (display "\x1b;[1;32m")) 158 | ((prefix) (display "\x1b;[1;33m")) 159 | ((immediate) (display "\x1b;[1;37m")) 160 | ((disp offset) (display "\x1b;[1;35m")) 161 | (else (display "\x1b;[0m")))) 162 | (for-each (lambda (byte) 163 | (when (< byte #x10) 164 | (display #\0)) 165 | (display (number->string byte 16))) 166 | bytes))))) 167 | (reverse tagged-bytes)) 168 | (when color 169 | (display "\x1b;[0m")) 170 | (display (make-string (- 31 (* 2 len)) #\space)) 171 | (print-instr/sexpr i) 172 | (let ([a (extract-relocation relocations pc len)]) 173 | (cond 174 | [a (printf " ; ~.v" (cdr a))] 175 | [else 176 | (for-each (lambda (d) 177 | (printf " ; => ~x" d)) 178 | derived)])) 179 | (when (memv pc jump-targets) 180 | (display " ; <=")) 181 | (newline)) 182 | 183 | ;; Jump targets are sync points. 184 | ;; Find reachable code by recognizing jumps and skipping to the jump 185 | ;; target, accumulating those jump targets. By building up a list of 186 | ;; those targets, we can later disassemble by linearly scanning, 187 | ;; going into data mode when we see a jump and returning to code mode 188 | ;; when we reach a sync point. 189 | (define (get-jump-targets p get-instruction is-ip? is-jump? pc-rel-delta end-position start-pc) 190 | (define starting-pos (port-position p)) 191 | (define end-pc (and end-position (+ start-pc end-position))) 192 | (let lp ((pos (port-position p)) 193 | (pc start-pc) 194 | (sync-points '()) 195 | (to-explore '()) 196 | (explored (hasheqv))) 197 | (let ([explore-next (lambda (to-explore sync-points explored) 198 | (cond 199 | [(null? to-explore) 200 | (file-position p starting-pos) 201 | sync-points] 202 | [else 203 | (let* ([new-pc (car to-explore)] 204 | [pos (- new-pc start-pc)]) 205 | (file-position p pos) 206 | (lp pos new-pc sync-points (cdr to-explore) explored))]))]) 207 | (cond 208 | [(hash-ref explored pc #f) 209 | (explore-next to-explore sync-points explored)] 210 | [else 211 | (let* ((i (guard (con 212 | ((invalid-opcode? con) 213 | (list 'bad: 214 | (condition-message con)))) 215 | (get-instruction p void))) 216 | (new-pos (port-position p)) 217 | (new-pc (+ pc (- new-pos pos))) 218 | (explored (hash-set explored pc #t))) 219 | (cond 220 | [(or (eof-object? i) 221 | (and end-position (> new-pos end-position))) 222 | (explore-next to-explore sync-points explored)] 223 | [(spanned-sync-point? pc sync-points new-pc) 224 | (file-position p pos) 225 | (read-byte p) 226 | (lp (add1 pos) (add1 pc) sync-points)] 227 | [else 228 | (let* ([new-sync-points (extract-sync-points new-pc i end-pc is-ip? pc-rel-delta)] 229 | [sync-points (append new-sync-points sync-points)] 230 | [to-explore (append new-sync-points to-explore)]) 231 | (if (is-jump? i) 232 | (explore-next to-explore sync-points explored) 233 | (lp new-pos new-pc sync-points to-explore explored)))]))])))) 234 | 235 | (define (extract-sync-points pc i end-pc is-ip? pc-rel-delta) 236 | (let loop ([i i]) 237 | (cond 238 | [(null? i) '()] 239 | [else 240 | (let ([e (car i)]) 241 | (cond 242 | [(and (pair? e) 243 | (or (eq? (car e) '+) 244 | (eq? (car e) 'mem+)) 245 | (pair? (cdr e)) 246 | (is-ip? (cadr e)) 247 | (pair? (cddr e)) 248 | (null? (cdddr e)) 249 | (integer? (caddr e))) 250 | (let ([v (+ pc (caddr e) pc-rel-delta)]) 251 | (if (or (negative? v) 252 | (and end-pc (>= v end-pc))) 253 | (loop (cdr i)) 254 | (cons v (loop (cdr i)))))] 255 | [else 256 | (loop (cdr i))]))]))) 257 | 258 | (define (extract-relocation relocations pc len) 259 | (cond 260 | [(null? relocations) #f] 261 | [(<= pc (caar relocations) (+ pc (sub1 len))) 262 | (car relocations)] 263 | [else (extract-relocation (cdr relocations) pc len)])) 264 | 265 | (define (spanned-sync-point? pc sync-points new-pc) 266 | (let loop ([sync-points sync-points]) 267 | (if (null? sync-points) 268 | #f 269 | (let ([p (car sync-points)]) 270 | (or (< pc p new-pc) 271 | (loop (cdr sync-points))))))) 272 | 273 | (define (jump-target? pc sync-points) 274 | (memq pc sync-points)) 275 | #; 276 | (define (elf-architecture-symbol image) 277 | ;; XXX: Move to (machine-code format elf) 278 | (let ((machine (elf-image-machine image)) 279 | (endianness (elf-image-endianness image)) 280 | (entry (elf-image-entry image))) 281 | (cond ((and (= machine EM-ARM) (= (bitwise-and entry #b1) #b1)) 'arm-t32) 282 | ((and (= machine EM-ARM) (= (bitwise-and entry #b11) #b00)) 'arm-a32) 283 | ((= machine EM-AARCH6) 'arm-a64) 284 | ((= machine EM-386) 'x86-32) 285 | ((= machine EM-X86-64) 'x86-64) 286 | ((= machine EM-68HC12) 'm68hc12) 287 | ((= machine EM-MIPS) (if (= endianness ELFDATA2LSB) 'mipsel 'mipsbe)) 288 | (else 289 | (error 'elf-architecture-symbol 290 | "No support for this architecture" 291 | (cond ((assv machine elf-machine-names) => cdr) 292 | (else machine))))))) 293 | 294 | ;; Returns a list of (start-addr end-addr symbol) in increasing order. 295 | #;(define (parse-elf-symbols image) 296 | (cond ((elf-image-symbols image) => 297 | (lambda (symbols) 298 | (vector-sort! (lambda (s1 s2) 299 | (> (elf-symbol-value (cdr s1)) 300 | (elf-symbol-value (cdr s2)))) 301 | symbols) 302 | (let lp ((ret '()) 303 | (i 0)) 304 | (if (= i (vector-length symbols)) 305 | ret 306 | (let* ((sym (vector-ref symbols i)) 307 | (name (car sym)) (s (cdr sym))) 308 | (if (or (eqv? (elf-symbol-name s) 0) 309 | (eqv? (elf-symbol-shndx s) SHN-UNDEF)) 310 | (lp ret (+ i 1)) 311 | (lp (cons (list (elf-symbol-value s) 312 | (+ (elf-symbol-value s) 313 | (elf-symbol-size s)) 314 | name) 315 | ret) 316 | (+ i 1)))))))) 317 | (else '()))) 318 | 319 | #;(define (disassemble-file filename arch color) 320 | (cond ((is-elf-image? filename) 321 | (display "ELF image detected. Looking for .text section...\n") 322 | (let* ((image (open-elf-image filename)) 323 | (text (elf-image-section-by-name image ".text"))) 324 | (cond ((and text (= (elf-section-type text) SHT-PROGBITS)) 325 | (let ((arch (elf-architecture-symbol image)) 326 | (symbols (parse-elf-symbols image))) 327 | (set-port-position! (elf-image-port image) 328 | (elf-section-offset text)) 329 | (disassemble (elf-image-port image) 330 | (get-disassembler arch) 331 | color 332 | (+ (elf-section-offset text) 333 | (elf-section-size text)) 334 | (elf-section-addr text) 335 | symbols))) 336 | (else 337 | (display "This ELF image has no .text section with executable code.\n") 338 | (display "No disassembly for you.\n"))))) 339 | (else 340 | ;; Assume a DOS .com file. 341 | (disassemble (open-file-input-port filename) 342 | (get-disassembler (string->symbol arch)) 343 | color #f #x100 '())))) 344 | #; 345 | (define (parse-args args) 346 | (define (help . msg) 347 | (let ((x (current-error-port))) 348 | (when msg (display (car msg) x) (newline x) (newline x)) 349 | (display "fcdisasm - Full-color disassembler 350 | 351 | Usage: 352 | fcdisasm [-b|--bits ] [-a|--arch ] [--nocolor] [--] 353 | 354 | The argument can be either 16 (default), 32 or 64. This is 355 | shorthand for --arch x86-. 356 | 357 | The argument, used for raw binary files, is one of these: 358 | " x) 359 | (let lp ((arch* (available-disassemblers))) 360 | (unless (null? arch*) 361 | (display (car arch*)) 362 | (unless (null? (cdr arch*)) 363 | (display ", ")) 364 | (lp (cdr arch*)))) 365 | (display ".\n") 366 | (display " 367 | The --nocolor flag suppresses the color output. 368 | 369 | Author: Göran Weinholt . 370 | ") 371 | (exit 1))) 372 | (let lp ((filename #f) 373 | (color #t) 374 | (arch "x86-16") 375 | (args args)) 376 | (cond ((null? args) 377 | (unless filename 378 | (help "ERROR: No filename given.")) 379 | (values filename arch color)) 380 | ((or (string=? (car args) "--bits") 381 | (string=? (car args) "-b")) 382 | (if (null? (cdr args)) (help "ERROR: -b needs an argument (16, 32, 64)")) 383 | (cond ((assoc (cadr args) '(("64" . "x86-64") ("32" . "x86-32") ("16" . "x86-16"))) => 384 | (lambda (x) 385 | (lp filename color (cdr x) (cddr args)))) 386 | (else 387 | (help "ERROR: invalid argument for --bits flag")))) 388 | ((member (car args) '("-a" "--arch")) 389 | (if (null? (cdr args)) (help "ERROR: -a needs an argument")) 390 | (cond ((memq (string->symbol (cadr args)) (available-disassemblers)) => 391 | (lambda (_) 392 | (lp filename color (cadr args) (cddr args)))) 393 | (else 394 | (help "ERROR: invalid argument for --arch flag")))) 395 | ((string=? (car args) "--nocolor") 396 | (lp filename #f arch (cdr args))) 397 | ((string=? (car args) "--") 398 | (if (not (= (length args) 2)) (help "ERROR: following -- must be only a filename")) 399 | (if filename (help "ERROR: you can't have it both ways, use -- or don't")) 400 | (lp (cadr args) color arch (cddr args))) 401 | (else 402 | (if filename (help "ERROR: extra arguments on command line")) 403 | (lp (car args) color arch (cdr args)))))) 404 | 405 | ;; (define (main args) 406 | ;; (call-with-values (lambda () (parse-args args)) 407 | ;; disassemble-file)) 408 | 409 | ;; (main (cdr (command-line))) 410 | ;; (flush-output-port (current-output-port)) 411 | -------------------------------------------------------------------------------- /disassemble/main.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | (require racket/match racket/list ffi/unsafe racket/lazy-require 4 | version/utils racket/format 5 | (only-in machine-code/disassembler get-disassembler) 6 | (prefix-in fc: "fcdisasm.rkt") 7 | "pb.rkt" 8 | "vm.rkt") 9 | 10 | (lazy-require ("nasm.rkt" [nasm-disassemble])) 11 | 12 | (provide dump 13 | disassemble 14 | disassemble-ffi-function 15 | disassemble-bytes 16 | (rename-out [disassemble decompile])) 17 | 18 | (define go 19 | (case (system-type 'vm) 20 | [(racket) 21 | (define _mz_hash_key _short) 22 | (define _mzshort _int) 23 | 24 | 25 | ;; this struct is just to help get the start tag for procedures 26 | (define-cstruct _test_scheme_object 27 | ([typetag _short] 28 | [key _mz_hash_key])) 29 | 30 | (define (get-tag-num v) 31 | (test_scheme_object-typetag 32 | (cast v _scheme _test_scheme_object-pointer))) 33 | 34 | (define prim-type-number (get-tag-num values)) 35 | 36 | (define _scheme_type 37 | (_enum ;; from stypes.h 38 | `(prim_type = ,prim-type-number 39 | closed_prim_type 40 | closure_type 41 | case_closure_type 42 | cont_type 43 | escaping_cont_type 44 | proc_struct_type 45 | native_closure_type 46 | proc_chaperone_type) 47 | _short)) 48 | 49 | (define-cstruct _scheme_object 50 | ([typetag _scheme_type] 51 | [key _mz_hash_key])) 52 | 53 | ;; we assume that we're always in precise-gc mode 54 | (define _scheme_inclhash_object _scheme_object) 55 | 56 | (define-cstruct _scheme_lambda 57 | ([iso _scheme_inclhash_object] 58 | [num_params _mzshort] 59 | [max_let_depth _mzshort] 60 | [closure_size _mzshort] 61 | [closure_map (_cpointer _mzshort)] 62 | [body _scheme] 63 | [name _scheme] 64 | [tl_map _gcpointer] 65 | ;; more fields here for JIT 66 | )) 67 | 68 | (define-cstruct _scheme_closure 69 | ([so _scheme_object] 70 | [code _scheme_lambda-pointer] 71 | [vals _scheme])) 72 | 73 | (define-cstruct _native_lambda 74 | ([iso _scheme_inclhash_object] 75 | [start_code _fpointer] 76 | ;; either a void * tail_code (non-case-lambda) or mzshort * arities (case-lambda) 77 | [u _gcpointer] 78 | [arity_code _gcpointer] 79 | [max_let_depth _mzshort] 80 | [closure_size _mzshort] 81 | ;; either a 82 | ;; struct Scheme_Lambda *orig_code; /* For not-yet-JITted non-case-lambda */ or 83 | ;; Scheme_Object *name; 84 | [name _scheme] 85 | [tl_map _gcpointer] 86 | ;; a void** 87 | [retained _gcpointer])) 88 | 89 | (define-cstruct _scheme_native_closure 90 | ([so _scheme_object] 91 | [code _native_lambda-pointer] 92 | [vals _pointer])) 93 | 94 | (define find_jit_code_end (get-ffi-obj "scheme_jit_find_code_end" #f 95 | (_fun _gcpointer -> _gcpointer))) 96 | 97 | (define jit-now! (get-ffi-obj "scheme_jit_now" #f (_fun _racket -> _void))) 98 | 99 | (define (typeof v) (scheme_object-typetag (cast v _pointer _scheme_object-pointer))) 100 | 101 | (define (go name f #:size [size #f]) 102 | (unless (procedure? f) 103 | (raise-argument-error name "procedure" f)) 104 | (jit-now! f) 105 | (define fp (cast f _scheme _scheme_native_closure-pointer)) 106 | (unless (eq? 'native_closure_type (typeof fp)) 107 | (raise-argument-error name "non-primitive procedure" f)) 108 | (match (scheme_native_closure-code fp) 109 | [(native_lambda iso code u arity-code max-let-depth closure-size nm tl_map retained) 110 | (let* ([case? (< closure-size 0)] 111 | [closure-size (if case? 112 | (sub1 (- closure-size)) 113 | closure-size)] 114 | [tail-code (if case? #f u)] 115 | [num-arities (if case? closure-size #f)] 116 | [arities (cast u _gcpointer (_cpointer _mzshort))] 117 | [env (scheme_native_closure-vals fp)] 118 | [end (and tail-code (find_jit_code_end tail-code))] 119 | [end (and end (cast end _gcpointer _size))] 120 | [size (if end (- end 121 | (cast tail-code _gcpointer _size)) 122 | size)]) 123 | (when case? 124 | (error name "functions defined with `case-lambda' are not yet supported")) 125 | (unless tail-code 126 | (error name "unable to read jitted code")) 127 | (unless (or end size) 128 | (error name 129 | "unable to find the end of the jitted code, and no #:size supplied")) 130 | (cast tail-code _pointer (_bytes o size)))])) 131 | 132 | go] 133 | [(chez-scheme) 134 | (define code-pointer-adjust 1) 135 | (define code-prefix-words 8) ; see `code` in "cmacro.ss" 136 | 137 | (define inspect/object (vm-primitive 'inspect/object)) 138 | (define lock-object (vm-primitive 'lock-object)) 139 | (define unlock-object (vm-primitive 'unlock-object)) 140 | (define $object-address (vm-eval '($primitive $object-address))) 141 | (define foreign-ref (vm-primitive 'foreign-ref)) 142 | (define extract-procedure (vm-eval '(lambda (v) (extract-procedure v #f)))) 143 | 144 | (define (go name _f #:size [size #f]) 145 | (unless (procedure? _f) 146 | (raise-argument-error name "procedure" _f)) 147 | (define f (extract-procedure _f)) 148 | (define f-object (inspect/object f)) 149 | (define code-object (f-object 'code)) 150 | (define code (code-object 'value)) 151 | (lock-object code) 152 | (define code-p ($object-address code code-pointer-adjust)) 153 | (define length (foreign-ref 'uptr code-p (ctype-sizeof _intptr))) 154 | 155 | (define body-p (+ code-p (* code-prefix-words (ctype-sizeof _intptr)))) 156 | 157 | (define bstr (make-bytes length)) 158 | (memcpy bstr (cast body-p _uintptr _pointer) length) 159 | 160 | (unlock-object code) 161 | 162 | bstr) 163 | go] 164 | [else 165 | (error "unknown virtual machine")])) 166 | 167 | (define extract-relocations 168 | (case (system-type 'vm) 169 | [(chez-scheme) 170 | (define extract-procedure (vm-eval '(lambda (v) (extract-procedure v #f)))) 171 | (define inspect/object (vm-primitive 'inspect/object)) 172 | (lambda (f) 173 | ((((inspect/object (extract-procedure f)) 'code) 'reloc+offset) 'value))] 174 | [else 175 | (lambda (f) null)])) 176 | 177 | (define color #f) 178 | 179 | (define (detect-arch) 180 | (define arch (with-handlers ([exn:fail:contract? 181 | (lambda (exn) 182 | ;; No `(system-type 'arch)` before v7.9.0.6: 183 | (define sp (path->bytes (system-library-subpath #f))) 184 | (cond 185 | [(regexp-match? #rx#"i386" sp) 'i386] 186 | [(regexp-match? #rx#"x86_64" sp) 'x86_64] 187 | [(regexp-match? #rx#"aarch64" sp) 'aarch64] 188 | [else sp]))]) 189 | 190 | (let ([sys-target-type (system-type 'target-machine)]) 191 | ; first check target machine type in case we are targeting pb, 192 | ; in which case (system-type 'arch) will return the underlying 193 | ; machine arch and not the targeted pb variant 194 | (if (pb-arch? (symbol->string sys-target-type)) 195 | sys-target-type 196 | (system-type 'arch))))) 197 | (cond 198 | [(equal? arch 'i386) 'x86-32] 199 | [(equal? arch 'x86_64) 'x86-64] 200 | [(equal? arch 'aarch64) 'arm-a64] 201 | [(pb-arch? (symbol->string arch)) arch] 202 | [else (error 'disassemble "unsupported architecture: ~s" arch)])) 203 | 204 | (define (pb-arch? arch) 205 | (regexp-match? #rx#"^(t?)pb((?>32|64)?)((?>b|l)?)$" arch)) 206 | 207 | (define (get-pb-config pb-arch) 208 | (let ([parts (regexp-match #rx#"^(t?)pb((?>32|64)?)((?>b|l)?)$" pb-arch)]) 209 | (if parts 210 | (let 211 | ([t (second parts)] 212 | [bits (third parts)] 213 | [l (fourth parts)]) 214 | (pb-config 215 | (if (equal? bits #"32") '32 '64) 216 | (if (equal? l #"b") 'big 'little) 217 | (not (equal? t #"")))) 218 | #f))) 219 | 220 | ;; #f for arch is "auto-detect" 221 | (define (disassemble f #:program [prog #f] #:arch [arch #f]) 222 | (disassemble-bytes (go 'disassemble f) 223 | #:program prog #:arch arch 224 | #:relocations (extract-relocations f))) 225 | 226 | (define (disassemble-ffi-function fptr #:size s #:program [prog #f] #:arch [arch #f]) 227 | (disassemble-bytes (cast fptr _pointer (_bytes o s)) 228 | #:program prog)) 229 | 230 | (define (disassemble-bytes bs 231 | #:arch [arch #f] 232 | ;; `prog` is 'nasm or #f 233 | #:program [prog #f] 234 | ;; `relocations` is (list (cons ) ...) 235 | #:relocations [relocations '()]) 236 | (let ([arch (or arch (detect-arch))]) 237 | (case prog 238 | [(nasm) (display (nasm-disassemble bs))] 239 | [else 240 | 241 | (cond 242 | [(pb-arch? (symbol->string arch)) 243 | (pb-disassemble bs (get-pb-config (symbol->string arch)) relocations)] 244 | [else 245 | (fc:disassemble (open-input-bytes bs) 246 | (get-disassembler arch) 247 | color #f 0 '() 248 | ;; Convert relocations to mutable-pair associations: 249 | (let loop ([relocations relocations]) 250 | (if (null? relocations) 251 | '() 252 | (let ([p (car relocations)]) 253 | (mcons (mcons (cdr p) (car p)) 254 | (loop (cdr relocations)))))) 255 | ;; recognize instruction-pointer register: 256 | (case arch 257 | [(x86-32) (lambda (x) (eq? x 'eip))] 258 | [(x86-64) (lambda (x) (eq? x 'rip))] 259 | [(arm-a64) (lambda (x) (eq? x 'pc))] 260 | [else (error "ip recognizer missing")]) 261 | ;; recognize unconditional jump instructions: 262 | (case arch 263 | [(x86-32 x86-64) 264 | (lambda (i) 265 | (memq (mcar i) '(jmp)))] 266 | [(arm-a64) 267 | (lambda (i) 268 | (memq (mcar i) '(b br)))] 269 | [else (error "jump recognizer missing")]) 270 | ;; implicit delta on ip-relative calculations: 271 | (case arch 272 | [(arm-a64) -4] 273 | [else 0]))])]))) 274 | 275 | (provide get-code-bytes) 276 | (define (get-code-bytes f) (go 'get-code-bytes f)) 277 | 278 | (define (dump f file-name) 279 | (define bs (go 'dump f)) 280 | (let ((file (open-output-file file-name #:exists 'replace))) 281 | (write-bytes bs file) 282 | (close-output-port file))) 283 | -------------------------------------------------------------------------------- /disassemble/nasm.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | 3 | ;; avoid test errors when nasm isn't available 4 | (module* test racket/base) 5 | 6 | (require racket/file racket/port racket/system racket/match) 7 | 8 | (provide nasm-disassemble) 9 | 10 | ;; Taken from Larceny, written by Felix Klock 11 | 12 | ;;; Experimental library to get us semi-reliable IA32 disassembly by 13 | ;;; delegating the job to the nasm disassembler ndisasm. 14 | ;;; 15 | ;;; Note that the output can be misleading especially because we 16 | ;;; currently encode exception codes directly in the instruction 17 | ;;; stream, which the disassembler has no knowledge of. (To find this 18 | ;;; in the IAssassin backend, just search for the token 'dwords'; that 19 | ;;; is the directive for emitting constants in Sassy.) 20 | 21 | (define stype (system-type)) 22 | 23 | (define nasm-executable-name 24 | (case stype 25 | [(windows) "ndisasm.exe"] 26 | [else "ndisasm"])) 27 | 28 | (define nasm-path 29 | (find-executable-path nasm-executable-name)) 30 | 31 | (unless nasm-path 32 | (error 'disassemble "unable to find the `ndisasm' executable")) 33 | 34 | (define nasm-help-text 35 | (let () 36 | (define p (open-output-string)) 37 | (match-define (list _ in pid _ proc) (process*/ports p #f 'stdout nasm-path "-h")) 38 | (proc 'wait) 39 | (close-output-port in) 40 | (get-output-string p))) 41 | 42 | (define systype (system-type 'word)) 43 | 44 | (unless (regexp-match (regexp-quote (number->string systype)) 45 | nasm-help-text) 46 | (error 'nasm "this version of ndisasm does not support ~a-bit disassembly" systype)) 47 | 48 | (define (nasm-disassemble-bytevector bv) 49 | (let ([tempfile (make-temporary-file "nasmtemp~a.o")]) 50 | (let ((out (open-output-file tempfile #:exists 'truncate))) 51 | (write-bytes bv out) 52 | (flush-output out) 53 | (close-output-port out)) 54 | (define p (open-output-string)) 55 | (match-define (list _ in pid _ proc) 56 | (process*/ports p #f 'stdout nasm-path "-b" 57 | (number->string systype) 58 | (path->string tempfile))) 59 | (proc 'wait) 60 | (delete-file tempfile) 61 | (close-output-port in) 62 | (get-output-string p))) 63 | 64 | (define (nasm-disassemble x) 65 | (cond 66 | ((bytes? x) 67 | (nasm-disassemble-bytevector x)) 68 | (else 69 | (raise-argument-error 'nasm-disassemble "bytes" x)))) 70 | 71 | -------------------------------------------------------------------------------- /disassemble/pb.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require (for-syntax racket/syntax)) 4 | (require racket/fixnum) 5 | (provide pb-disassemble 6 | pb-config) 7 | 8 | (define pb-instruction-byte-size 4) 9 | 10 | (define pb-nop 0) 11 | (define pb-literal 1) 12 | (define pb-mov-16-group-start 2) 13 | 14 | (define pb-mov-group-start 10) 15 | (define pb-binop-group-start 22) 16 | (define pb-cmp-group-start 74) 17 | (define pb-fp-binop-group-start 92) 18 | (define pb-unop-group-start 118) 19 | (define pb-fp-unop-group-start 122) 20 | (define pb-fp-cmp-op-group-start 126) 21 | (define pb-rev-op-group-start 144) 22 | (define pb-ld-group-start 164) 23 | (define pb-st-group-start 184) 24 | (define pb-b-group-start 204) 25 | (define pb-b*-group-start 210) 26 | (define pb-return 213) 27 | (define pb-interp 214) 28 | (define pb-adr 215) 29 | (define pb-chunk 228) 30 | 31 | ;; Note: 32 | ;; Currently, every instruction is implemented except for the following: 33 | ;; pb-call, pb-inc, pb-lock, pb-cas, pb-call-arena-{in, out}, pb-stack-call, pb-fence 34 | 35 | ;; PB instruction shapes. All instructions are 4 bytes in length 36 | ; di/dr 37 | ;; ----------------------------------------------- 38 | ;; | op | reg | immed/reg | 39 | ;; ----------------------------------------------- 40 | ;; dri/drr 41 | ;; ----------------------------------------------- 42 | ;; | op | reg | reg | immed/reg | 43 | ;; ----------------------------------------------- 44 | ;; di 45 | ;; ----------------------------------------------- 46 | ;; | op | reg | immed | 47 | ;; ----------------------------------------------- 48 | ;; i 49 | ;; ----------------------------------------------- 50 | ;; | op | immed | 51 | ;; ----------------------------------------------- 52 | 53 | ;; Helper Macro for defining a simple enumeration datatype. 54 | ;; Given a `enum-name` and field_1, ... field_n, defines the following: 55 | ;; A struct, 56 | ;; (struct -struct [enum-fields]) 57 | ;; where enum-fields is a list of symbols representing the fields 58 | ;; An instance of the struct called 59 | ;; A constant named each of field_1, ..., field_n for easy access to each of the enum fields 60 | 61 | (define-syntax (define/enum stx) 62 | (syntax-case stx () 63 | [(_ name fields ...) 64 | (let* ([field-names (syntax->datum #'(fields ...))] 65 | [field-symbols (map (lambda (field) (string->symbol (format "~a" field))) field-names)]) 66 | (with-syntax ([struct-name (format-id stx "~a-struct" (syntax-e #'name))] 67 | [instance-name (format-id stx "~a" (syntax-e #'name))] 68 | [count-name (format-id stx "~a-count" (syntax-e #'name))]) 69 | #`(begin 70 | (struct struct-name [enum-fields] #:transparent) 71 | (define instance-name (struct-name '#,field-symbols)) 72 | (define-syntax count-name #,(length (syntax->datum #'(fields ...)))) 73 | 74 | #,(let loop ([fields* (syntax->datum #'(fields ...))] [i 0]) 75 | (cond 76 | [(null? fields*) (void)] 77 | [else 78 | (with-syntax ([const-name 79 | (datum->syntax stx (string->symbol (format "~a" (car fields*))))]) 80 | #`(begin 81 | (define const-name #,i) 82 | #,(loop (cdr fields*) (+ 1 i))))])))))])) 83 | 84 | ;; Returns the field names for a corresponding enum by accessing the `enum-fields` 85 | ;; of the corresponding instance 86 | (define-syntax (enum-fields stx) 87 | (syntax-case stx () 88 | [(_ enum-name) 89 | (with-syntax ([accessor-name (format-id stx "~a-struct-enum-fields" (syntax-e #'enum-name))]) 90 | #'(accessor-name enum-name))])) 91 | 92 | (define-syntax (enum-field-count stx) 93 | (syntax-case stx () 94 | [(_ enum-name) #`(length (enum-fields enum-name))])) 95 | 96 | ;; PB instructions come in groups of different variants. For example, in the chez scheme backend, binops are defined as: 97 | ;; [pb-bin-op pb-signals pb-binaries pb-argument-types] 98 | ;; where pb-signals is an enum representing each possible signal, pb-binaries is an enum representing each binop, 99 | ;; and pb-argument-types represents immediate or register variants. Taking the product of these options 100 | ;; yields the different binop variants. 101 | 102 | ;; On the disassembler end, we can use the following macro, deconstruct-op, which is given an opcode and 103 | ;; a list of known enum options that have been multiplied to form the opcode, and we can repeatedly 104 | ;; take the modulus of the opcode and number of enum fields and "divide away" the number of enum fields 105 | ;; in order to figure out exactly which variant an opcode corresponds to. 106 | (define-syntax (deconstruct-op stx) 107 | (syntax-case stx () 108 | [(_ op base [name enum] ... body) 109 | #` 110 | (let ([rel (- op base)]) 111 | #, 112 | (let loop ([names (syntax->datum #'(name ...))] [enums (syntax->datum #'(enum ...))] [div 1]) 113 | (cond 114 | [(null? names) #'body] 115 | [else 116 | (with-syntax ([name (format-id stx "~a" (car names))] 117 | [enum-name (format-id stx "~a" (car enums))] 118 | [enum-count (format-id stx "~a-count" (car enums))]) 119 | #`(let ([name (remainder (quotient rel #,div) #,(syntax-local-value #'enum-count))]) 120 | #,(loop (cdr names) (cdr enums) (* div (syntax-local-value #'enum-count)))))])))])) 121 | 122 | (define/enum pb-argument-types pb-register pb-immediate) 123 | 124 | (define/enum pb-shift pb-shift0 pb-shift1 pb-shift2 pb-shift3) 125 | 126 | (define/enum pb-zk zero-bits keep-bits) 127 | 128 | (define/enum pb-mov-types 129 | pb-i->i 130 | pb-d->d 131 | pb-i->d 132 | pb-d->i 133 | pb-s->d 134 | pb-d->s 135 | pb-d->s->d 136 | pb-i-bits->d-bits ; 64-bit only 137 | pb-d-bits->i-bits ; 64-bit only 138 | pb-i-i-bits->d-bits ; 32-bit only 139 | pb-d-lo-bits->i-bits ; 32-bit only 140 | pb-d-hi-bits->i-bits) 141 | 142 | (define/enum pb-signal-types pb-no-signal pb-signal) 143 | 144 | (define/enum pb-sizes 145 | pb-int8 146 | pb-uint8 147 | pb-int16 148 | pb-uint16 149 | pb-int32 150 | pb-uint32 151 | pb-int64 152 | pb-uint64 153 | pb-single 154 | pb-double) 155 | 156 | (define/enum pb-regs 157 | pb-reg-tc 158 | pb-reg-sfp 159 | pb-reg-ap 160 | pb-reg-trap 161 | pb-reg-ac0 162 | pb-reg-xp 163 | pb-reg-ts 164 | pb-reg-td 165 | pb-reg-cp 166 | pb-reg-r9 167 | pb-reg-r10 168 | pb-reg-r11 169 | pb-reg-r12 170 | pb-reg-r13 171 | pb-reg-r14 172 | pb-reg-r15 173 | pb-reg-fp1 174 | pb-reg-fp2 175 | pb-reg-fp3 176 | pb-reg-fp4 177 | pb-reg-fp5 178 | pb-reg-fp6 179 | pb-reg-fp7 180 | pb-reg-fp8) 181 | 182 | (define/enum pb-binaries 183 | pb-add 184 | pb-sub 185 | pb-mul 186 | pb-div 187 | pb-subz 188 | pb-subp 189 | pb-and 190 | pb-ior 191 | pb-xor 192 | pb-lsl 193 | pb-lsr 194 | pb-asr 195 | pb-lslo) 196 | 197 | (define/enum pb-unaries pb-not pb-sqrt) 198 | 199 | (define/enum pb-branches pb-fals pb-true pb-always) 200 | 201 | (define/enum pb-cmp-ops pb-eq pb-lt pb-gt pb-le pb-ge pb-ab pb-bl pb-cs pb-cc) 202 | 203 | (define pb-size-names 204 | (vector "int8" "uint8" "int16" "uint16" "int32" "uint32" "int64" "uint64" "single" "double")) 205 | 206 | (define pb-mov-type-names 207 | (vector "i->i" 208 | "d->d" 209 | "i->d" 210 | "d->i" 211 | "s->d" 212 | "d->s" 213 | "d->s->d" 214 | "i-bits->d-bits" 215 | "d-bits->i-bits" 216 | "i-i-bits->d-bits" 217 | "d-lo-bits->i-bits" 218 | "d-hi-bits->i-bits")) 219 | 220 | (define pb-binop-names 221 | (vector "add" "sub" "mul" "div" "subz" "subp" "and" "ior" "xor" "lsl" "lsr" "asr" "lslo")) 222 | 223 | (define pb-fp-binop-names (vector-map (lambda (n) (format "fp-~a" n)) pb-binop-names)) 224 | 225 | (define pb-unop-names (vector "not" "sqrt")) 226 | 227 | (define pb-fp-unop-names (vector-map (lambda (n) (format "fp-~a" n)) pb-unop-names)) 228 | 229 | (define pb-branch-names (vector "bfalse" "btrue" "b")) 230 | 231 | (define pb-cmp-names (vector "eq" "lt" "gt" "le" "ge" "ab" "bl" "cs" "cc")) 232 | 233 | (define pb-fp-cmp-op-names (vector-map (lambda (n) (format "fp-~a" n)) pb-cmp-names)) 234 | 235 | (define reg-names 236 | (vector "tc" "sfp" "ap" "trap" "ac0" "xp" "ts" "td" "cp" "r9" "r10" "r11" "r12" "r13" "r14" "r15")) 237 | 238 | (define fp-reg-names (vector "fp1" "fp2" "fp3" "fp4" "fp5" "fp6" "fp7" "fp8")) 239 | 240 | ;; Helpers for formatting instructions 241 | 242 | (define (format-instr-parts op-name properties) 243 | (format "(~s ~s)" op-name (string-join properties " "))) 244 | 245 | (define (format-reg r [fp #f]) 246 | (let ([reg-name (if fp (vector-ref fp-reg-names r) (vector-ref reg-names r))]) 247 | (format "%~a" reg-name))) 248 | 249 | (define (s-ext imm imm-sz) 250 | (let ([sign (arithmetic-shift imm (- (sub1 imm-sz)))]) (+ (- (arithmetic-shift sign imm-sz)) imm))) 251 | 252 | (define (format-imm imm imm-sz sgn?) 253 | (if sgn? (format "(imm ~a)" (s-ext imm imm-sz)) (format "(imm #x~x)" imm))) 254 | 255 | (define (format-offset off) 256 | (format "(offset #x~x)" off)) 257 | 258 | (define (format-props props) 259 | (string-join (filter non-empty-string? (map format-prop props)))) 260 | 261 | (define (format-instr/dri op-name dest reg imm props [is-fp? '(#f #f #f)]) 262 | (let ([fmt-props (format-props props)]) 263 | (if (equal? fmt-props "") 264 | (format "(~a ~a ~a ~a)" 265 | op-name 266 | (format-reg dest (first is-fp?)) 267 | (format-reg reg (second is-fp?)) 268 | (format-imm imm 0 #f)) 269 | (format "(~a ~a ~a ~a ~a)" 270 | op-name 271 | (format-reg dest (first is-fp?)) 272 | (format-reg reg (second is-fp?)) 273 | (format-imm imm 0 #f) 274 | (format-props props))))) 275 | 276 | (define (format-instr/dir op-name dest reg imm props [is-fp? '(#f #f #f)]) 277 | (let ([fmt-props (format-props props)]) 278 | (if (equal? fmt-props "") 279 | (format "(~a ~a ~a ~a)" 280 | op-name 281 | (format-reg dest (first is-fp?)) 282 | (format-imm imm 0 #f) 283 | (format-reg reg (second is-fp?))) 284 | (format "(~a ~a ~a ~a ~a)" 285 | op-name 286 | (format-reg dest (first is-fp?)) 287 | (format-imm imm 0 #f) 288 | (format-reg reg (second is-fp?)) 289 | fmt-props)))) 290 | 291 | (define (format-instr/di op dst imm props) 292 | (let ([fmt-props (format-props props)]) 293 | (if (equal? fmt-props "") 294 | (format "(~a ~a ~a)" op (format-reg dst) (format-imm imm 0 #f)) 295 | (format "(~a ~a ~a ~a)" op (format-reg dst) (format-imm imm 0 #f) fmt-props)))) 296 | 297 | (define (format-instr/dr op dst reg props [is-fp? '(#f #f)]) 298 | (let ([fmt-props (format-props props)]) 299 | (if (equal? fmt-props "") 300 | (format "(~a ~a ~a)" op (format-reg dst (first is-fp?)) (format-reg reg (second is-fp?))) 301 | (format "(~a ~a ~a ~a)" 302 | op 303 | (format-reg dst (first is-fp?)) 304 | (format-reg reg (second is-fp?)) 305 | fmt-props)))) 306 | 307 | (define (format-instr/drr op dst r1 r2 props [is-fp? '(#f #f #f)]) 308 | (let ([fmt-props (format-props props)]) 309 | (if (equal? fmt-props "") 310 | (format "(~a ~a ~a ~a)" 311 | op 312 | (format-reg dst (first is-fp?)) 313 | (format-reg r1 (second is-fp?)) 314 | (format-reg r2 (third is-fp?))) 315 | (format "(~a ~a ~a ~a ~a)" 316 | op 317 | (format-reg dst (first is-fp?)) 318 | (format-reg r1 (second is-fp?)) 319 | (format-reg r2 (third is-fp?)) 320 | fmt-props)))) 321 | 322 | (define (format-instr/d op dest) 323 | (format "(~a ~a)" op (format-reg dest))) 324 | 325 | (define (as-offset imm) 326 | (s-ext imm 24)) 327 | 328 | (define (format-label-imm label imm im-sz sgn?) 329 | (format "(label ~a ~a)" label (format-imm imm im-sz sgn?))) 330 | 331 | (define (format-instr/i op imm label im-sz sgn?) 332 | (format "(~a ~a)" 333 | op 334 | (if (equal? label "") (format-imm imm im-sz sgn?) (format-label-imm label imm im-sz sgn?)))) 335 | 336 | ; Properties which may be associated with an instruction 337 | (struct zero/keep [zk]) 338 | (struct shift [s]) 339 | (struct mov-type [mt]) 340 | (struct signal [s]) 341 | 342 | 343 | (define (format-prop prop) 344 | (match prop 345 | [(zero/keep zk) (if (equal? zk keep-bits) "#:keep-bits #t" "")] 346 | [(shift s) (if (> s 0) (format "#:shift ~a" s) "")] 347 | [(mov-type mt) (if (not (member mt (list pb-i->i pb-d->d))) 348 | (format "#:mov-type ~a" (vector-ref pb-mov-type-names mt)) "")] 349 | [(signal s) (if (equal? s pb-signal) "#:signal #t" "")])) 350 | 351 | (define (format/pb-mov16 s zk reg imm) 352 | (format-instr/di "mov-16" reg imm (list (zero/keep zk) (shift s)))) 353 | 354 | ; Utilities for decoding pb instructions 355 | (define (decode/pb-mov16 instr) 356 | (let* ([op (instr-op instr)] 357 | [rel (- op pb-mov-16-group-start)] 358 | [shift (remainder rel 4)] 359 | [zero/keep (quotient rel 4)] 360 | [reg (instr-di-dest instr)] 361 | [imm (instr-di-imm instr)]) 362 | (format/pb-mov16 shift zero/keep reg imm))) 363 | 364 | (define (decode/pb-mov instr) 365 | (deconstruct-op 366 | (instr-op instr) 367 | pb-mov-group-start 368 | [movt pb-mov-types] 369 | (cond 370 | [(equal? mov-type pb-i-i-bits->d-bits) "(unsupported)"] 371 | [else 372 | (format-instr/dr "mov" (instr-dr-dest instr) (instr-dr-reg instr) (list (mov-type movt)))]))) 373 | 374 | (define (decode/pb-binop instr) 375 | (deconstruct-op (instr-op instr) 376 | pb-binop-group-start 377 | [drr/dri pb-argument-types] 378 | [op-kind pb-binaries] 379 | [sig pb-signal-types] 380 | (cond 381 | [(equal? drr/dri pb-register) 382 | (format-instr/drr (vector-ref pb-binop-names op-kind) 383 | (instr-drr-dest instr) 384 | (instr-drr-reg1 instr) 385 | (instr-drr-reg2 instr) 386 | (list (signal sig)))] 387 | [(equal? drr/dri pb-immediate) 388 | (format-instr/dri (vector-ref pb-binop-names op-kind) 389 | (instr-dri-dest instr) 390 | (instr-dri-reg instr) 391 | (instr-dri-imm instr) 392 | (list (signal sig)))]))) 393 | 394 | (define (decode/pb-fp-unop instr) 395 | (deconstruct-op 396 | (instr-op instr) 397 | pb-fp-unop-group-start 398 | [dr/di pb-argument-types] 399 | [op-kind pb-unaries] 400 | (cond 401 | [(equal? dr/di pb-register) 402 | (format-instr/dr (vector-ref pb-fp-unop-names op-kind) 403 | (instr-dr-dest instr) 404 | (instr-dr-reg instr) 405 | '() 406 | '(#t #t))] 407 | [else (error 'pb-disassemble "floating-point instruction cannot have dri variant")]))) 408 | 409 | (define (decode/pb-cmp instr) 410 | (let* ([op (instr-op instr)] 411 | [rel (- op pb-cmp-group-start)] 412 | [dr/di (remainder rel (enum-field-count pb-argument-types))] 413 | [op-kind (remainder (quotient rel (enum-field-count pb-argument-types)) 414 | (enum-field-count pb-cmp-ops))]) 415 | (cond 416 | [(equal? dr/di pb-register) 417 | (format-instr/dr (vector-ref pb-cmp-names op-kind) 418 | (instr-dr-dest instr) 419 | (instr-dr-reg instr) 420 | '())] 421 | [(equal? dr/di pb-immediate) 422 | (format-instr/di (vector-ref pb-cmp-names op-kind) 423 | (instr-di-dest instr) 424 | (instr-di-imm instr) 425 | '())]))) 426 | 427 | (define (decode/pb-fp-binop instr) 428 | (deconstruct-op 429 | (instr-op instr) 430 | pb-fp-binop-group-start 431 | [drr/dri pb-argument-types] 432 | [op-kind pb-binaries] 433 | (begin 434 | (cond 435 | [(equal? drr/dri pb-register) 436 | (format-instr/drr (vector-ref pb-fp-binop-names op-kind) 437 | (instr-drr-dest instr) 438 | (instr-drr-reg1 instr) 439 | (instr-drr-reg2 instr) 440 | '() 441 | '(#t #t #t))] 442 | [else (error 'pb-disassemble "floating-point instruction cannot have dri variant")])))) 443 | 444 | (define (decode/pb-unop instr) 445 | (deconstruct-op (instr-op instr) 446 | pb-unop-group-start 447 | [dr/di pb-argument-types] 448 | [op-kind pb-unaries] 449 | (cond 450 | [(equal? dr/di pb-immediate) 451 | (format-instr/di (vector-ref pb-unop-names op-kind) 452 | (instr-di-dest instr) 453 | (instr-di-imm instr) 454 | '())] 455 | [(equal? dr/di pb-register) 456 | (format-instr/dr (vector-ref pb-unop-names op-kind) 457 | (instr-dr-dest instr) 458 | (instr-dr-reg instr) 459 | '())]))) 460 | 461 | (define (decode/pb-fp-cmp-op instr) 462 | (deconstruct-op 463 | (instr-op instr) 464 | pb-fp-cmp-op-group-start 465 | [drr/dri pb-argument-types] 466 | [op-kind pb-cmp-ops] 467 | (cond 468 | [(equal? drr/dri pb-register) 469 | (format-instr/dr (vector-ref pb-fp-cmp-op-names op-kind) 470 | (instr-dr-dest instr) 471 | (instr-dr-reg instr) 472 | '() 473 | '(#t #t))] 474 | [else (error 'pb-disassemble "floating point instruction cannot have dri variant")]))) 475 | 476 | (define (decode/pb-rev-op instr) 477 | (deconstruct-op (instr-op instr) 478 | pb-rev-op-group-start 479 | [dr/di pb-argument-types] 480 | [sz pb-sizes] 481 | (cond 482 | [(equal? dr/di pb-register) 483 | (format-instr/dr (format "rev~a" (vector-ref pb-size-names sz)) 484 | (instr-dr-dest instr) 485 | (instr-dr-reg instr) 486 | '())] 487 | [else (error 'pb-disassemble "rev instruction canot have di variant")]))) 488 | 489 | (define (decode/pb-ld-op instr) 490 | (deconstruct-op (instr-op instr) 491 | pb-ld-group-start 492 | [drr/dri pb-argument-types] 493 | [sz pb-sizes] 494 | (let ([fp (or (equal? sz pb-single) (equal? sz pb-double))]) 495 | (cond 496 | [(equal? drr/dri pb-register) 497 | (format-instr/drr (format "ld-~a" (vector-ref pb-size-names sz)) 498 | (instr-drr-dest instr) 499 | (instr-drr-reg1 instr) 500 | (instr-drr-reg2 instr) 501 | '() 502 | (list fp #f #f))] 503 | [(equal? drr/dri pb-immediate) 504 | (format-instr/dri (format "ld-~a" (vector-ref pb-size-names sz)) 505 | (instr-dri-dest instr) 506 | (instr-dri-reg instr) 507 | (instr-dri-imm instr) 508 | '() 509 | (list fp #f #f))])))) 510 | 511 | (define (decode/pb-st-op instr) 512 | (deconstruct-op (instr-op instr) 513 | pb-st-group-start 514 | [drr/dri pb-argument-types] 515 | [sz pb-sizes] 516 | (let ([fp (or (equal? sz pb-double) (equal? sz pb-single))]) 517 | (cond 518 | [(equal? drr/dri pb-register) 519 | (format-instr/drr (format "st-~a" (vector-ref pb-size-names sz)) 520 | (instr-drr-dest instr) 521 | (instr-drr-reg1 instr) 522 | (instr-drr-reg2 instr) 523 | '() 524 | (list fp #f #f))] 525 | [(equal? drr/dri pb-immediate) 526 | (format-instr/dir (format "st-~a" (vector-ref pb-size-names sz)) 527 | (instr-dri-dest instr) 528 | (instr-dri-reg instr) 529 | (instr-dri-imm instr) 530 | '() 531 | (list fp #f #f))])))) 532 | 533 | ; simple binary search used for searching 534 | ; through a list of labels sorted by offset 535 | (define (bsearch cmp vec e start end) 536 | (if (< start end) 537 | (let* ([mid (quotient (+ end start) 2)] [cur (vector-ref vec mid)] [c (cmp e cur)]) 538 | (cond 539 | [(equal? c 0) cur] 540 | [(equal? c -1) (bsearch cmp vec e start mid)] 541 | [(equal? c 1) (bsearch cmp vec e (+ 1 mid) end)])) 542 | #f)) 543 | 544 | (define (cmp-label-offset offset lbl) 545 | (cond 546 | [(equal? offset (label-offset lbl)) 0] 547 | [(< offset (label-offset lbl)) -1] 548 | [else 1])) 549 | 550 | (define (decode/pb-b-op instr i labels) 551 | (deconstruct-op 552 | (instr-op instr) 553 | pb-b-group-start 554 | [r/i pb-argument-types] 555 | [b-type pb-branches] 556 | (cond 557 | [(equal? r/i pb-register) 558 | (format-instr/d (vector-ref pb-branch-names b-type) (instr-dr-reg instr))] 559 | [(equal? r/i pb-immediate) 560 | (let* ([target (+ (* pb-instruction-byte-size (+ 1 i)) (get-branch-target instr))] 561 | ; perform a binary search to attempt to find a label associated 562 | ; with the current offset in our list of collected labels 563 | [label (bsearch cmp-label-offset labels target 0 (vector-length labels))]) 564 | (format-instr/i (vector-ref pb-branch-names b-type) 565 | (instr-i-imm instr) 566 | (if label (label-name label) "") 567 | 24 568 | #t))]))) 569 | 570 | (define (decode/pb-b*-op instr) 571 | (deconstruct-op (instr-op instr) 572 | pb-b*-group-start 573 | [dr/di pb-argument-types] 574 | (cond 575 | [(equal? dr/di pb-register) 576 | (format-instr/dr "b*" (instr-dr-dest instr) (instr-dr-reg instr) '())] 577 | [(equal? dr/di pb-immediate) 578 | (format-instr/di "b*" (instr-di-dest instr) (instr-di-imm instr) '())]))) 579 | 580 | (define (decode/pb-adr-op instr) 581 | (format-instr/di "adr" 582 | (instr-adr-dest instr) 583 | ; immediate for pb-adr is an instruction-level offset 584 | (* (instr-adr-imm instr) pb-instruction-byte-size) 585 | '())) 586 | 587 | (define decode/pb-nop "(nop)") 588 | 589 | (define (decode/pb-literal-op instr num-words) 590 | (format-instr/d "literal" (instr-di-dest instr))) 591 | 592 | (define (decode/pb-return-op instr) 593 | "(return)") 594 | 595 | (define (decode/pb-interp-op instr) 596 | (format-instr/d "interp" (instr-d-dest instr))) 597 | 598 | (define (decode/pb-chunk-op instr) 599 | (let ([outer-idx (instr-ii-high instr)] [inner-idx (instr-ii-low instr)]) 600 | (format "(pb-chunk #:outer-idx ~a #:inner-idx ~a)" outer-idx inner-idx))) 601 | 602 | ; Helpers for extracting components of a pb instruction. Taken from ChezScheme's pbchunk.ss 603 | 604 | (define (instr-op instr) 605 | (bitwise-and instr #xFF)) 606 | 607 | (define (instr-d-dest instr) 608 | (bitwise-and (arithmetic-shift instr -8) #xF)) 609 | 610 | (define (instr-dr-dest instr) 611 | (instr-d-dest instr)) 612 | (define (instr-dr-reg instr) 613 | (bitwise-and (arithmetic-shift instr -16) #xF)) 614 | 615 | (define (instr-di-dest instr) 616 | (instr-d-dest instr)) 617 | (define (instr-di-imm instr) 618 | (arithmetic-shift instr -16)) 619 | 620 | (define (instr-adr-dest instr) 621 | (instr-di-dest instr)) 622 | (define (instr-adr-imm instr) 623 | (arithmetic-shift instr -12)) 624 | 625 | (define (instr-drr-dest instr) 626 | (instr-d-dest instr)) 627 | (define (instr-drr-reg1 instr) 628 | (bitwise-and (arithmetic-shift instr -12) #xF)) 629 | (define (instr-drr-reg2 instr) 630 | (bitwise-and (arithmetic-shift instr -16) #xF)) 631 | 632 | (define (instr-dri-dest instr) 633 | (instr-d-dest instr)) 634 | (define (instr-dri-reg instr) 635 | (bitwise-and (arithmetic-shift instr -12) #xF)) 636 | (define (instr-dri-imm instr) 637 | (arithmetic-shift instr -16)) 638 | 639 | (define (instr-i-imm instr) 640 | (arithmetic-shift instr -8)) 641 | 642 | (define (instr-ii-low instr) 643 | (bitwise-and (arithmetic-shift instr -8) #xFF)) 644 | 645 | (define (instr-ii-high instr) 646 | (bitwise-and (arithmetic-shift instr -16))) 647 | 648 | #| 649 | In Chez Scheme, the rp-header and rp-compact-header structures are defined as follows: 650 | 651 | (define-primitive-structure-disps rp-header type-untyped 652 | ([uptr toplink] 653 | [uptr mv-return-address] 654 | [ptr livemask] 655 | [iptr frame-size])) 656 | 657 | (define-primitive-structure-disps rp-compact-header type-untyped 658 | ([uptr toplink] 659 | [iptr mask+size+mode])) 660 | 661 | Assuming that sizeof(uptr) = sizeof(ptr) = sizeof(iptr) = machine word size, 662 | We have sizeof(rp-header) as 4*(word size). Similarly, sizeof(rp-compact-header) = 2*(word size) 663 | |# 664 | 665 | ; Sizes for chez scheme rp headers. We need these to be able to separate 666 | ; the headers from the instruction stream 667 | (define size-rp-compact-header 2) 668 | (define size-rp-header 4) 669 | 670 | (define (format-label lbl-n) 671 | (format "l~a" lbl-n)) 672 | 673 | (struct label ([offset] [name]) #:transparent) 674 | (define (new-label offset n) 675 | (label offset (format-label n))) 676 | 677 | (define (read-instr bs i endian) 678 | (let ([instr-bytes (subbytes bs i (+ i pb-instruction-byte-size))]) 679 | (bytes->instr instr-bytes endian))) 680 | 681 | (define (sort-dedup-labels labels) 682 | (let* ([sorted (sort labels (lambda (a b) (< a b)))] [deduped (remove-duplicates sorted)]) 683 | (reverse (foldl (lambda (offset lst) (cons (new-label offset (length lst)) lst)) '() deduped)))) 684 | 685 | ; note: adapted from Chez Scheme pbchunk.ss 686 | ; helper to collect rp headers and labels (branch targets) from 687 | ; the instruction stream. 688 | (define (collect-headers-labels bs config len) 689 | (define word-size (native-word-size (pb-config-bits config))) 690 | (let loop ([i 0] [headers '()] [labels '()]) 691 | (cond 692 | [(fx= i len) (values '() (sort-dedup-labels labels))] 693 | 694 | [(and (pair? headers) 695 | ; if we hit the index of an rp header, skip over it 696 | (fx= i (caar headers))) 697 | (let ([size (cdar headers)]) 698 | (let ([i (+ i size)]) 699 | (let-values ([(rest-headers labels) (loop i (cdr headers) labels)]) 700 | (values (cons (car headers) rest-headers) labels))))] 701 | [else 702 | (let ([instr (read-instr bs i (pb-config-endian config))]) 703 | (define (next) 704 | (loop (fx+ i pb-instruction-byte-size) headers labels)) 705 | 706 | (define (next/add-label new-label) 707 | (loop (fx+ i pb-instruction-byte-size) headers (cons new-label labels))) 708 | 709 | (define (next/adr) 710 | (let ([delta (fx* pb-instruction-byte-size (instr-adr-imm instr))]) 711 | (cond 712 | [(> delta 0) 713 | (let* ([after (fx+ i pb-instruction-byte-size delta)] 714 | [size (if (fx= 1 715 | (fxand 1 716 | (bytes-ref 717 | bs 718 | (fx- after 719 | (if (eq? (pb-config-endian config) 'little) 720 | word-size 721 | 1))))) 722 | (* size-rp-compact-header word-size) 723 | (* size-rp-header word-size))] 724 | [start (fx- after size)] 725 | [header (cons start size)]) 726 | (loop (fx+ i pb-instruction-byte-size) 727 | ;; (mflatt) insert keeping headers sorted 728 | (let sort-loop ([headers headers]) 729 | (cond 730 | [(null? headers) (list header)] 731 | [(fx<= start (caar headers)) (cons header headers)] 732 | [else (cons (car headers) (sort-loop (cdr headers)))])) 733 | labels))] 734 | [else (next)]))) 735 | 736 | (define (next-branch) 737 | (let* ([delta (get-branch-target instr)] 738 | [target-label (fx+ i pb-instruction-byte-size delta)]) 739 | (next/add-label target-label))) 740 | 741 | (define (next/literal) 742 | (loop (fx+ i pb-instruction-byte-size word-size) headers labels)) 743 | 744 | (cond 745 | [(is-branch-imm? instr) (next-branch)] 746 | [(equal? (instr-op instr) pb-adr) (next/adr)] 747 | [(equal? (instr-op instr) pb-literal) (next/literal)] 748 | [else (next)]))]))) 749 | 750 | ; helper predicate to determine if an instruction is a branch with immediate. We are only interested 751 | ; in these for determining branch targets within the current function we are disassembling 752 | (define (is-branch-imm? instr) 753 | (if (in-range (instr-op instr) 754 | pb-b-group-start 755 | (+ pb-b-group-start 756 | (* (enum-field-count pb-branches) (enum-field-count pb-argument-types)))) 757 | (deconstruct-op (instr-op instr) 758 | pb-b-group-start 759 | [r/i pb-argument-types] 760 | [_ pb-branches] 761 | (equal? r/i pb-immediate)) 762 | #f)) 763 | 764 | (define (get-branch-target b-imm-instr) 765 | (s-ext (instr-i-imm b-imm-instr) 24)) 766 | 767 | (define (bytes->instr-little-endian instr-bytes) 768 | (bitwise-ior (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 3) 24) #xFF000000) 769 | (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 2) 16) #xFF0000) 770 | (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 1) 8) #xFF00) 771 | (bytes-ref instr-bytes 0))) 772 | 773 | (define (bytes->instr-big-endian instr-bytes) 774 | (bitwise-ior (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 0) 24) #xFF000000) 775 | (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 1) 16) #xFF0000) 776 | (bitwise-and (arithmetic-shift (bytes-ref instr-bytes 2) 8) #xFF00) 777 | (bytes-ref instr-bytes 3))) 778 | 779 | (define (bytes->instr instr-bytes endian) 780 | (case endian 781 | [(little) (bytes->instr-little-endian instr-bytes)] 782 | [(big) (bytes->instr-big-endian instr-bytes)])) 783 | 784 | (define-syntax (in-range stx) 785 | (syntax-case stx () 786 | [(_ x a b) #'(and (<= a x) (< x b))])) 787 | 788 | ; this is useful as a default for any instructions that are not yet supported 789 | (define (pb-print-skeleton-instr instr) 790 | (format "(opcode: ~a ...)" (instr-op instr))) 791 | 792 | (define (literal-word-size sz) 793 | (cond 794 | [(equal? sz '32) 2] 795 | [(equal? sz '64) 3] 796 | [else (error 'pb-dissassemble "invalid word size ~a" sz)])) 797 | 798 | (define (native-word-size sz) 799 | (cond 800 | [(equal? sz '32) 4] 801 | [(equal? sz '64) 8] 802 | [else (error 'pb-dissassemble "invalid word size ~a" sz)])) 803 | 804 | (define (disassemble instr instr-idx labels config) 805 | (cond 806 | [(in-range (instr-op instr) 807 | pb-mov-16-group-start 808 | (+ pb-mov-16-group-start (* (enum-field-count pb-zk) (enum-field-count pb-shift)))) 809 | (decode/pb-mov16 instr)] 810 | [(in-range (instr-op instr) 811 | pb-mov-group-start 812 | (+ pb-mov-group-start (enum-field-count pb-mov-types))) 813 | (decode/pb-mov instr)] 814 | [(in-range (instr-op instr) 815 | pb-binop-group-start 816 | (+ pb-binop-group-start 817 | (* (enum-field-count pb-argument-types) 818 | (enum-field-count pb-binaries) 819 | (enum-field-count pb-signal-types)))) 820 | (decode/pb-binop instr)] 821 | [(in-range (instr-op instr) 822 | pb-cmp-group-start 823 | (+ pb-cmp-group-start 824 | (* (enum-field-count pb-argument-types) (enum-field-count pb-cmp-ops)))) 825 | (decode/pb-cmp instr)] 826 | [(in-range (instr-op instr) 827 | pb-fp-binop-group-start 828 | (+ pb-fp-binop-group-start 829 | (* (enum-field-count pb-argument-types) (enum-field-count pb-binaries)))) 830 | (decode/pb-fp-binop instr)] 831 | [(in-range (instr-op instr) 832 | pb-fp-cmp-op-group-start 833 | (+ pb-fp-cmp-op-group-start 834 | (* (enum-field-count pb-cmp-ops) (enum-field-count pb-argument-types)))) 835 | (decode/pb-fp-cmp-op instr)] 836 | [(in-range (instr-op instr) 837 | pb-unop-group-start 838 | (+ pb-unop-group-start 839 | (* (enum-field-count pb-argument-types) (enum-field-count pb-unaries)))) 840 | (decode/pb-unop instr)] 841 | [(in-range (instr-op instr) 842 | pb-fp-unop-group-start 843 | (+ pb-fp-unop-group-start 844 | (* (enum-field-count pb-argument-types) (enum-field-count pb-unaries)))) 845 | (decode/pb-fp-unop instr)] 846 | [(in-range (instr-op instr) 847 | pb-ld-group-start 848 | (+ pb-ld-group-start 849 | (* (enum-field-count pb-sizes) (enum-field-count pb-argument-types)))) 850 | (decode/pb-ld-op instr)] 851 | [(in-range (instr-op instr) 852 | pb-st-group-start 853 | (+ pb-st-group-start 854 | (* (enum-field-count pb-sizes) (enum-field-count pb-argument-types)))) 855 | (decode/pb-st-op instr)] 856 | [(in-range (instr-op instr) 857 | pb-b-group-start 858 | (+ pb-b-group-start 859 | (* (enum-field-count pb-branches) (enum-field-count pb-argument-types)))) 860 | (decode/pb-b-op instr instr-idx labels)] 861 | [(in-range (instr-op instr) 862 | pb-b*-group-start 863 | (+ pb-b*-group-start (enum-field-count pb-argument-types))) 864 | (decode/pb-b*-op instr)] 865 | [(equal? (instr-op instr) pb-nop) decode/pb-nop] 866 | [(equal? (instr-op instr) pb-literal) 867 | (decode/pb-literal-op instr (literal-word-size (pb-config-bits config)))] 868 | [(equal? (instr-op instr) pb-return) (decode/pb-return-op instr)] 869 | [(equal? (instr-op instr) pb-adr) (decode/pb-adr-op instr)] 870 | [(equal? (instr-op instr) pb-interp) (decode/pb-interp-op instr)] 871 | [(equal? (instr-op instr) pb-chunk) 872 | (error 'pb-disassemble "cannot continue disassembly in presence of pb-chunk")] 873 | [else (pb-print-skeleton-instr instr)])) 874 | 875 | ; A given pb machine has a word size (32 or 64 bit), an endianness, and may or may not 876 | ; have threading support 877 | (struct pb-config ([bits] [endian] [threaded?]) #:transparent) 878 | 879 | (define (format-relocation name) 880 | (format "(relocation ~s)" name)) 881 | 882 | (define (pad s len [p 32]) 883 | (if (>= (string-length s) len) 884 | s 885 | ; front pad with p 886 | (string-append (build-string (- len (string-length s)) (lambda (_) (integer->char p))) s))) 887 | 888 | (define (pad-index i) 889 | (pad (format "~a" i) 6)) 890 | 891 | (define (pad-instr instr) 892 | (pad (format "~x" instr) 8 48)) 893 | 894 | (define (disassemble-data bs config i len relocs) 895 | (let loop ([i i] [end (+ i len)] [rs relocs]) 896 | (if (< i end) 897 | (let* ([instr (read-instr bs i (pb-config-endian config))] 898 | [is-reloc? (and (pair? rs) (equal? i (+ (cdr (first rs)))))] 899 | [reloc-disp (if is-reloc? (format "~a" (format-relocation (car (first rs)))) "")]) 900 | (display (format "~a:\t~a\t(data)\t\t~a\n" (pad-index i) (pad-instr instr) reloc-disp)) 901 | (loop (+ i pb-instruction-byte-size) end (if is-reloc? (cdr rs) rs))) 902 | rs))) 903 | 904 | (define (disassemble-loop bs config relocs) 905 | (define word-size (native-word-size (pb-config-bits config))) 906 | (let-values ([(rp-headers labels) (collect-headers-labels bs config (bytes-length bs))]) 907 | (let* ([labels-vec (list->vector labels)] [instr-length (pb-count-instrs bs)]) 908 | (let loop ([i 0] [remaining-labels labels] [rs (reverse relocs)] [rps rp-headers]) 909 | (cond 910 | [(equal? i (bytes-length bs)) (void)] 911 | [(<= (+ i pb-instruction-byte-size) (bytes-length bs)) 912 | (let* ([instr (read-instr bs i (pb-config-endian config))] 913 | [idx (quotient i pb-instruction-byte-size)] 914 | [is-label? (and (pair? remaining-labels) 915 | (equal? i (label-offset (car remaining-labels))))] 916 | ; we want to display relocations at an offset of {} away from 917 | ; their listed address 918 | [is-reloc? (and (pair? rs) (equal? i (+ (cdr (first rs)))))] 919 | [is-rp-header? (and (pair? rps) (equal? i (caar rps)))]) 920 | 921 | (when (and is-label? is-rp-header?) 922 | (error 'pb-disassemble "rp-header should not be branch target")) 923 | 924 | (when is-label? 925 | (when is-rp-header? 926 | (error 'pb-disassemble "rp-header should not be branch target")) 927 | (display (format "\n.~a:\n" (label-name (car remaining-labels))))) 928 | 929 | ; rp headers are placeholders which Chez Scheme inserts into 930 | ; the instruction stream. They are non-instruction data and must 931 | ; be treated as such 932 | (if is-rp-header? 933 | (let ([rp-skip (- (cdar rps) pb-instruction-byte-size)]) 934 | (display (format "~a:\t~a\trp-header\n" (pad-index i) (pad-instr instr))) 935 | (define remaining-rs 936 | (disassemble-data bs config (+ i pb-instruction-byte-size) rp-skip rs)) 937 | 938 | (loop (+ i pb-instruction-byte-size rp-skip) 939 | remaining-labels 940 | remaining-rs 941 | (cdr rps))) 942 | ;; otherwise, disassemble instruction as normal 943 | (begin 944 | (display (format "~a:\t~a\t~a\n" 945 | (pad-index i) 946 | (pad-instr instr) 947 | (disassemble instr idx labels-vec config))) 948 | 949 | ; literal instructions contain an extra 950 | ; data word after the instruction which 951 | ; must be skipped over 952 | (let ([literal-skip (if (equal? (instr-op instr) pb-literal) word-size 0)]) 953 | (define remaining-rs 954 | (if (> literal-skip 0) 955 | (disassemble-data bs config (+ i pb-instruction-byte-size) literal-skip rs) 956 | rs)) 957 | 958 | (loop (+ i pb-instruction-byte-size literal-skip) 959 | (if is-label? (cdr remaining-labels) remaining-labels) 960 | remaining-rs 961 | rps)))))]))))) 962 | 963 | (define (pb-count-instrs bs) 964 | (unless (equal? (remainder (bytes-length bs) pb-instruction-byte-size) 0) 965 | (error 'pb-disassemble "bad instruction format")) 966 | (quotient (bytes-length bs) pb-instruction-byte-size)) 967 | 968 | (define (pb-disassemble bs config relocations) 969 | (unless (bytes? bs) 970 | (error 'pb-disassemble "unexpected input")) 971 | (let-values ([(headers labels) (collect-headers-labels bs config (bytes-length bs))]) 972 | (disassemble-loop bs config relocations))) 973 | -------------------------------------------------------------------------------- /disassemble/test.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | (require racket/unsafe/ops "main.rkt") 3 | 4 | (define const-string "a constant string") 5 | 6 | (define (uses-const-string) 7 | (display const-string)) 8 | 9 | (void (decompile uses-const-string)) 10 | 11 | (define (f x) 12 | (and x 13 | (for/fold () ([i (in-range 100)]) 14 | (values)))) 15 | 16 | (define (g k) 17 | (if (eq? k 'x) #t (eq? k 'y))) 18 | 19 | 20 | (define z 100) 21 | (define f2 (let ([cnt 0]) (lambda (x y z) (set! cnt (random 100)) (* 3 x cnt z)))) 22 | 23 | 24 | (void (decompile f2)) 25 | (define x (case-lambda [(x) 1] [(x y) (list x y)])) 26 | (define (y [x 1] [y 2] [z 3] [w 4] [a 5] [b 6]) 1) 27 | 28 | (define (id x) 29 | (for/fold ([z 0.0]) ([i (in-range 100)]) 30 | (unsafe-fl+ x z))) 31 | 32 | (void (decompile id)) 33 | -------------------------------------------------------------------------------- /disassemble/test0.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require "main.rkt") 4 | 5 | (define (iter f n i) 6 | (cond 7 | [(= n 0) i] 8 | [else (f (iter f (sub1 n) i))])) 9 | 10 | (define (fact n) 11 | (if (<= n 0) 1 12 | (* n (fact (- n 1))))) 13 | 14 | (define (func x) 15 | (define tmp 3) 16 | (if (= x 0) 17 | (set! tmp 0) 18 | (set! tmp 1)) 19 | tmp) 20 | 21 | (decompile fact) 22 | (decompile func) 23 | ;(decompile iter) 24 | -------------------------------------------------------------------------------- /disassemble/test2.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | (require disassemble racket/unsafe/ops) 3 | (define (f x) (+ x 7)) 4 | 5 | (define (g x) (unsafe-fx+ x 7)) 6 | 7 | 8 | (define (h x) 9 | (for/sum ([i (in-range x)]) i)) 10 | 11 | (module+ test 12 | (decompile f) 13 | (decompile g) 14 | (decompile h)) 15 | 16 | -------------------------------------------------------------------------------- /disassemble/test_fp.rkt: -------------------------------------------------------------------------------- 1 | #lang racket 2 | 3 | (require racket/flonum "main.rkt") 4 | 5 | (define (add-fp x y) 6 | (fl+ x y)) 7 | 8 | (define (cmp-fp x y) 9 | (let ([a (fl+ x y)] 10 | [b (fl- x y)]) 11 | (fl= a b))) 12 | 13 | (define (hypot a b c) 14 | (flsqrt (fl+ (fl* a a) (fl* b b) (fl* c c)))) 15 | 16 | (void (decompile add-fp)) 17 | (newline) 18 | (void (decompile cmp-fp)) 19 | (newline) 20 | (void (decompile hypot)) 21 | -------------------------------------------------------------------------------- /disassemble/vm.rkt: -------------------------------------------------------------------------------- 1 | #lang racket/base 2 | (require racket/linklet) 3 | 4 | ;; Same functions as `ffi/unsafe/vm`, but avoiding (for now) a dependency 5 | ;; on the latest Racket 6 | 7 | (provide vm-eval 8 | vm-primitive) 9 | 10 | (define-values (vm-eval vm-primitive) 11 | (case (system-type 'vm) 12 | [(chez-scheme) 13 | (define-values (raw-eval call-with-system-wind) 14 | (instantiate-linklet 15 | (compile-linklet '(linklet () () (values eval call-with-system-wind))) 16 | '() 17 | (make-instance 'top-level))) 18 | (define (eval s) 19 | (call-with-system-wind 20 | (lambda () 21 | (raw-eval s)))) 22 | (values eval eval)] 23 | [else 24 | (values #f #f)])) 25 | -------------------------------------------------------------------------------- /info.rkt: -------------------------------------------------------------------------------- 1 | #lang info 2 | 3 | (define collection 'multi) 4 | (define version "1.0") 5 | (define deps '("base" 6 | "r6rs-lib" 7 | "srfi-lib" 8 | "srfi-lite-lib")) 9 | (define license 'MIT) 10 | -------------------------------------------------------------------------------- /machine-code/disassembler.rkt: -------------------------------------------------------------------------------- 1 | ;; -*- mode: scheme; coding: utf-8 -*- 2 | ;; Copyright © 2016, 2017, 2018 Göran Weinholt 3 | ;; SPDX-License-Identifier: MIT 4 | #!r6rs 5 | 6 | ;; Generic disassembler support. 7 | 8 | (library (machine-code disassembler) 9 | (export 10 | invalid-opcode? 11 | available-disassemblers get-disassembler 12 | disassembler? disassembler-name 13 | disassembler-min-instruction-size 14 | disassembler-max-instruction-size 15 | disassembler-instruction-getter) 16 | (import 17 | (rnrs (6)) 18 | (rename (machine-code disassembler private) 19 | (available-disassemblers p:available-disassemblers) 20 | (get-disassembler p:get-disassembler)) 21 | #;(prefix (machine-code disassembler arm-a32) arm-a32:) 22 | (prefix (machine-code disassembler arm-a64) arm-a64:) 23 | #;(prefix (machine-code disassembler i8080) i8080:) 24 | #;(prefix (machine-code disassembler m68hc12) m68hc12:) 25 | #;(prefix (machine-code disassembler mips) mips:) 26 | (prefix (machine-code disassembler x86) x86:)) 27 | 28 | (define register-all-disassemblers 29 | (let ((done #f)) 30 | (lambda () 31 | (unless done 32 | ;; visit libraries 33 | (set! done (list x86:get-instruction 34 | #;mips:get-instruction 35 | #;m68hc12:get-instruction 36 | #;i8080:get-instruction 37 | #;arm-a32:get-instruction 38 | arm-a64:get-instruction)))))) 39 | 40 | (define (available-disassemblers) 41 | (register-all-disassemblers) 42 | (p:available-disassemblers)) 43 | 44 | (define (get-disassembler name) 45 | (register-all-disassemblers) 46 | (or (p:get-disassembler name) 47 | (error 'get-disassembler "This disassembler has not been registered" name)))) 48 | -------------------------------------------------------------------------------- /machine-code/disassembler/arm-aarch64.sls: -------------------------------------------------------------------------------- 1 | ;; -*- mode: scheme; coding: utf-8 -*- 2 | ;; Copyright © 2017, 2018 Göran Weinholt 3 | ;; SPDX-License-Identifier: MIT 4 | #!r6rs 5 | 6 | ;; AArch64 definitions. 7 | 8 | (library (machine-code disassembler arm-aarch64) 9 | (export 10 | system-registers) 11 | (import 12 | (rnrs (6))) 13 | 14 | (define system-registers 15 | '( 16 | ;;; Debug system registers 17 | ( OSDTRRX_EL1 2 0 0 0 2) 18 | ( MDCCINT_EL1 2 0 0 2 0) 19 | ( MDSCR_EL1 2 0 0 2 2) 20 | ( OSDTRTX_EL1 2 0 0 3 2) 21 | ( OSECCR_EL1 2 0 0 6 2) 22 | ( DBGBVR0_EL1 2 0 0 0 4) 23 | ( DBGBVR1_EL1 2 0 0 1 4) 24 | ( DBGBVR2_EL1 2 0 0 2 4) 25 | ( DBGBVR3_EL1 2 0 0 3 4) 26 | ( DBGBVR4_EL1 2 0 0 4 4) 27 | ( DBGBVR5_EL1 2 0 0 5 4) 28 | ( DBGBVR6_EL1 2 0 0 6 4) 29 | ( DBGBVR7_EL1 2 0 0 7 4) 30 | ( DBGBCR0_EL1 2 0 0 0 5) 31 | ( DBGBCR1_EL1 2 0 0 1 5) 32 | ( DBGBCR2_EL1 2 0 0 2 5) 33 | ( DBGBCR3_EL1 2 0 0 3 5) 34 | ( DBGBCR4_EL1 2 0 0 4 5) 35 | ( DBGBCR5_EL1 2 0 0 5 5) 36 | ( DBGBCR6_EL1 2 0 0 6 5) 37 | ( DBGBCR7_EL1 2 0 0 7 5) 38 | ( DBGWVR0_EL1 2 0 0 0 6) 39 | ( DBGWVR1_EL1 2 0 0 1 6) 40 | ( DBGWVR2_EL1 2 0 0 2 6) 41 | ( DBGWVR3_EL1 2 0 0 3 6) 42 | ( DBGWVR4_EL1 2 0 0 4 6) 43 | ( DBGWVR5_EL1 2 0 0 5 6) 44 | ( DBGWVR6_EL1 2 0 0 6 6) 45 | ( DBGWVR7_EL1 2 0 0 7 6) 46 | ( DBGWCR0_EL1 2 0 0 0 7) 47 | ( DBGWCR1_EL1 2 0 0 1 7) 48 | ( DBGWCR2_EL1 2 0 0 2 7) 49 | ( DBGWCR3_EL1 2 0 0 3 7) 50 | ( DBGWCR4_EL1 2 0 0 4 7) 51 | ( DBGWCR5_EL1 2 0 0 5 7) 52 | ( DBGWCR6_EL1 2 0 0 6 7) 53 | ( DBGWCR7_EL1 2 0 0 7 7) 54 | ( MDRAR_EL1 2 0 1 0 0) 55 | ( OSLAR_EL1 2 0 1 0 4) 56 | ( OSLSR_EL1 2 0 1 1 4) 57 | ( OSDLR_EL1 2 0 1 3 4) 58 | ( DBGPRCR_EL1 2 0 1 4 4) 59 | ( DBGCLAIMSET_EL1 2 0 7 8 6) 60 | ( DBGCLAIMCLR_EL1 2 0 7 9 6) 61 | (DBGAUTHSTATUS_EL1 2 0 7 14 6) 62 | ( MDCCSR_EL0 2 3 0 1 0) 63 | ( DBGDTR_EL0 2 3 0 4 0) 64 | ( DBGDTRRX_EL0 2 3 0 5 0) 65 | ( DBGDTRTX_EL0 2 3 0 5 0) 66 | ( DBGVCR32_EL2 2 4 0 7 0) 67 | ;;; Non-debug system registers 68 | ( MIDR_EL1 3 0 0 0 0) 69 | ( MPIDR_EL1 3 0 0 0 5) 70 | ( REVIDR_EL1 3 0 0 0 6) 71 | ( ID_PFR0_EL1 3 0 0 1 0) 72 | ( ID_PFR1_EL1 3 0 0 1 1) 73 | ( ID_DFR0_EL1 3 0 0 1 2) 74 | ( ID_AFR0_EL1 3 0 0 1 3) 75 | ( ID_MMFR0_EL1 3 0 0 1 4) 76 | ( ID_MMFR1_EL1 3 0 0 1 5) 77 | ( ID_MMFR2_EL1 3 0 0 1 6) 78 | ( ID_MMFR3_EL1 3 0 0 1 7) 79 | ( ID_ISAR0_EL1 3 0 0 2 0) 80 | ( ID_ISAR1_EL1 3 0 0 2 1) 81 | ( ID_ISAR2_EL1 3 0 0 2 2) 82 | ( ID_ISAR3_EL1 3 0 0 2 3) 83 | ( ID_ISAR4_EL1 3 0 0 2 4) 84 | ( ID_ISAR5_EL1 3 0 0 2 5) 85 | ( ID_MMFR4_EL1 3 0 0 2 6) 86 | ( MVFR0_EL1 3 0 0 3 0) 87 | ( MVFR1_EL1 3 0 0 3 1) 88 | ( MVFR2_EL1 3 0 0 3 2) 89 | ( ID_AA64PFR0_EL1 3 0 0 4 0) 90 | ( ID_AA64PFR1_EL1 3 0 0 4 1) 91 | ( ID_AA64DFR0_EL1 3 0 0 5 0) 92 | ( ID_AA64DFR1_EL1 3 0 0 5 1) 93 | ( ID_AA64AFR0_EL1 3 0 0 5 4) 94 | ( ID_AA64AFR1_EL1 3 0 0 5 5) 95 | ( ID_AA64ISAR0_EL1 3 0 0 6 0) 96 | ( ID_AA64ISAR1_EL1 3 0 0 6 1) 97 | ( ID_AA64MMFR0_EL1 3 0 0 7 0) 98 | ( ID_AA64MMFR1_EL1 3 0 0 7 1) 99 | ( SCTLR_EL1 3 0 1 0 0) 100 | ( ACTLR_EL1 3 0 1 0 1) 101 | ( CPACR_EL1 3 0 1 0 2) 102 | ( TTBR0_EL1 3 0 2 0 0) 103 | ( TTBR1_EL1 3 0 2 0 1) 104 | ( TCR_EL1 3 0 2 0 2) 105 | ( ICC_PMR_EL1 3 0 4 6 0) 106 | ( ICV_PMR_EL1 3 0 4 6 0) 107 | ( AFSR0_EL1 3 0 5 1 0) 108 | ( AFSR1_EL1 3 0 5 1 1) 109 | ( ESR_EL1 3 0 5 2 0) 110 | ( FAR_EL1 3 0 6 0 0) 111 | ( PAR_EL1 3 0 7 4 0) 112 | ( PMINTENSET_EL1 3 0 9 14 1) 113 | ( PMINTENCLR_EL1 3 0 9 14 2) 114 | ( MAIR_EL1 3 0 10 2 0) 115 | ( AMAIR_EL1 3 0 10 3 0) 116 | ( VBAR_EL1 3 0 12 0 0) 117 | ( RVBAR_EL1 3 0 12 0 1) 118 | ( RMR_EL1 3 0 12 0 2) 119 | ( ISR_EL1 3 0 12 1 0) 120 | ( ICC_IAR0_EL1 3 0 12 8 0) 121 | ( ICV_IAR0_EL1 3 0 12 8 0) 122 | ( ICC_EOIR0_EL1 3 0 12 8 1) 123 | ( ICV_EOIR0_EL1 3 0 12 8 1) 124 | ( ICC_HPPIR0_EL1 3 0 12 8 2) 125 | ( ICV_HPPIR0_EL1 3 0 12 8 2) 126 | ( ICC_BPR0_EL1 3 0 12 8 3) 127 | ( ICV_BPR0_EL1 3 0 12 8 3) 128 | ( ICC_AP0R0_EL1 3 0 12 8 4) 129 | ( ICC_AP0R1_EL1 3 0 12 8 5) 130 | ( ICC_AP0R2_EL1 3 0 12 8 6) 131 | ( ICC_AP0R3_EL1 3 0 12 8 7) 132 | ( ICV_AP0R0_EL1 3 0 12 8 4) 133 | ( ICV_AP0R1_EL1 3 0 12 8 5) 134 | ( ICV_AP0R2_EL1 3 0 12 8 6) 135 | ( ICV_AP0R3_EL1 3 0 12 8 7) 136 | ( ICC_AP1R0_EL1 3 0 12 9 0) 137 | ( ICC_AP1R1_EL1 3 0 12 9 1) 138 | ( ICC_AP1R2_EL1 3 0 12 9 2) 139 | ( ICC_AP1R3_EL1 3 0 12 9 3) 140 | ( ICV_AP1R0_EL1 3 0 12 9 0) 141 | ( ICV_AP1R1_EL1 3 0 12 9 1) 142 | ( ICV_AP1R2_EL1 3 0 12 9 2) 143 | ( ICV_AP1R3_EL1 3 0 12 9 3) 144 | ( ICC_DIR_EL1 3 0 12 11 1) 145 | ( ICV_DIR_EL1 3 0 12 11 1) 146 | ( ICC_RPR_EL1 3 0 12 11 3) 147 | ( ICV_RPR_EL1 3 0 12 11 3) 148 | ( ICC_SGI1R_EL1 3 0 12 11 5) 149 | ( ICC_ASGI1R_EL1 3 0 12 11 6) 150 | ( ICC_SGI0R_EL1 3 0 12 11 7) 151 | ( ICC_IAR1_EL1 3 0 12 12 0) 152 | ( ICV_IAR1_EL1 3 0 12 12 0) 153 | ( ICC_EOIR1_EL1 3 0 12 12 1) 154 | ( ICV_EOIR1_EL1 3 0 12 12 1) 155 | ( ICC_HPPIR1_EL1 3 0 12 12 2) 156 | ( ICV_HPPIR1_EL1 3 0 12 12 2) 157 | ( ICC_BPR1_EL1 3 0 12 12 3) 158 | ( ICV_BPR1_EL1 3 0 12 12 3) 159 | ( ICC_CTLR_EL1 3 0 12 12 4) 160 | ( ICV_CTLR_EL1 3 0 12 12 4) 161 | ( ICC_SRE_EL1 3 0 12 12 5) 162 | ( ICC_IGRPEN0_EL1 3 0 12 12 6) 163 | ( ICC_IGRPEN1_EL1 3 0 12 12 7) 164 | ( ICC_IGRPEN0_EL1 3 0 12 12 6) 165 | ( ICV_IGRPEN0_EL1 3 0 12 12 6) 166 | ( ICC_IGRPEN1_EL1 3 0 12 12 7) 167 | ( ICV_IGRPEN1_EL1 3 0 12 12 7) 168 | ( CONTEXTIDR_EL1 3 0 13 0 1) 169 | ( TPIDR_EL1 3 0 13 0 4) 170 | ( CNTKCTL_EL1 3 0 14 1 0) 171 | ( CCSIDR_EL1 3 1 0 0 0) 172 | ( CLIDR_EL1 3 1 0 0 1) 173 | ( AIDR_EL1 3 1 0 0 7) 174 | ( CSSELR_EL1 3 2 0 0 0) 175 | ( CTR_EL0 3 3 0 0 1) 176 | ( DCZID_EL0 3 3 0 0 7) 177 | ( PMCR_EL0 3 3 9 12 0) 178 | ( PMCNTENSET_EL0 3 3 9 12 1) 179 | ( PMCNTENCLR_EL0 3 3 9 12 2) 180 | ( PMOVSCLR_EL0 3 3 9 12 3) 181 | ( PMSWINC_EL0 3 3 9 12 4) 182 | ( PMSELR_EL0 3 3 9 12 5) 183 | ( PMCEID0_EL0 3 3 9 12 6) 184 | ( PMCEID1_EL0 3 3 9 12 7) 185 | ( PMCCNTR_EL0 3 3 9 13 0) 186 | ( PMXEVTYPER_EL0 3 3 9 13 1) 187 | ( PMXEVCNTR_EL0 3 3 9 13 2) 188 | ( PMUSERENR_EL0 3 3 9 14 0) 189 | ( PMOVSSET_EL0 3 3 9 14 3) 190 | ( TPIDR_EL0 3 3 13 0 2) 191 | ( TPIDRRO_EL0 3 3 13 0 3) 192 | ( CNTFRQ_EL0 3 3 14 0 0) 193 | ( CNTPCT_EL0 3 3 14 0 1) 194 | ( CNTVCT_EL0 3 3 14 0 2) 195 | ( CNTP_TVAL_EL0 3 3 14 2 0) 196 | ( CNTP_CTL_EL0 3 3 14 2 1) 197 | ( CNTP_CVAL_EL0 3 3 14 2 2) 198 | ( CNTV_TVAL_EL0 3 3 14 3 0) 199 | ( CNTV_CTL_EL0 3 3 14 3 1) 200 | ( CNTV_CVAL_EL0 3 3 14 3 2) 201 | ( PMEVCNTR0_EL0 3 3 14 8 0) 202 | ( PMEVCNTR1_EL0 3 3 14 8 1) 203 | ( PMEVCNTR2_EL0 3 3 14 8 2) 204 | ( PMEVCNTR3_EL0 3 3 14 8 3) 205 | ( PMEVCNTR4_EL0 3 3 14 8 4) 206 | ( PMEVCNTR5_EL0 3 3 14 8 5) 207 | ( PMEVCNTR6_EL0 3 3 14 8 6) 208 | ( PMEVCNTR7_EL0 3 3 14 8 7) 209 | ( PMEVCNTR8_EL0 3 3 14 9 0) 210 | ( PMEVCNTR9_EL0 3 3 14 9 1) 211 | ( PMEVCNTR10_EL0 3 3 14 9 2) 212 | ( PMEVCNTR11_EL0 3 3 14 9 3) 213 | ( PMEVCNTR12_EL0 3 3 14 9 4) 214 | ( PMEVCNTR13_EL0 3 3 14 9 5) 215 | ( PMEVCNTR14_EL0 3 3 14 9 6) 216 | ( PMEVCNTR15_EL0 3 3 14 9 7) 217 | ( PMEVCNTR16_EL0 3 3 14 10 0) 218 | ( PMEVCNTR17_EL0 3 3 14 10 1) 219 | ( PMEVCNTR18_EL0 3 3 14 10 2) 220 | ( PMEVCNTR19_EL0 3 3 14 10 3) 221 | ( PMEVCNTR20_EL0 3 3 14 10 4) 222 | ( PMEVCNTR21_EL0 3 3 14 10 5) 223 | ( PMEVCNTR22_EL0 3 3 14 10 6) 224 | ( PMEVCNTR23_EL0 3 3 14 10 7) 225 | ( PMEVCNTR24_EL0 3 3 14 11 0) 226 | ( PMEVCNTR25_EL0 3 3 14 11 1) 227 | ( PMEVCNTR26_EL0 3 3 14 11 2) 228 | ( PMEVCNTR27_EL0 3 3 14 11 3) 229 | ( PMEVCNTR28_EL0 3 3 14 11 4) 230 | ( PMEVCNTR29_EL0 3 3 14 11 5) 231 | ( PMEVCNTR30_EL0 3 3 14 11 6) 232 | ( PMEVTYPER0_EL0 3 3 14 12 0) 233 | ( PMEVTYPER1_EL0 3 3 14 12 1) 234 | ( PMEVTYPER2_EL0 3 3 14 12 2) 235 | ( PMEVTYPER3_EL0 3 3 14 12 3) 236 | ( PMEVTYPER4_EL0 3 3 14 12 4) 237 | ( PMEVTYPER5_EL0 3 3 14 12 5) 238 | ( PMEVTYPER6_EL0 3 3 14 12 6) 239 | ( PMEVTYPER7_EL0 3 3 14 12 7) 240 | ( PMEVTYPER8_EL0 3 3 14 13 0) 241 | ( PMEVTYPER9_EL0 3 3 14 13 1) 242 | ( PMEVTYPER10_EL0 3 3 14 13 2) 243 | ( PMEVTYPER11_EL0 3 3 14 13 3) 244 | ( PMEVTYPER12_EL0 3 3 14 13 4) 245 | ( PMEVTYPER13_EL0 3 3 14 13 5) 246 | ( PMEVTYPER14_EL0 3 3 14 13 6) 247 | ( PMEVTYPER15_EL0 3 3 14 13 7) 248 | ( PMEVTYPER16_EL0 3 3 14 14 0) 249 | ( PMEVTYPER17_EL0 3 3 14 14 1) 250 | ( PMEVTYPER18_EL0 3 3 14 14 2) 251 | ( PMEVTYPER19_EL0 3 3 14 14 3) 252 | ( PMEVTYPER20_EL0 3 3 14 14 4) 253 | ( PMEVTYPER21_EL0 3 3 14 14 5) 254 | ( PMEVTYPER22_EL0 3 3 14 14 6) 255 | ( PMEVTYPER23_EL0 3 3 14 14 7) 256 | ( PMEVTYPER24_EL0 3 3 14 15 0) 257 | ( PMEVTYPER25_EL0 3 3 14 15 1) 258 | ( PMEVTYPER26_EL0 3 3 14 15 2) 259 | ( PMEVTYPER27_EL0 3 3 14 15 3) 260 | ( PMEVTYPER28_EL0 3 3 14 15 4) 261 | ( PMEVTYPER29_EL0 3 3 14 15 5) 262 | ( PMEVTYPER30_EL0 3 3 14 15 6) 263 | ( PMCCFILTR_EL0 3 3 14 15 7) 264 | ( VPIDR_EL2 3 4 0 0 0) 265 | ( VMPIDR_EL2 3 4 0 0 5) 266 | ( SCTLR_EL2 3 4 1 0 0) 267 | ( ACTLR_EL2 3 4 1 0 1) 268 | ( HCR_EL2 3 4 1 1 0) 269 | ( MDCR_EL2 3 4 1 1 1) 270 | ( CPTR_EL2 3 4 1 1 2) 271 | ( HSTR_EL2 3 4 1 1 3) 272 | ( HACR_EL2 3 4 1 1 7) 273 | ( TTBR0_EL2 3 4 2 0 0) 274 | ( TCR_EL2 3 4 2 0 2) 275 | ( VTTBR_EL2 3 4 2 1 0) 276 | ( VTCR_EL2 3 4 2 1 2) 277 | ( DACR32_EL2 3 4 3 0 0) 278 | ( IFSR32_EL2 3 4 5 0 1) 279 | ( AFSR0_EL2 3 4 5 1 0) 280 | ( AFSR1_EL2 3 4 5 1 1) 281 | ( ESR_EL2 3 4 5 2 0) 282 | ( FPEXC32_EL2 3 4 5 3 0) 283 | ( FAR_EL2 3 4 6 0 0) 284 | ( HPFAR_EL2 3 4 6 0 4) 285 | ( MAIR_EL2 3 4 10 2 0) 286 | ( AMAIR_EL2 3 4 10 3 0) 287 | ( VBAR_EL2 3 4 12 0 0) 288 | ( RVBAR_EL2 3 4 12 0 1) 289 | ( RMR_EL2 3 4 12 0 2) 290 | ( ICH_AP0R0_EL2 3 4 12 8 0) 291 | ( ICH_AP0R1_EL2 3 4 12 8 1) 292 | ( ICH_AP0R2_EL2 3 4 12 8 2) 293 | ( ICH_AP0R3_EL2 3 4 12 8 3) 294 | ( ICH_AP1R0_EL2 3 4 12 9 0) 295 | ( ICH_AP1R1_EL2 3 4 12 9 1) 296 | ( ICH_AP1R2_EL2 3 4 12 9 2) 297 | ( ICH_AP1R3_EL2 3 4 12 9 3) 298 | ( ICC_SRE_EL2 3 4 12 9 5) 299 | ( ICH_HCR_EL2 3 4 12 11 0) 300 | ( ICH_VTR_EL2 3 4 12 11 1) 301 | ( ICH_MISR_EL2 3 4 12 11 2) 302 | ( ICH_EISR_EL2 3 4 12 11 3) 303 | ( ICH_ELRSR_EL2 3 4 12 11 5) 304 | ( ICH_VMCR_EL2 3 4 12 11 7) 305 | ( ICH_LR0_EL2 3 4 12 12 0) 306 | ( ICH_LR1_EL2 3 4 12 12 1) 307 | ( ICH_LR2_EL2 3 4 12 12 2) 308 | ( ICH_LR3_EL2 3 4 12 12 3) 309 | ( ICH_LR4_EL2 3 4 12 12 4) 310 | ( ICH_LR5_EL2 3 4 12 12 5) 311 | ( ICH_LR6_EL2 3 4 12 12 6) 312 | ( ICH_LR7_EL2 3 4 12 12 7) 313 | ( ICH_LR8_EL2 3 4 12 13 8) 314 | ( ICH_LR9_EL2 3 4 12 13 9) 315 | ( ICH_LR10_EL2 3 4 12 13 10) 316 | ( ICH_LR11_EL2 3 4 12 13 11) 317 | ( ICH_LR12_EL2 3 4 12 13 12) 318 | ( ICH_LR13_EL2 3 4 12 13 13) 319 | ( ICH_LR14_EL2 3 4 12 13 14) 320 | ( ICH_LR15_EL2 3 4 12 13 15) 321 | ( TPIDR_EL2 3 4 13 0 2) 322 | ( CNTVOFF_EL2 3 4 14 0 3) 323 | ( CNTHCTL_EL2 3 4 14 1 0) 324 | ( CNTHP_TVAL_EL2 3 4 14 2 0) 325 | ( CNTHP_CTL_EL2 3 4 14 2 1) 326 | ( CNTHP_CVAL_EL2 3 4 14 2 2) 327 | ( SCTLR_EL3 3 6 1 0 0) 328 | ( ACTLR_EL3 3 6 1 0 1) 329 | ( SCR_EL3 3 6 1 1 0) 330 | ( SDER32_EL3 3 6 1 1 1) 331 | ( CPTR_EL3 3 6 1 1 2) 332 | ( MDCR_EL3 3 6 1 3 1) 333 | ( TTBR0_EL3 3 6 2 0 0) 334 | ( TCR_EL3 3 6 2 0 2) 335 | ( AFSR0_EL3 3 6 5 1 0) 336 | ( AFSR1_EL3 3 6 5 1 1) 337 | ( ESR_EL3 3 6 5 2 0) 338 | ( FAR_EL3 3 6 6 0 0) 339 | ( MAIR_EL3 3 6 10 2 0) 340 | ( AMAIR_EL3 3 6 10 3 0) 341 | ( VBAR_EL3 3 6 12 0 0) 342 | ( RVBAR_EL3 3 6 12 0 1) 343 | ( RMR_EL3 3 6 12 0 2) 344 | ( ICC_CTLR_EL3 3 6 12 12 4) 345 | ( ICC_SRE_EL3 3 6 12 12 5) 346 | ( ICC_IGRPEN1_EL3 3 6 12 12 7) 347 | ( TPIDR_EL3 3 6 13 0 2) 348 | ( CNTPS_TVAL_EL1 3 7 14 2 0) 349 | ( CNTPS_CTL_EL1 3 7 14 2 1) 350 | ( CNTPS_CVAL_EL1 3 7 14 2 2) 351 | ;;; Special-purpose registers 352 | ( SPSR_EL1 3 0 4 0 0) 353 | ( ELR_EL1 3 0 4 0 1) 354 | ( SP_EL0 3 0 4 1 0) 355 | ( SPSel 3 0 4 2 0) 356 | ( CurrentEL 3 0 4 2 2) 357 | ( DAIF 3 3 4 2 1) 358 | ( NZCV 3 3 4 2 0) 359 | ( FPCR 3 3 4 4 0) 360 | ( FPSR 3 3 4 4 1) 361 | ( DSPSR_EL0 3 3 4 5 0) 362 | ( DLR_EL0 3 3 4 5 1) 363 | ( SPSR_EL2 3 4 4 0 0) 364 | ( ELR_EL2 3 4 4 0 1) 365 | ( SP_EL1 3 4 4 1 0) 366 | ( SPSR_irq 3 4 4 3 0) 367 | ( SPSR_abt 3 4 4 3 1) 368 | ( SPSR_und 3 4 4 3 2) 369 | ( SPSR_fiq 3 4 4 3 3) 370 | ( SPSR_EL3 3 6 4 0 0) 371 | ( ELR_EL3 3 6 4 0 1) 372 | ( SP_EL2 3 6 4 1 0)))) 373 | -------------------------------------------------------------------------------- /machine-code/disassembler/arm-private.sls: -------------------------------------------------------------------------------- 1 | ;; -*- mode: scheme; coding: utf-8 -*- 2 | ;; Copyright © 2016, 2017, 2018 Göran Weinholt 3 | ;; SPDX-License-Identifier: MIT 4 | #!r6rs 5 | 6 | (library (machine-code disassembler arm-private) 7 | (export 8 | define-encoding != &= !&=) 9 | (import 10 | (rnrs (6)) 11 | (machine-code disassembler private)) 12 | 13 | (define != (lambda (x y) (not (= x y)))) 14 | 15 | ;; (&= x 'b101x) matches when (member x '(#b1010 #b1011)) 16 | (define-syntax &= 17 | (lambda (x) 18 | (syntax-case x (quote) 19 | [(_ var 'bit-pattern) 20 | (and (or (identifier? #'var) (integer? (syntax->datum #'var))) (identifier? #'bit-pattern)) 21 | (if (not (char=? #\b (string-ref (symbol->string (syntax->datum #'bit-pattern)) 0))) 22 | (syntax-violation '&= "Invalid pattern (must be a quoted symbol starting #\\b)" x #'bit-pattern) 23 | (let ((pattern (symbol->string (syntax->datum #'bit-pattern)))) 24 | (let lp ((i 1) (bits 0) (mask 0)) 25 | (if (fx=? i (string-length pattern)) 26 | (if (eqv? mask 0) 27 | #'#t 28 | #`(eqv? (bitwise-and var #,mask) #,bits)) 29 | (case (string-ref pattern i) 30 | [(#\x) 31 | (lp (+ i 1) bits mask)] 32 | [(#\0 #\1) 33 | (let ((bit (if (eqv? (string-ref pattern i) #\0) 0 1)) 34 | (idx (- (string-length pattern) i 1))) 35 | (lp (+ i 1) 36 | (bitwise-ior bits (bitwise-arithmetic-shift-left bit idx)) 37 | (bitwise-ior mask (bitwise-arithmetic-shift-left 1 idx))))] 38 | [else 39 | (syntax-violation '&= "Invalid pattern (only x, 0 and 1 are allowed)" x #'bit-pattern)])))))] 40 | [(_ var bit-pattern) 41 | #'(eqv? var bit-pattern)]))) 42 | 43 | (define-syntax !&= 44 | (lambda (x) 45 | (syntax-case x () 46 | [(_ var bit-pattern) 47 | #'(not (&= var bit-pattern))]))) 48 | 49 | ;; Syntax for defining instruction encodings. The fields of the 50 | ;; instructions are written with the index of the top field bit. 51 | ;; Fields can be given an identifier which will be bound to the 52 | ;; field value, or a field constraint. This matches what is seen in 53 | ;; the instruction set encoding chapters of the ARM manuals. 54 | 55 | (define-syntax field-eqv? 56 | (lambda (x) 57 | (syntax-case x (!= quote) 58 | [(_ field (!= value)) 59 | #'(!&= field value)] 60 | [(_ field value) 61 | #'(&= field value)]))) 62 | 63 | (define-syntax define-encoding 64 | (lambda (x) 65 | (define debug #f) 66 | (define (get-next-top-bit top-bit field-spec*) 67 | (syntax-case field-spec* () 68 | [() -1] 69 | [((next-top-bit . _) field-spec* ...) 70 | (and (fixnum? (syntax->datum #'next-top-bit)) 71 | (fxdatum #'next-top-bit) (syntax->datum top-bit))) 72 | (syntax->datum #'next-top-bit)])) 73 | (define (wrap-body name lhs* rhs* body) 74 | (with-syntax ([(lhs* ...) (reverse lhs*)] 75 | [(rhs* ...) (reverse rhs*)]) 76 | #`(let ((lhs* rhs*) ...) 77 | #,(with-syntax ([err #`(raise-UD #,(string-append "Unallocated " 78 | (symbol->string (syntax->datum name)) 79 | " op") 80 | `(lhs* ,lhs*) ...)]) 81 | (let f ((body body)) 82 | (syntax-case body (select match) 83 | [(select pc instruction) 84 | #'err] 85 | [(select pc instruction option option* ...) 86 | (if debug 87 | #`(let ((x (option pc instruction)) 88 | (y (guard (exn ((invalid-opcode? exn) #f)) 89 | #,(f #'(select pc instruction option* ...))))) 90 | (if (and x y) 91 | (error 'name "Indistinct encoding" pc instruction x y) 92 | (or x y))) 93 | #`(or (option pc instruction) 94 | #,(f #'(select pc instruction option* ...))))] 95 | [(match (field* ...)) 96 | #'err] 97 | [(match (field* ...) [(value* ...) expr*] . k*) 98 | (if debug 99 | #`(let ((x (and (field-eqv? field* value*) ...)) 100 | (y (guard (exn ((invalid-opcode? exn) #f)) 101 | #,(f #'(match (field* ...) . k*))))) 102 | (if (and x y) 103 | (error 'name "Indistinct encoding" expr* y) 104 | (if x expr* y))) 105 | #`(if (and (field-eqv? field* value*) ...) 106 | expr* 107 | #,(f #'(match (field* ...) . k*))))])))))) 108 | (syntax-case x () 109 | [(_ (encoding-name pc instruction field-spec* ...)) 110 | #'(define-encoding (encoding-name pc instruction field-spec* ...) 111 | (select pc instruction))] 112 | [(_ (encoding-name pc instruction field-spec* ...) 113 | body) 114 | (and (identifier? #'encoding-name) (identifier? #'instruction)) 115 | (let loop ([field-spec* #'(field-spec* ...)] 116 | [eq-mask 0] [eq-bits 0] 117 | [neq-mask 0] [neq-bits 0] 118 | [lhs* '()] [rhs* '()]) 119 | (syntax-case field-spec* (= !=) 120 | [() 121 | (with-syntax ([wrapped-body (wrap-body #'encoding-name lhs* rhs* #'body)]) 122 | (unless (= (bitwise-and eq-bits eq-mask) eq-bits) 123 | (syntax-violation 'define-encoding "Bits do not match the mask, bad constraints?" 124 | x field-spec*)) 125 | #`(define (encoding-name pc instruction) 126 | (and (eqv? (bitwise-and instruction #,eq-mask) #,eq-bits) 127 | (or (eqv? #,neq-mask 0) 128 | (not (eqv? (bitwise-and instruction #,neq-mask) #,neq-bits))) 129 | wrapped-body)))] 130 | [((top-bit) field-spec* ...) 131 | (fixnum? (syntax->datum #'top-bit)) 132 | ;; Ignore anything of the form (). 133 | (loop #'(field-spec* ...) eq-mask eq-bits neq-mask neq-bits lhs* rhs*)] 134 | [((top-bit name) field-spec* ...) 135 | (and (fixnum? (syntax->datum #'top-bit)) (identifier? #'name)) 136 | ;; Defines a field. 137 | (let* ((next-top-bit (get-next-top-bit #'top-bit #'(field-spec* ...)))) 138 | (with-syntax ((accessor #`(bitwise-bit-field instruction 139 | (+ #,next-top-bit 1) (+ top-bit 1)))) 140 | (loop #'(field-spec* ...) eq-mask eq-bits neq-mask neq-bits 141 | #`(name #,@lhs*) #`(accessor #,@rhs*))))] 142 | [((top-bit (= field-bits)) field-spec* ...) 143 | (and (fixnum? (syntax->datum #'top-bit)) (fixnum? (syntax->datum #'field-bits))) 144 | ;; Defines a constraint (the field must be equal to field-bits). 145 | (let* ((next-top-bit (get-next-top-bit #'top-bit #'(field-spec* ...))) 146 | (bottom-bit (fx+ next-top-bit 1)) 147 | (width (fx+ (fx- (syntax->datum #'top-bit) bottom-bit) 1))) 148 | (loop #'(field-spec* ...) 149 | (bitwise-ior eq-mask 150 | (bitwise-arithmetic-shift-left (- (bitwise-arithmetic-shift-left 1 width) 1) 151 | bottom-bit)) 152 | (bitwise-ior eq-bits (bitwise-arithmetic-shift-left (syntax->datum #'field-bits) bottom-bit)) 153 | neq-mask neq-bits 154 | lhs* rhs*))] 155 | [((top-bit (!= field-bits) name) field-spec* ...) 156 | (and (fixnum? (syntax->datum #'top-bit)) (fixnum? (syntax->datum #'field-bits))) 157 | ;; Defines a field with a constraint (the field must be unequal to field-bits). 158 | (let* ((next-top-bit (get-next-top-bit #'top-bit #'(field-spec* ...))) 159 | (bottom-bit (fx+ next-top-bit 1)) 160 | (width (fx+ (fx- (syntax->datum #'top-bit) bottom-bit) 1))) 161 | (with-syntax ((accessor #`(bitwise-bit-field instruction 162 | (+ #,next-top-bit 1) (+ top-bit 1)))) 163 | (loop #'(field-spec* ...) 164 | eq-mask eq-bits 165 | (bitwise-ior neq-mask 166 | (bitwise-arithmetic-shift-left (- (bitwise-arithmetic-shift-left 1 width) 1) 167 | bottom-bit)) 168 | (bitwise-ior neq-bits 169 | (bitwise-arithmetic-shift-left (syntax->datum #'field-bits) bottom-bit)) 170 | #`(name #,@lhs*) #`(accessor #,@rhs*))))]))])))) 171 | -------------------------------------------------------------------------------- /machine-code/disassembler/private.rkt: -------------------------------------------------------------------------------- 1 | ;; -*- mode: scheme; coding: utf-8 -*- 2 | ;; Copyright © 2010, 2012, 2016, 2017, 2018 Göran Weinholt 3 | ;; SPDX-License-Identifier: MIT 4 | #!r6rs 5 | 6 | ;; Code shared between the disassemblers. Should not be imported by 7 | ;; anyone else. 8 | 9 | (library (machine-code disassembler private) 10 | (export 11 | raise-UD invalid-opcode? map-in-order 12 | register-disassembler 13 | available-disassemblers get-disassembler 14 | make-disassembler disassembler? disassembler-name 15 | disassembler-min-instruction-size 16 | disassembler-max-instruction-size 17 | disassembler-instruction-getter) 18 | (import 19 | (rnrs (6))) 20 | 21 | (define (map-in-order p l) 22 | (if (null? l) 23 | '() 24 | (cons (p (car l)) 25 | (map-in-order p (cdr l))))) 26 | 27 | (define-condition-type &invalid-opcode &condition 28 | make-invalid-opcode invalid-opcode?) 29 | 30 | (define (raise-UD msg . irritants) 31 | (raise (condition 32 | (make-who-condition 'get-instruction) 33 | (make-message-condition msg) 34 | (make-irritants-condition irritants) 35 | (make-invalid-opcode)))) 36 | 37 | (define-record-type disassembler 38 | (fields name 39 | min-instruction-size 40 | max-instruction-size 41 | instruction-getter)) 42 | 43 | (define *registered-disassemblers* '()) 44 | 45 | (define (register-disassembler disassembler) 46 | (set! *registered-disassemblers* (cons (cons (disassembler-name disassembler) 47 | disassembler) 48 | *registered-disassemblers*))) 49 | 50 | (define (available-disassemblers) 51 | (map car *registered-disassemblers*)) 52 | 53 | (define (get-disassembler name) 54 | (cond ((assq name *registered-disassemblers*) => cdr) 55 | (else #f)))) 56 | -------------------------------------------------------------------------------- /machine-code/disassembler/x86.rkt: -------------------------------------------------------------------------------- 1 | ;; -*- mode: scheme; coding: utf-8 -*- 2 | ;; Disassembler for the Intel x86-16/32/64 instruction set. 3 | ;; Copyright © 2008, 2009, 2010, 2012, 2013, 2016, 2017, 2018 Göran Weinholt 4 | ;; SPDX-License-Identifier: MIT 5 | #!r6rs 6 | 7 | ;;; Idea 8 | 9 | ;; One goal is to show the instructions as the processor would 10 | ;; interpret them (if it has support for the instruction at all, that 11 | ;; is), i.e. if the processor would give an invalid-opcode exception 12 | ;; for a specific instruction stream (and it's not obvious how a 13 | ;; future processor would not), then this library should raise an 14 | ;; exception with the &invalid-opcode condition. 15 | 16 | ;; Instructions with different semantics should always look different, 17 | ;; e.g. sysret, where a REX.W prefix modifies the semantics. So for 18 | ;; 64-bit operand sizes "sysretq" is returned. If the instruction has 19 | ;; an operand that can be used to decide the operand size, no such 20 | ;; suffix is necessary. 21 | 22 | (library (machine-code disassembler x86) 23 | (export 24 | get-instruction invalid-opcode?) 25 | (import 26 | (except (rnrs (6)) get-u8) 27 | (machine-code disassembler private) 28 | (machine-code disassembler x86-opcodes)) 29 | 30 | (define maximum-instruction-size 15) 31 | 32 | (define debug #f) 33 | 34 | (define (print . x) (for-each display x) (newline)) 35 | 36 | (define-enumeration prefix 37 | (operand address cs ds es fs gs ss lock repz repnz rex rex.w rex.r 38 | rex.x rex.b vex vex.l) 39 | prefix-set) 40 | 41 | (define-enumeration tag 42 | ;; Just used for the `tag' syntax. So here is a list of all tags 43 | ;; that can be given to the "collect" function. 44 | (modr/m sib disp immediate /is4 prefix opcode) 45 | tag-set) 46 | 47 | (define (has-modr/m? instr) 48 | ;; Not the prettiest function ever, but it works. 49 | (and (list? instr) 50 | (exists (lambda (op) 51 | (memv (string-ref (symbol->string op) 0) 52 | '(#\C #\D #\E #\G #\M #\N #\P #\Q #\R #\S #\U #\V #\W))) 53 | (cdr instr)))) 54 | 55 | (define (has-/is4? instr) 56 | (and (list? instr) 57 | (exists (lambda (op) (memq op '(In Kpd Kps Kss Ksd Lo Lx))) 58 | (cdr instr)))) 59 | 60 | ;;; Simple byte decoding 61 | 62 | (define (ModR/M-mod byte) 63 | (fxbit-field byte 6 8)) 64 | 65 | (define ModR/M-reg 66 | (case-lambda 67 | ((byte) 68 | (fxbit-field byte 3 6)) 69 | ((byte prefixes) 70 | (if (enum-set-member? (prefix rex.r) prefixes) 71 | (fxior #b1000 (fxbit-field byte 3 6)) 72 | (fxbit-field byte 3 6))))) 73 | 74 | (define ModR/M-r/m 75 | (case-lambda 76 | ((byte) 77 | (bitwise-bit-field byte 0 3)) 78 | ((byte prefixes) 79 | (if (enum-set-member? (prefix rex.b) prefixes) 80 | (bitwise-ior #b1000 (bitwise-bit-field byte 0 3)) 81 | (bitwise-bit-field byte 0 3))))) 82 | 83 | (define (print-modr/m byte prefixes) 84 | (print "ModR/M=#x" (number->string byte 16) 85 | " Mod=#b" (number->string (ModR/M-mod byte) 2) 86 | " Reg=#b" (number->string (ModR/M-reg byte prefixes) 2) 87 | " R/M=#b" (number->string (ModR/M-r/m byte prefixes) 2))) 88 | 89 | (define (print-sib byte prefixes) 90 | (print "SIB=#x" (number->string byte 16) 91 | " Scale=#b" (number->string (SIB-scale byte) 2) 92 | " Index=#b" (number->string (SIB-index byte prefixes) 2) 93 | " Base=#b" (number->string (SIB-base byte prefixes) 2))) 94 | 95 | (define (SIB-scale byte) 96 | (bitwise-arithmetic-shift-left 1 (bitwise-bit-field byte 6 8))) 97 | 98 | (define (SIB-index byte prefixes) 99 | (if (enum-set-member? (prefix rex.x) prefixes) 100 | (bitwise-ior #b1000 (bitwise-bit-field byte 3 6)) 101 | (bitwise-bit-field byte 3 6))) 102 | 103 | (define (SIB-base byte prefixes) 104 | (if (enum-set-member? (prefix rex.b) prefixes) 105 | (bitwise-ior #b1000 (bitwise-bit-field byte 0 3)) 106 | (bitwise-bit-field byte 0 3))) 107 | 108 | (define (VEX-vvvv byte mode) 109 | ;; Encodes another XMM operand. 110 | (bitwise-and (if (= mode 64) #b1111 #b111) ;VEX.vvvv 111 | (bitwise-xor #b1111 (bitwise-bit-field byte 3 7)))) 112 | 113 | (define (VEX-m-mmmm->table byte) 114 | (case (bitwise-bit-field byte 0 5) ;VEX.m-mmmm 115 | ((#b00001) (vector-ref opcodes #x0F)) 116 | ((#b00010) (vector-ref (vector-ref opcodes #x0F) #x38)) 117 | ((#b00011) (vector-ref (vector-ref opcodes #x0F) #x3A)) 118 | (else (raise-UD "Reserved VEX.m-mmmm encoding" 119 | (bitwise-bit-field byte 0 5))))) 120 | 121 | (define (XOP-map-select->table byte) 122 | (case (bitwise-bit-field byte 0 5) ;XOP.map_select 123 | ((#b01000) XOP-opcode-map-8) 124 | ((#b01001) XOP-opcode-map-9) 125 | ((#b01010) XOP-opcode-map-A) 126 | (else (raise-UD "Reserved XOP.map_select encoding" 127 | (bitwise-bit-field byte 0 5))))) 128 | 129 | (define (VEX3->prefixes prefixes mode byte1 byte2) 130 | (let ((byte1 (if (= mode 64) byte1 (bitwise-ior byte1 #b1110000)))) 131 | (fold-left enum-set-union 132 | prefixes 133 | (list 134 | (if (bitwise-bit-set? byte1 7) ;VEX.R 135 | (prefix-set) (prefix-set rex.r)) 136 | (if (bitwise-bit-set? byte1 6) ;VEX.X 137 | (prefix-set) (prefix-set rex.x)) 138 | (if (bitwise-bit-set? byte1 5) ;VEX.B 139 | (prefix-set) (prefix-set rex.b)) 140 | (if (bitwise-bit-set? byte2 7) ;VEX.W 141 | (prefix-set rex.w) (prefix-set)) 142 | (if (bitwise-bit-set? byte2 2) ;VEX.L 143 | (prefix-set vex.l) (prefix-set)) 144 | (case (bitwise-bit-field byte2 0 2) ;VEX.pp 145 | ((#b00) (prefix-set vex)) 146 | ((#b01) (prefix-set vex operand)) 147 | ((#b10) (prefix-set vex repz)) 148 | (else (prefix-set vex repnz))) 149 | (if (= mode 64) (prefix-set rex) (prefix-set)))))) 150 | 151 | (define (VEX2->prefixes prefixes mode byte1) 152 | (let ((byte (if (= mode 64) byte1 (bitwise-ior byte1 #b11000000)))) 153 | (fold-left enum-set-union 154 | prefixes 155 | (list 156 | (if (bitwise-bit-set? byte 7) ;VEX.R 157 | (prefix-set) (prefix-set rex.r)) 158 | (if (bitwise-bit-set? byte 2) ;VEX.L 159 | (prefix-set vex.l) (prefix-set)) 160 | (case (bitwise-bit-field byte 0 2) ;VEX.pp 161 | ((#b00) (prefix-set vex)) 162 | ((#b01) (prefix-set vex operand)) 163 | ((#b10) (prefix-set vex repz)) 164 | (else (prefix-set vex repnz))) 165 | (if (= mode 64) (prefix-set rex) (prefix-set)))))) 166 | 167 | (define (lookahead-is-valid-VEX? port) 168 | "In legacy mode, the upper two bits following a C4 or C5 byte must 169 | be #b11 for it to be considered a valid VEX prefix (so there will be 170 | no conflict with LES/LDS)." 171 | (let ((byte (lookahead-u8 port))) 172 | (and (not (eof-object? byte)) 173 | (= (bitwise-bit-field byte 6 8) #b11)))) 174 | 175 | (define (lookahead-is-valid-XOP? port) 176 | "The instruction is probably a POP if map_select < #b111." 177 | (let ((byte (lookahead-u8 port))) 178 | (and (fixnum? byte) 179 | (fx>=? (fxbit-field byte 0 5) #b00111)))) 180 | 181 | ;;; 182 | (define (VEX-prefix-check prefixes mode) 183 | (when (eqv? mode 16) 184 | (raise-UD "The VEX/XOP prefix is not valid in 16-bit modes")) 185 | (unless (enum-set=? (prefix-set) 186 | (enum-set-intersection 187 | prefixes (prefix-set vex rex lock operand repz repnz))) 188 | (raise-UD "Conflicting prefixes together with VEX/XOP"))) 189 | 190 | (define (needs-VEX prefixes) 191 | (unless (enum-set-member? (prefix vex) prefixes) 192 | (raise-UD "This instruction requires a VEX prefix"))) 193 | 194 | ;;; Port input 195 | (define (really-get-bytevector-n port n collect limiter tag) 196 | (limiter n collect tag) 197 | (let ((bv (get-bytevector-n port n))) 198 | (unless (eof-object? bv) 199 | (if collect (apply collect tag (bytevector->u8-list bv)))) 200 | (when (or (eof-object? bv) (< (bytevector-length bv) n)) 201 | (raise-UD "End of file inside instruction")) 202 | bv)) 203 | 204 | (define (get-u8 port limiter) 205 | (bytevector-u8-ref (really-get-bytevector-n port 1 #f limiter #f) 206 | 0)) 207 | 208 | (define (get-u8/collect port collect limiter tag) 209 | (bytevector-u8-ref (really-get-bytevector-n port 1 collect limiter tag) 210 | 0)) 211 | 212 | (define (get-s8/collect port collect limiter tag) 213 | (bytevector-s8-ref (really-get-bytevector-n port 1 collect limiter tag) 214 | 0)) 215 | 216 | (define (get-s16/collect port collect limiter tag) 217 | (bytevector-s16-ref (really-get-bytevector-n port 2 collect limiter tag) 218 | 0 (endianness little))) 219 | 220 | (define (get-u16/collect port collect limiter tag) 221 | (bytevector-u16-ref (really-get-bytevector-n port 2 collect limiter tag) 222 | 0 (endianness little))) 223 | 224 | (define (get-s32/collect port collect limiter tag) 225 | (bytevector-s32-ref (really-get-bytevector-n port 4 collect limiter tag) 226 | 0 (endianness little))) 227 | 228 | (define (get-u32/collect port collect limiter tag) 229 | (bytevector-u32-ref (really-get-bytevector-n port 4 collect limiter tag) 230 | 0 (endianness little))) 231 | 232 | (define (get-u64/collect port collect limiter tag) 233 | (bytevector-u64-ref (really-get-bytevector-n port 8 collect limiter tag) 234 | 0 (endianness little))) 235 | 236 | ;;; Register names 237 | (define reg-names8 '#(al cl dl bl ah ch dh bh)) 238 | 239 | ;; Intel calls these r8l, r9l, and so on, but since AMD invented 240 | ;; them, use AMD's names. 241 | (define reg-names8rex '#(al cl dl bl spl bpl sil dil 242 | r8b r9b r10b r11b r12b r13b r14b r15b)) 243 | 244 | (define reg-names16 '#(ax cx dx bx sp bp si di 245 | r8w r9w r10w r11w r12w r13w r14w r15w)) 246 | 247 | (define reg-names32 '#(eax ecx edx ebx esp ebp esi edi 248 | r8d r9d r10d r11d r12d r13d r14d r15d)) 249 | 250 | (define reg-names64 '#(rax rcx rdx rbx rsp rbp rsi rdi 251 | r8 r9 r10 r11 r12 r13 r14 r15)) 252 | 253 | ;; These are sometimes called mmx0, mmx1, etc for no apparent 254 | ;; reason. 255 | (define reg-names-mmx '#(mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7 256 | mm0 mm1 mm2 mm3 mm4 mm5 mm6 mm7)) 257 | 258 | (define reg-names-xmm '#(xmm0 xmm1 xmm2 xmm3 xmm4 xmm5 xmm6 xmm7 259 | xmm8 xmm9 xmm10 xmm11 xmm12 xmm13 xmm14 xmm15)) 260 | 261 | (define reg-names-ymm '#(ymm0 ymm1 ymm2 ymm3 ymm4 ymm5 ymm6 ymm7 262 | ymm8 ymm9 ymm10 ymm11 ymm12 ymm13 ymm14 ymm15)) 263 | 264 | (define reg-names-sreg '#(es cs ss ds fs gs #f #f 265 | es cs ss ds fs gs #f #f)) 266 | 267 | (define reg-names-creg '#(cr0 cr1 cr2 cr3 cr4 cr5 cr6 cr7 cr8 cr9 268 | cr10 cr11 cr12 cr13 cr14 cr15)) 269 | 270 | (define reg-names-dreg '#(dr0 dr1 dr2 dr3 dr4 dr5 dr6 dr7 dr8 dr9 271 | dr10 dr11 dr12 dr13 dr14 dr15)) 272 | 273 | (define reg-names-x87 '#(st0 st1 st2 st3 st4 st5 st6 st7 274 | st0 st1 st2 st3 st4 st5 st6 st7)) 275 | 276 | (define reg-names-bnd '#(bnd0 bnd1 bnd2 bnd3 #f #f #f #f 277 | #f #f #f #f #f #f #f #f)) 278 | 279 | ;;; Special cases 280 | (define fix-pseudo-ops 281 | (let ((pseudos-table (make-eq-hashtable))) 282 | (for-each (lambda (p) 283 | ;; Builds a hashtable where mnemonic maps to an 284 | ;; alist of (immbyte . pseudo-mnemonic). 285 | (hashtable-update! pseudos-table (car p) 286 | (lambda (old) 287 | (cons (cons (cadr p) (caddr p)) 288 | old)) 289 | '())) 290 | pseudo-mnemonics) 291 | (lambda (instr) 292 | "Check the Ib operand and use it to look up a pseudo mnemonic 293 | if any are available. Also used for 3DNow! instructions, where the Ib 294 | operand is an opcode extension." 295 | (let ((imm (car (reverse instr))) 296 | (mnemonic (car instr))) 297 | (cond ((and (number? imm) (hashtable-ref pseudos-table mnemonic #f)) => 298 | (lambda (immlist) 299 | (cond ((assq imm immlist) => 300 | (lambda (pseudo) 301 | (cons (cdr pseudo) 302 | (cdr (reverse (cdr (reverse instr))))))) 303 | ((eq? mnemonic '*3dnow*) 304 | (raise-UD "Reserved 3Dnow! instruction" imm)) 305 | (else instr)))) 306 | (else instr)))))) 307 | 308 | (define (fix-lock instruction prefixes) 309 | (cond ((not (enum-set-member? (prefix lock) prefixes)) 310 | instruction) 311 | ((or (null? (cdr instruction)) 312 | (not (list? (cadr instruction)))) 313 | (raise-UD "LOCK prefix requires a memory destination operand")) 314 | ((memq (car instruction) lock-instructions) => 315 | (lambda (name) 316 | (cons (string->symbol 317 | (string-append "lock." (symbol->string (car instruction)))) 318 | (cdr instruction)))) 319 | (else 320 | (raise-UD "LOCK prefix invalid for this instruction")))) 321 | 322 | (define (fix-branches instruction prefixes) 323 | "Annotate instructions with branch hints in IA-64 style." 324 | (cond ((memq (car instruction) branch-hint-instructions) => 325 | (lambda (name) 326 | (cond ((enum-set-member? (prefix cs) prefixes) 327 | (cons (string->symbol 328 | (string-append (symbol->string (car instruction)) 329 | ;; Statically Predict branch Not Taken 330 | ".spnt")) 331 | (cdr instruction))) 332 | ((enum-set-member? (prefix ds) prefixes) 333 | (cons (string->symbol 334 | (string-append (symbol->string (car instruction)) 335 | ;; Statically Predict branch Taken 336 | ".sptk")) 337 | (cdr instruction))) 338 | (else instruction)))) 339 | (else instruction))) 340 | 341 | (define (fix-rep instruction prefixes) 342 | (cond ((enum-set-member? (prefix repz) prefixes) 343 | (cond ((memq (car instruction) rep-instructions) 344 | (cons (string->symbol 345 | (string-append "rep." 346 | (symbol->string (car instruction)))) 347 | (cdr instruction))) 348 | ((memq (car instruction) repz-instructions) 349 | (cons (string->symbol 350 | (string-append "repz." 351 | (symbol->string (car instruction)))) 352 | (cdr instruction))) 353 | (else instruction))) 354 | ((enum-set-member? (prefix repnz) prefixes) 355 | (cond ((memq (car instruction) repz-instructions) 356 | (cons (string->symbol 357 | (string-append "repnz." 358 | (symbol->string (car instruction)))) 359 | (cdr instruction))) 360 | ((memq (car instruction) bnd-instructions) 361 | (cons (string->symbol 362 | (string-append "bnd." 363 | (symbol->string (car instruction)))) 364 | (cdr instruction))) 365 | (else instruction))) 366 | (else instruction))) 367 | 368 | (define (fix-nop instruction prefixes mode operand-size) 369 | (define (nop) 370 | (if (enum-set-member? (prefix repz) prefixes) '(pause) '(nop))) 371 | (case (car instruction) 372 | ((*nop*) 373 | (case mode 374 | ((32 64) 375 | (case operand-size 376 | ((16) (if (enum-set-member? (prefix rex.b) prefixes) 377 | '(xchg r8w ax) 378 | '(xchg ax ax))) 379 | ((32) (if (enum-set-member? (prefix rex.b) prefixes) 380 | '(xchg r8d eax) 381 | (nop))) 382 | (else (if (enum-set-member? (prefix rex.b) prefixes) 383 | '(xchg r8 rax) 384 | '(xchg rax rax))))) 385 | ((16) 386 | (case operand-size 387 | ((16) (nop)) 388 | (else '(xchg eax eax)))))) 389 | ((bndmk bndldx bndstx) 390 | ;; The reg-reg versions of these instructions are (nop Ev). 391 | (if (and (symbol? (cadr instruction)) 392 | (symbol? (caddr instruction))) 393 | '(nop) ;discard the operand 394 | instruction)) 395 | (else instruction))) 396 | 397 | (define (fix-rIP-relative instruction ip limiter) 398 | ;; Fixes the vectors created by rIP-relative. The rIP-relative 399 | ;; operands in x86 add an offset to the address where the 400 | ;; instruction ended, so the instruction length (managed by 401 | ;; limiter) is needed. 402 | `(,(car instruction) 403 | ,@(map (lambda (operand) 404 | (if (vector? operand) 405 | (+ ip (limiter) (vector-ref operand 0)) 406 | operand)) 407 | (cdr instruction)))) 408 | 409 | (define (rIP-relative ip mode offset) 410 | (if ip 411 | (vector offset) ;this is later fixed up 412 | `(+ ,(case mode 413 | ((16) 'ip) 414 | ((32) 'eip) 415 | (else 'rip)) 416 | ,offset))) 417 | 418 | ;;; Instruction stream decoding 419 | (define (get-displacement port mode collect limiter prefixes modr/m address-size) 420 | "Reads a SIB and a memory offset, if present. Returns a memory 421 | reference or a register number. This is later passed to 422 | translate-displacement." 423 | (let ((mod (ModR/M-mod modr/m)) 424 | (r/m (ModR/M-r/m modr/m prefixes))) 425 | (define (mem32/64 register regs sib?) 426 | (case mod 427 | ((#b00) (cond 428 | ((fx=? (fxand register #b111) #b101) 429 | (if (or (not (eqv? mode 64)) sib?) 430 | (list (get-s32/collect port collect limiter (tag disp))) 431 | (list (if (fx=? address-size 64) 'rip 'eip) 432 | (get-s32/collect port collect limiter (tag disp))))) 433 | (else 434 | (list (vector-ref regs register))))) 435 | ((#b01) (list (vector-ref regs register) 436 | (get-s8/collect port collect limiter (tag disp)))) 437 | (else (list (vector-ref regs register) 438 | (get-s32/collect port collect limiter (tag disp)))))) 439 | (define (mem16 ) 440 | (let ((addr16 '#((bx si) (bx di) (bp si) (bp di) (si) (di) (bp) (bx)))) 441 | (case mod 442 | ((#b00) (if (fx=? r/m #b110) 443 | (list (get-s16/collect port collect limiter (tag disp))) 444 | (vector-ref addr16 r/m))) 445 | ((#b01) (append (vector-ref addr16 r/m) 446 | (list (get-s8/collect port collect limiter (tag disp))))) 447 | (else (append (vector-ref addr16 r/m) 448 | (list (get-s16/collect port collect limiter (tag disp)))))))) 449 | (if (fx=? mod #b11) 450 | r/m ;register operand 451 | (if (fx=? address-size 16) 452 | (mem16) 453 | (let ((regs (if (fx=? address-size 64) reg-names64 reg-names32)) 454 | (sib (and (fx=? (ModR/M-r/m modr/m) #b100) 455 | (get-u8/collect port collect limiter (tag sib))))) 456 | (if sib 457 | (append (mem32/64 (SIB-base sib prefixes) regs #t) 458 | (if (fx=? (SIB-index sib prefixes) #b100) 459 | '() 460 | `((* ,(vector-ref regs (SIB-index sib prefixes)) 461 | ,(SIB-scale sib))))) 462 | (mem32/64 r/m regs #f))))))) 463 | 464 | (define (translate-displacement prefixes mode disp operand-size . memsize) 465 | (cond ((integer? disp) 466 | (vector-ref (case operand-size 467 | ((8) (if (enum-set-member? (prefix rex) prefixes) 468 | reg-names8rex 469 | reg-names8)) 470 | ((16) reg-names16) 471 | ((32) reg-names32) 472 | ((64) reg-names64) 473 | ((mmx) reg-names-mmx) 474 | ((xmm) (if (enum-set-member? (prefix vex.l) prefixes) 475 | reg-names-ymm 476 | reg-names-xmm)) 477 | ((x87) reg-names-x87) 478 | ((bnd) 479 | (when (fx>=? disp 4) 480 | (raise-UD "Invalid BND register encoded")) 481 | reg-names-bnd) 482 | ((notreg) 483 | (raise-UD "ModR/M encoded a register but memory is required")) 484 | ((generic) 485 | ;; This isn't really valid. It gets 486 | ;; corrected by fix-nop. 487 | reg-names32) 488 | (else 489 | (error 'translate-displacement 490 | "Unimplemented register operand size" operand-size))) 491 | disp)) 492 | ((list? disp) 493 | (cons (case (if (null? memsize) operand-size (car memsize)) 494 | ((8) 'mem8+) 495 | ((16) 'mem16+) 496 | ((32) 'mem32+) 497 | ((64 mmx) 'mem64+) 498 | ((x87 80) 'mem80+) 499 | ((xmm) (if (enum-set-member? (prefix vex.l) prefixes) 500 | 'mem256+ 501 | 'mem128+)) 502 | ((128) 'mem128+) 503 | ((bnd) 504 | (case mode 505 | ((64) 'mem128+) 506 | (else 'mem64+))) 507 | ((ptr16) 'mem16:16+) 508 | ((ptr24) 'mem16:24+) 509 | ((ptr32) 'mem16:32+) 510 | ((ptr64) 'mem16:64+) 511 | ((generic) 'mem+) 512 | ((notmem) 513 | (raise-UD "ModR/M byte encoded memory but a register is required")) 514 | (else 515 | (error 'translate-displacement 516 | "Unimplemented memory operand size" 517 | (if (null? memsize) operand-size (car memsize))))) 518 | (cond ((prefixes->segment-override prefixes mode #f) => 519 | (lambda (seg) (cons seg disp))) 520 | (else disp)))) 521 | (else 522 | ;; This happens if ModR/M should've been read, but wasn't. 523 | (error 'translate-displacement 524 | "Bad displacement" disp)))) 525 | 526 | (define (prefixes->segment-override prefixes mode default) 527 | ;; TODO: What if multiple segment overrides are given? 528 | "Get the effective segment, if any. The `default' segment for 529 | 64-bit mode is always #f." 530 | (cond ((enum-set-member? (prefix fs) prefixes) 'fs) 531 | ((enum-set-member? (prefix gs) prefixes) 'gs) 532 | (else 533 | (if (= mode 64) 534 | #f 535 | (cond ((enum-set-member? (prefix cs) prefixes) 'cs) 536 | ((enum-set-member? (prefix ds) prefixes) 'ds) 537 | ((enum-set-member? (prefix es) prefixes) 'es) 538 | ((enum-set-member? (prefix ss) prefixes) 'ss) 539 | (else default)))))) 540 | 541 | (define (get-operand port mode collect ip limiter op prefixes opcode vex.v 542 | operand-size address-size modr/m 543 | disp /is4) 544 | (let get-operand ((op op)) 545 | (case op 546 | ((Jb) (rIP-relative ip mode (get-s8/collect port collect limiter (tag disp)))) 547 | ((Jz) 548 | (case operand-size 549 | ((16) (rIP-relative ip mode (get-s16/collect port collect limiter (tag disp)))) 550 | ((32 64) (rIP-relative ip mode (get-s32/collect port collect limiter (tag disp)))))) 551 | 552 | ((Md/q) ;FIXME: verify 553 | (translate-displacement prefixes mode disp 554 | (if (= operand-size 16) 32 operand-size))) 555 | 556 | ((Gd/q) ;FIXME: verify 557 | (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 558 | (if (= operand-size 16) 32 operand-size))) 559 | 560 | ((Gq) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 561 | 64)) 562 | ((Gv) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 563 | operand-size)) 564 | ((Gz) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 565 | (if (= operand-size 16) 16 32))) 566 | ((Gd) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 567 | 32)) 568 | ((Gb) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 569 | 8)) 570 | ((Gw) (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 571 | 16)) 572 | 573 | ((Ev) (translate-displacement prefixes mode disp operand-size)) 574 | ((Eb) (translate-displacement prefixes mode disp 8)) 575 | ((Ew) (translate-displacement prefixes mode disp 16)) 576 | ((Ed) (translate-displacement prefixes mode disp 32)) 577 | ((Eq) (translate-displacement prefixes mode disp 64)) 578 | ((Ed/q) 579 | (translate-displacement prefixes mode disp 580 | (if (= operand-size 16) 32 operand-size))) 581 | ((Edq/mode) 582 | ;; Intel MPX (waiting for a proper opsyntax). 583 | (let ((x (translate-displacement prefixes mode disp 584 | (case mode 585 | ((16) 32) 586 | (else mode))))) 587 | (when (and (eqv? address-size 16) (pair? x)) 588 | (raise-UD "16-bit addressing with MPX instruction")) 589 | x)) 590 | ((Edq/mode/norel) 591 | ;; Intel MPX (waiting for a proper opsyntax). 592 | (let ((x (get-operand 'Edq/mode))) 593 | (if (and (pair? x) (eq? (cadr x) 'rip)) 594 | (raise-UD "RIP-relative addressing") 595 | x))) 596 | ((Ebnd) 597 | ;; Intel MPX (waiting for a proper opsyntax). 598 | (let ((x (translate-displacement prefixes mode disp 'bnd))) 599 | (when (and (eqv? address-size 16) (pair? x)) 600 | (raise-UD "16-bit addressing with MPX instruction")) 601 | x)) 602 | ((Emib) 603 | ;; Intel MPX (waiting for a proper opsyntax). 604 | (let ((x (translate-displacement prefixes mode disp 'generic))) 605 | (cond ((symbol? x) x) 606 | ((eqv? address-size 16) 607 | (raise-UD "16-bit addressing with MPX instruction")) 608 | ((eq? (cadr x) 'rip) 609 | (raise-UD "RIP-relative addressing")) 610 | (else 611 | (map (lambda (op) 612 | (if (and (pair? op) (eq? (car op) '*)) 613 | `(* ,(cadr op) 1) ;scale is ignored 614 | op)) 615 | x))))) 616 | 617 | ((Ib) (get-u8/collect port collect limiter (tag immediate))) 618 | ((IbS) 619 | ;; Sign extended immediate byte (not official opsyntax) 620 | (let ((byte (get-u8/collect port collect limiter (tag immediate)))) 621 | (if (bitwise-bit-set? byte 7) 622 | (case operand-size 623 | ((16) (bitwise-ior #xff00 byte)) 624 | ((32) (bitwise-ior #xffffff00 byte)) 625 | (else (bitwise-ior #xffffffffffffff00 byte))) 626 | byte))) 627 | ((Iw) (get-u16/collect port collect limiter (tag immediate))) 628 | ((Id) (get-u32/collect port collect limiter (tag immediate))) 629 | ((Iv) 630 | ((case operand-size 631 | ((16) get-u16/collect) 632 | ((32) get-u32/collect) 633 | (else get-u64/collect)) 634 | port collect limiter (tag immediate))) 635 | ((Iz) 636 | (case operand-size 637 | ((16) (get-u16/collect port collect limiter (tag immediate))) 638 | ((32) (get-u32/collect port collect limiter (tag immediate))) 639 | (else 640 | (let ((imm (get-u32/collect port collect limiter (tag immediate)))) 641 | (if (bitwise-bit-set? imm 31) 642 | (bitwise-ior #xffffffff00000000 imm) 643 | imm))))) 644 | 645 | ((Ob) 646 | (list 'mem8+ 647 | ((case address-size 648 | ((16) get-u16/collect) 649 | ((32) get-u32/collect) 650 | (else get-u64/collect)) 651 | port collect limiter (tag disp)))) 652 | ((Ov) 653 | ;; FIXME: is this correct? 654 | (list (case operand-size 655 | ((16) 'mem16+) 656 | ((32) 'mem32+) 657 | (else 'mem64+)) 658 | ((case address-size 659 | ((16) get-u16/collect) 660 | ((32) get-u32/collect) 661 | (else get-u64/collect)) 662 | port collect limiter (tag disp)))) 663 | ;; Far pointer 664 | ((Ap) 665 | (let* ((off (if (eqv? operand-size 32) 666 | (get-u32/collect port collect limiter (tag disp)) 667 | (get-u16/collect port collect limiter (tag disp)))) 668 | (ss (get-u16/collect port collect limiter (tag disp)))) 669 | (list 'far ss off))) 670 | 671 | ;; String operation operands 672 | ((Xb) 673 | (let ((seg (prefixes->segment-override prefixes mode 'ds))) 674 | (case address-size 675 | ((16) `(mem8+ ,seg si)) 676 | ((32) (if seg `(mem8+ ,seg esi) '(mem8+ esi))) 677 | (else (if seg `(mem8+ ,seg rsi) '(mem8+ rsi)))))) 678 | ((Xv) 679 | (let ((seg (prefixes->segment-override prefixes mode 'ds)) 680 | (size (case operand-size 681 | ((16) 'mem16+) 682 | ((32) 'mem32+) 683 | (else 'mem64+)))) 684 | (case address-size 685 | ((16) `(,size ,seg si)) 686 | ((32) (if seg `(,size ,seg esi) `(,size esi))) 687 | (else (if seg `(,size ,seg rsi) `(,size rsi)))))) 688 | ((Xz) 689 | (let ((seg (prefixes->segment-override prefixes mode 'ds)) 690 | (size (case operand-size 691 | ((16) 'mem16+) 692 | (else 'mem32+)))) 693 | (case address-size 694 | ((16) `(,size ,seg si)) 695 | ((32) (if seg `(,size ,seg esi) `(,size esi))) 696 | (else (if seg `(,size ,seg rsi) `(,size rsi)))))) 697 | 698 | ((Yb) 699 | (case address-size 700 | ((16) '(mem8+ es di)) 701 | ((32) (if (= mode 64) '(mem8+ edi) '(mem8+ es edi))) 702 | (else '(mem8+ rdi)))) 703 | ((Yv) 704 | (let ((size (case operand-size 705 | ((16) 'mem16+) 706 | ((32) 'mem32+) 707 | (else 'mem64+)))) 708 | (case address-size 709 | ((16) `(,size es di)) 710 | ((32) (if (= mode 64) `(,size edi) `(,size es edi))) 711 | (else `(,size rdi))))) 712 | ((Yz) 713 | (let ((size (case operand-size 714 | ((16) 'mem16+) 715 | (else 'mem32+)))) 716 | (case address-size 717 | ((16) `(,size es di)) 718 | ((32) (if (= mode 64) `(,size edi) `(,size es edi))) 719 | (else `(,size rdi))))) 720 | 721 | ;; Special registers 722 | ((Cd/q) (vector-ref reg-names-creg (ModR/M-reg modr/m prefixes))) 723 | ((Dd/q) (vector-ref reg-names-dreg (ModR/M-reg modr/m prefixes))) 724 | ((Sw) (or (vector-ref reg-names-sreg (ModR/M-reg modr/m)) 725 | (raise-UD "Invalid segment register encoded"))) 726 | ((bnd) 727 | ;; Intel MPX (waiting for a proper opsyntax). 728 | (or (vector-ref reg-names-bnd (ModR/M-reg modr/m prefixes)) 729 | (raise-UD "Invalid BND register encoded"))) 730 | 731 | ;; SSE. "Packed" is also "vector" in some documentation. It 732 | ;; means that the register is packed with more than one 733 | ;; number. For example, ps means four 32-bit floats packed 734 | ;; together in a 128-bit xmm register. "Scalar" is when 735 | ;; there is just one number in a register, at the lowest 736 | ;; bits. 737 | 738 | ;; ps = packed single-precision floating point 739 | ;; pd = packed double-precision floating point 740 | ;; ss = scalar single-precision floating point 741 | ;; sd = scalar double-precision floating point 742 | ;; x = 128/256 bit vector 743 | ((Vps Vdq Vpd Vq Vd Vsd Vss Vx) 744 | (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 'xmm)) 745 | ;; Called VRdq by AMD: 746 | ((Ups Udq Upd Uq) 747 | (translate-displacement prefixes mode disp 'xmm 'notmem)) 748 | 749 | ((Wps Wdq Wpd Wx) 750 | (translate-displacement prefixes mode disp 'xmm)) 751 | ((Wsd Udq/Mq Wq) 752 | (translate-displacement prefixes mode disp 'xmm 64)) 753 | ((Wss Udq/Md) 754 | (translate-displacement prefixes mode disp 'xmm 32)) 755 | ((Udq/Mw) 756 | (translate-displacement prefixes mode disp 'xmm 16)) 757 | 758 | ((Pq Pd) 759 | (translate-displacement prefixes mode (ModR/M-reg modr/m prefixes) 'mmx)) 760 | ((Qq) 761 | (translate-displacement prefixes mode disp 'mmx)) 762 | ((Qd) 763 | (translate-displacement prefixes mode disp 'mmx 32)) 764 | ;; Called PRq by AMD: 765 | ((Nq) 766 | (translate-displacement prefixes mode disp 'mmx 'notmem)) 767 | 768 | ((Wps/128 Wo) ;Forced to 128-bit xmm 769 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 770 | mode disp 'xmm)) 771 | ((Wq/128) ;Forced to 128-bit xmm 772 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 773 | mode disp 'xmm 64)) 774 | ((Vq/128) ;Forced to 128-bit xmm 775 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 776 | mode (ModR/M-reg modr/m prefixes) 'xmm 64)) 777 | ((Vo) ;Forced to 128-bit xmm 778 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 779 | mode (ModR/M-reg modr/m prefixes) 'xmm)) 780 | 781 | ;; Intel AVX. K, KW, WK, B, BW, WB is not official opsyntax. 782 | ((Kpd Kps Kss Ksd) 783 | (needs-VEX prefixes) 784 | (translate-displacement prefixes mode 785 | (fxbit-field /is4 4 (if (eqv? mode 64) 8 7)) 786 | 'xmm)) 787 | ((Lo) 788 | (needs-VEX prefixes) 789 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 790 | mode 791 | (fxbit-field /is4 4 (if (eqv? mode 64) 8 7)) 792 | 'xmm)) 793 | ((Lx) 794 | (needs-VEX prefixes) 795 | (translate-displacement prefixes mode 796 | (fxbit-field /is4 4 (if (eqv? mode 64) 8 7)) 797 | 'xmm)) 798 | 799 | ((KWpd) (if (enum-set-member? (prefix rex.w) prefixes) 800 | (get-operand 'Wpd) (get-operand 'Kpd))) 801 | ((KWps) (if (enum-set-member? (prefix rex.w) prefixes) 802 | (get-operand 'Wps) (get-operand 'Kps))) 803 | 804 | ((WKpd) (if (enum-set-member? (prefix rex.w) prefixes) 805 | (get-operand 'Kpd) (get-operand 'Wpd))) 806 | ((WKps) (if (enum-set-member? (prefix rex.w) prefixes) 807 | (get-operand 'Kps) (get-operand 'Wps))) 808 | 809 | ((Bpd Bps Bss Bsd Bdq Hx) 810 | (needs-VEX prefixes) 811 | (translate-displacement prefixes mode vex.v 'xmm)) 812 | ((Ho) 813 | (needs-VEX prefixes) 814 | (translate-displacement (enum-set-difference prefixes (prefix-set vex.l)) 815 | mode vex.v 'xmm)) 816 | 817 | ;; TODO: These things shouldn't be used. Replace them with #(W ... ...) 818 | ((BWpd) (if (enum-set-member? (prefix rex.w) prefixes) 819 | (get-operand 'Wpd) (get-operand 'Bpd))) 820 | ((BWps) (if (enum-set-member? (prefix rex.w) prefixes) 821 | (get-operand 'Wps) (get-operand 'Bps))) 822 | ((BWsd) (if (enum-set-member? (prefix rex.w) prefixes) 823 | (get-operand 'Wsd) (get-operand 'Bsd))) 824 | ((BWss) (if (enum-set-member? (prefix rex.w) prefixes) 825 | (get-operand 'Wss) (get-operand 'Bss))) 826 | 827 | ((WBpd) (if (enum-set-member? (prefix rex.w) prefixes) 828 | (get-operand 'Bpd) (get-operand 'Wpd))) 829 | ((WBps) (if (enum-set-member? (prefix rex.w) prefixes) 830 | (get-operand 'Bps) (get-operand 'Wps))) 831 | ((WBss) (if (enum-set-member? (prefix rex.w) prefixes) 832 | (get-operand 'Bss) (get-operand 'Wss))) 833 | ((WBsd) (if (enum-set-member? (prefix rex.w) prefixes) 834 | (get-operand 'Bsd) (get-operand 'Wsd))) 835 | 836 | ((In) (fxbit-field /is4 0 4)) 837 | 838 | ((By) 839 | (needs-VEX prefixes) 840 | (translate-displacement prefixes mode vex.v (max 32 operand-size))) 841 | 842 | ;; These must be memory references 843 | ((M) (translate-displacement prefixes mode disp 'notreg 'generic)) 844 | ((Ms) (translate-displacement prefixes mode disp 'notreg 845 | (case operand-size 846 | ((16) 'ptr24) 847 | ((32) 'ptr32) 848 | (else 'ptr64)))) 849 | ((Mb) (translate-displacement prefixes mode disp 'notreg 8)) 850 | ((Mw) (translate-displacement prefixes mode disp 'notreg 16)) 851 | ((Md) (translate-displacement prefixes mode disp 'notreg 32)) 852 | ((Mq) (translate-displacement prefixes mode disp 'notreg 64)) 853 | ((Mdq) (translate-displacement prefixes mode disp 'notreg 128)) 854 | ((Mpd Mps) (translate-displacement prefixes mode disp 'notreg 'xmm)) 855 | ((Mv) (translate-displacement prefixes mode disp 'notreg operand-size)) 856 | ((Mem80) 857 | ;; Used for x87 memory operands, so it could really be 80, 858 | ;; 64 or 32 bits depending on how the x87 is configured. In 859 | ;; Linux on amd64 it's used for "long double", which is in 860 | ;; fact 80 bits wide. 861 | (translate-displacement prefixes mode disp 'notreg 80)) 862 | ((Mp) 863 | (translate-displacement prefixes mode disp 'notreg 864 | (case operand-size 865 | ((16) 'ptr16) 866 | ((32) 'ptr32) 867 | (else 'ptr64)))) 868 | ((Ma) ;only for BOUND 869 | (translate-displacement prefixes mode disp 870 | 'notreg 871 | (case operand-size 872 | ((16) 32) 873 | ((32) 64) 874 | (else 875 | (raise-UD "Internal error: Ma used in 64-bit mode"))))) 876 | 877 | ((Rd/q) 878 | ;; 64-bit general register in long mode, 32-bit in legacy. 879 | (translate-displacement prefixes mode disp 880 | (if (eqv? mode 16) 32 mode) 'notmem)) 881 | ((Rv/Mw) (translate-displacement prefixes mode disp operand-size 16)) 882 | ((Rd/Mw) (translate-displacement prefixes mode disp 32 16)) 883 | ((Rd/Mb) (translate-displacement prefixes mode disp 32 8)) 884 | 885 | ((*rAX/r8 *rCX/r9 *rDX/r10 *rBX/r11 *rSP/r12 *rBP/r13 *rSI/r14 *rDI/r15) 886 | (translate-displacement prefixes mode (ModR/M-r/m opcode prefixes) 887 | operand-size)) 888 | ((*AL/R8L *CL/R9L *DL/R10L *BL/R11L *AH/R12L *CH/R13L *DH/R14L *BH/R15L) 889 | (translate-displacement prefixes mode (ModR/M-r/m opcode prefixes) 8)) 890 | ((*eCX *eDX *eBX *eSP *eBP *eSI *eDI) 891 | (translate-displacement prefixes mode (ModR/M-r/m opcode prefixes) 892 | (if (eqv? operand-size 16) 16 32))) 893 | 894 | ;; x87 895 | ((*st0) 'st0) 896 | ((*st) (translate-displacement prefixes mode disp 'x87)) 897 | 898 | ((*unity) 1) 899 | ((*CS) 'cs) 900 | ((*ES) 'es) 901 | ((*DS) 'ds) 902 | ((*FS) 'fs) 903 | ((*GS) 'gs) 904 | ((*SS) 'ss) 905 | ((*DX) 'dx) 906 | ((*CL) 'cl) 907 | ((*eAX) (if (eqv? operand-size 16) 'ax 'eax)) 908 | ((*AX) 'ax) 909 | ((*AL) 'al) 910 | ((*rAX) 911 | (case operand-size 912 | ((16) 'ax) 913 | ((32) 'eax) 914 | (else 'rax))) 915 | ((*XMM0) 'xmm0) 916 | (else 917 | (error 'get-operand "Unimplemented opsyntax" op))))) 918 | 919 | (define (get-operands port mode collect ip limiter prefixes instr modr/m opcode vex.v d64) 920 | (let* ((operand-size (case mode 921 | ((64) (cond ((enum-set-member? (prefix rex.w) prefixes) 64) 922 | ((enum-set-member? (prefix operand) prefixes) 16) 923 | (d64 64) 924 | (else 32))) 925 | ((32) (cond ((enum-set-member? (prefix operand) prefixes) 16) 926 | (else 32))) 927 | (else (cond ((enum-set-member? (prefix operand) prefixes) 32) 928 | (else 16))))) 929 | (address-size (case mode 930 | ((64) (cond ((enum-set-member? (prefix address) prefixes) 32) 931 | (else 64))) 932 | ((32) (cond ((enum-set-member? (prefix address) prefixes) 16) 933 | (else 32))) 934 | (else (cond ((enum-set-member? (prefix address) prefixes) 32) 935 | (else 16))))) 936 | (modr/m (or modr/m (and (has-modr/m? instr) 937 | (get-u8/collect port collect limiter (tag modr/m))))) 938 | (disp (and (number? modr/m) 939 | (get-displacement port mode collect limiter prefixes modr/m address-size))) 940 | (/is4 (and (has-/is4? instr) (get-u8/collect port collect limiter (tag /is4))))) 941 | ;; At this point in the instruction stream, the only things left 942 | ;; are I, J and O (immediate, jump offset, offset) values. 943 | (when debug 944 | (print "Instruction=" instr 945 | " prefixes=" (enum-set->list prefixes) 946 | " opcode=" (number->string opcode 16) 947 | " vex.v=" vex.v 948 | " displacement=" disp 949 | " /is4=" (and /is4 (number->string /is4 2))) 950 | (if (number? modr/m) (print-modr/m modr/m prefixes))) 951 | 952 | (let* ((x (cons (car instr) 953 | (let lp ((op* (cdr instr))) 954 | (if (null? op*) 955 | '() 956 | (cons (get-operand port mode collect ip limiter (car op*) prefixes 957 | opcode vex.v 958 | operand-size address-size modr/m 959 | disp /is4) 960 | (lp (cdr op*))))))) 961 | (x (fix-nop x prefixes mode operand-size)) 962 | (x (fix-pseudo-ops x)) 963 | (x (fix-lock x prefixes)) 964 | (x (fix-branches x prefixes)) 965 | (x (fix-rep x prefixes)) 966 | (x (fix-rIP-relative x ip limiter))) 967 | x))) 968 | 969 | (define (get-instruction* port mode collect ip limiter) 970 | (let more-opcode ((opcode-table opcodes) 971 | (vex.v #f) 972 | (prefixes (prefix-set))) 973 | (let ((opcode (get-u8 port limiter))) 974 | (let lp ((instr (vector-ref opcode-table opcode)) 975 | (modr/m #f) 976 | (opcode opcode) 977 | (prefixes prefixes) 978 | (opcode-collected #f) 979 | (vex-traversed #f) 980 | (d64 #f)) 981 | (cond 982 | ((and (= opcode #xC4) (or (= mode 64) (lookahead-is-valid-VEX? port)) 983 | (not (enum-set-member? (prefix vex) prefixes))) 984 | ;; Three-byte VEX prefix 985 | (let* ((byte1 (get-u8 port limiter)) 986 | (byte2 (get-u8 port limiter))) 987 | (collect (tag prefix) opcode byte1 byte2) 988 | (VEX-prefix-check prefixes mode) 989 | (more-opcode (VEX-m-mmmm->table byte1) 990 | (VEX-vvvv byte2 mode) 991 | (VEX3->prefixes prefixes mode byte1 byte2)))) 992 | 993 | ((and (= opcode #xC5) (or (= mode 64) (lookahead-is-valid-VEX? port)) 994 | (not (enum-set-member? (prefix vex) prefixes))) 995 | ;; Two-byte VEX prefix 996 | (let ((byte1 (get-u8 port limiter))) 997 | (collect (tag prefix) opcode byte1) 998 | (VEX-prefix-check prefixes mode) 999 | (more-opcode (vector-ref opcodes #x0F) 1000 | (VEX-vvvv byte1 mode) 1001 | (VEX2->prefixes prefixes mode byte1)))) 1002 | 1003 | ((and (eq? instr (vector-ref opcodes #x8F)) ;ugly 1004 | (lookahead-is-valid-XOP? port)) 1005 | ;; Three-byte XOP prefix 1006 | (let* ((byte1 (get-u8 port limiter)) 1007 | (byte2 (get-u8 port limiter))) 1008 | (collect (tag prefix) opcode byte1 byte2) 1009 | (VEX-prefix-check prefixes mode) 1010 | (more-opcode (XOP-map-select->table byte1) 1011 | (VEX-vvvv byte2 mode) 1012 | (VEX3->prefixes prefixes mode byte1 byte2)))) 1013 | 1014 | ((not instr) 1015 | (unless opcode-collected 1016 | (collect 'opcode opcode)) 1017 | (raise-UD "Invalid or reserved opcode")) 1018 | 1019 | ((and (list? instr) (eq? (car instr) '*prefix*)) ;Prefix 1020 | (collect (tag prefix) opcode) 1021 | (when (enum-set-member? (prefix rex) prefixes) 1022 | (raise-UD "Other prefixes can not follow the REX prefix")) 1023 | (more-opcode opcode-table 1024 | vex.v 1025 | (enum-set-union 1026 | prefixes 1027 | ((enum-set-constructor (prefix-set)) (cdr instr))))) 1028 | 1029 | ((list? instr) 1030 | ;; An instruction has finally been found 1031 | (unless opcode-collected 1032 | (collect (tag opcode) opcode)) 1033 | (when (and (enum-set-member? (prefix vex) prefixes) 1034 | (not vex-traversed)) 1035 | (raise-UD "VEX was used but a legacy instruction was found")) 1036 | (get-operands port mode collect ip limiter prefixes instr modr/m opcode vex.v d64)) 1037 | 1038 | ;; Traverse the instruction table 1039 | 1040 | ((eq? (vector-ref instr 0) 'Group) 1041 | ;; Read a ModR/M byte and use the fields as opcode 1042 | ;; extension. 1043 | (collect (tag opcode) opcode) 1044 | (let* ((modr/m (get-u8/collect port collect limiter (tag modr/m))) 1045 | (v (vector-ref instr (if (and (> (vector-length instr) 3) 1046 | (= (ModR/M-mod modr/m) #b11)) 1047 | 3 2))) 1048 | (instr (vector-ref v (ModR/M-reg modr/m)))) 1049 | (cond ((and (vector? instr) (= (vector-length instr) 8)) 1050 | (when debug (print-modr/m modr/m prefixes)) 1051 | (lp (vector-ref instr (ModR/M-r/m modr/m)) 1052 | 'ModR/M-invalid opcode prefixes 1053 | #t vex-traversed d64)) 1054 | (else 1055 | (lp instr modr/m opcode prefixes 1056 | #t vex-traversed d64))))) 1057 | 1058 | ((eq? (vector-ref instr 0) 'Prefix) 1059 | ;; SSE instructions, e.g., where one of these prefixes 1060 | ;; is considered part of the opcode. "Vanligt 1061 | ;; REP-prefix kan vara DÖDLIG SSE--vi har hela listan". 1062 | (lp (vector-ref instr 1063 | (cond ((enum-set-member? (prefix repz) prefixes) 2) 1064 | ((enum-set-member? (prefix repnz) prefixes) 4) 1065 | ((enum-set-member? (prefix operand) prefixes) 3) 1066 | (else 1))) 1067 | modr/m opcode 1068 | (enum-set-difference prefixes (prefix-set repz repnz operand)) 1069 | opcode-collected vex-traversed d64)) 1070 | 1071 | ((eq? (vector-ref instr 0) 'Datasize) 1072 | ;; Pick different instructions depending on 1073 | ;; effective operand size. 1074 | (lp (vector-ref instr 1075 | (case mode 1076 | ((64) 1077 | (cond ((enum-set-member? (prefix rex.w) prefixes) 3) 1078 | ((enum-set-member? (prefix operand) prefixes) 1) 1079 | (else 2))) 1080 | ((32) 1081 | (cond ((enum-set-member? (prefix operand) prefixes) 1) 1082 | (else 2))) 1083 | (else 1084 | (cond ((enum-set-member? (prefix operand) prefixes) 2) 1085 | (else 1))))) 1086 | modr/m opcode 1087 | prefixes 1088 | opcode-collected vex-traversed d64)) 1089 | 1090 | ((eq? (vector-ref instr 0) 'Addrsize) 1091 | (lp (vector-ref instr 1092 | (case mode 1093 | ((64) (if (enum-set-member? (prefix address) prefixes) 2 3)) 1094 | ((32) (if (enum-set-member? (prefix address) prefixes) 1 2)) 1095 | (else (if (enum-set-member? (prefix address) prefixes) 2 1)))) 1096 | modr/m opcode 1097 | prefixes 1098 | opcode-collected vex-traversed d64)) 1099 | 1100 | ((eq? (vector-ref instr 0) 'Mode) 1101 | ;; Choose between compatibility/legacy mode and 1102 | ;; long mode. 1103 | (lp (vector-ref instr (if (eqv? mode 64) 2 1)) 1104 | modr/m opcode 1105 | prefixes 1106 | opcode-collected vex-traversed d64)) 1107 | 1108 | ((eq? (vector-ref instr 0) 'VEX) 1109 | (lp (vector-ref instr 1110 | (cond ((enum-set-member? (prefix vex.l) prefixes) 1111 | (if (> (vector-length instr) 3) 3 2)) ;256-bit 1112 | ((enum-set-member? (prefix vex) prefixes) 2) ;128-bit 1113 | (else 1))) 1114 | modr/m opcode 1115 | prefixes 1116 | opcode-collected #t d64)) 1117 | 1118 | ((eq? (vector-ref instr 0) 'Mem/reg) 1119 | ;; Read ModR/M and see if it encodes memory or a 1120 | ;; register. Used for the MOVLPS/MOVHLPS and 1121 | ;; MOVHPS/MOVLHPS instructions (mnemonics differ) and 1122 | ;; VMOVSD (operands differ). 1123 | (let ((modr/m (get-u8 port limiter))) 1124 | (collect (tag opcode) opcode) 1125 | (collect (tag modr/m) modr/m) 1126 | (lp (vector-ref instr 1127 | (cond ((eqv? (ModR/M-mod modr/m) #b11) 2) ;register 1128 | (else 1))) 1129 | modr/m opcode 1130 | prefixes 1131 | #t vex-traversed d64))) 1132 | 1133 | ((eq? (vector-ref instr 0) 'f64) 1134 | ;; Operand size is forced to 64 bits in 64-bit mode. 1135 | (lp (vector-ref instr 1) 1136 | modr/m opcode 1137 | (if (= mode 64) 1138 | (enum-set-difference prefixes (prefix-set operand rex.w)) 1139 | prefixes) 1140 | opcode-collected vex-traversed #t)) 1141 | 1142 | ((eq? (vector-ref instr 0) 'd64) 1143 | ;; In 64-bit mode, the default operand size is 64 1144 | ;; bits. The only other possible operand size is then 1145 | ;; 16 bits. 1146 | (lp (vector-ref instr 1) 1147 | modr/m opcode 1148 | prefixes 1149 | opcode-collected vex-traversed #t)) 1150 | 1151 | ((eq? (vector-ref instr 0) 'Prefix/eos) 1152 | ;; This is for 0F 38 F0/F1 (MOVBE/CRC32). These opcodes 1153 | ;; look a lot like SSE, but they use 66 to change the 1154 | ;; effective operand size. 1155 | (lp (vector-ref instr 1156 | (cond ((enum-set-member? (prefix repnz) prefixes) 2) 1157 | (else 1))) 1158 | modr/m opcode 1159 | (enum-set-difference prefixes (prefix-set repz repnz)) 1160 | opcode-collected vex-traversed d64)) 1161 | 1162 | ((eq? (vector-ref instr 0) 'W) 1163 | ;; This handles the case where two of the operands change 1164 | ;; order based on REX.W. This is used in VEX/XOP to 1165 | ;; enable the memory operand to be either one of two 1166 | ;; operands. 1167 | (lp (vector-ref instr 1168 | (cond ((enum-set-member? (prefix rex.w) prefixes) 2) 1169 | (else 1))) 1170 | modr/m opcode 1171 | (enum-set-difference prefixes (prefix-set rex.w)) 1172 | opcode-collected vex-traversed d64)) 1173 | 1174 | (else 1175 | (collect (tag opcode) opcode) 1176 | (let ((opcode (get-u8 port limiter))) 1177 | ;; A new opcode table (two-byte or three-byte opcode) 1178 | (lp (vector-ref instr opcode) 1179 | modr/m opcode 1180 | prefixes 1181 | #f vex-traversed d64)))))))) 1182 | 1183 | ;; Read the next instruction from the given port, using the given 1184 | ;; bit mode (16, 32 or 64). The `collect' argument is either #f, or 1185 | ;; a function which accepts any number of arguments: the first 1186 | ;; argument is a type tag, and the following arguments are bytes. 1187 | ;; All bytes read from the port will be passed to the collector. 1188 | (define (get-instruction port mode collect ip) 1189 | (assert (memv mode '(16 32 64))) 1190 | (let ((collect (or collect (lambda x #f)))) 1191 | (let ((limiter 1192 | (let ((have-read 0)) 1193 | ;; The limiter works to stop get-instruction 1194 | ;; from reading more than 15 bytes. 1195 | (case-lambda 1196 | (() 1197 | have-read) 1198 | ((wanted-bytes collect tag) 1199 | (when (> (+ have-read wanted-bytes) 15) 1200 | (let* ((n (- 15 have-read)) 1201 | (bv (get-bytevector-n port n))) 1202 | (unless (or (eof-object? bv) (zero? n)) 1203 | (apply collect tag (bytevector->u8-list bv))) 1204 | (when (or (eof-object? bv) (< (bytevector-length bv) n)) 1205 | (raise-UD "End of file inside oversized instruction")) 1206 | (raise-UD "Instruction too long"))) 1207 | (set! have-read (+ have-read wanted-bytes))))))) 1208 | (if (eof-object? (lookahead-u8 port)) 1209 | (eof-object) 1210 | (get-instruction* port mode collect ip limiter))))) 1211 | 1212 | ;; Generic disassembler support. 1213 | (let ((min 1) (max 15)) 1214 | (define (wrap-get-instruction mode) 1215 | (define get-instruction* 1216 | (case-lambda 1217 | ((port) 1218 | (get-instruction port mode #f #f)) 1219 | ((port collect) 1220 | (get-instruction port mode collect #f)) 1221 | ((port collect pc) 1222 | (get-instruction port mode collect pc)))) 1223 | get-instruction*) 1224 | (register-disassembler 1225 | (make-disassembler 'x86-16 min max (wrap-get-instruction 16))) 1226 | (register-disassembler 1227 | (make-disassembler 'x86-32 min max (wrap-get-instruction 32))) 1228 | (register-disassembler 1229 | (make-disassembler 'x86-64 min max (wrap-get-instruction 64))))) 1230 | --------------------------------------------------------------------------------