├── bin ├── mingw64 │ ├── dasm_x86.a │ └── dasm_x86.dll ├── osx64 │ ├── libdasm_x86.a │ └── libdasm_x86.dylib └── linux64 │ ├── libdasm_x86.a │ └── libdasm_x86.so ├── csrc └── dynasm │ ├── build-mingw64.sh │ ├── build-linux64.sh │ ├── build.sh │ ├── WHAT │ ├── build-osx64.sh │ ├── dasm_x86.c │ ├── dasm_extern.h │ ├── dasm_proto.h │ └── dasm_x86.h ├── dasm_x64.lua ├── dasm_x86.lua ├── dynasm_demo.lua ├── dasm_mm.lua ├── dasm.lua ├── dynasm_demo_x86.dasl ├── dynasm.md ├── dynasm.lua └── dasm_x86x64.lua /bin/mingw64/dasm_x86.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/mingw64/dasm_x86.a -------------------------------------------------------------------------------- /bin/osx64/libdasm_x86.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/osx64/libdasm_x86.a -------------------------------------------------------------------------------- /bin/linux64/libdasm_x86.a: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/linux64/libdasm_x86.a -------------------------------------------------------------------------------- /bin/linux64/libdasm_x86.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/linux64/libdasm_x86.so -------------------------------------------------------------------------------- /bin/mingw64/dasm_x86.dll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/mingw64/dasm_x86.dll -------------------------------------------------------------------------------- /csrc/dynasm/build-mingw64.sh: -------------------------------------------------------------------------------- 1 | P=mingw64 L="-s -static-libgcc" 
D=dasm_x86.dll A=dasm_x86.a ./build.sh 2 | -------------------------------------------------------------------------------- /bin/osx64/libdasm_x86.dylib: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/luapower/dynasm/HEAD/bin/osx64/libdasm_x86.dylib -------------------------------------------------------------------------------- /csrc/dynasm/build-linux64.sh: -------------------------------------------------------------------------------- 1 | P=linux64 C=-fPIC L="-s -static-libgcc" D=libdasm_x86.so A=libdasm_x86.a ./build.sh 2 | -------------------------------------------------------------------------------- /csrc/dynasm/build.sh: -------------------------------------------------------------------------------- 1 | ${X}gcc -c -O2 $C dasm_x86.c -DDASM_CHECKS 2 | ${X}gcc *.o -shared $L -o ../../bin/$P/$D 3 | rm -f ../../bin/$P/$A 4 | ${X}ar rcs ../../bin/$P/$A *.o 5 | rm *.o 6 | -------------------------------------------------------------------------------- /csrc/dynasm/WHAT: -------------------------------------------------------------------------------- 1 | DynASM 1.4.0 from https://github.com/LuaJIT/LuaJIT (MIT License) 2 | 3 | 4 | 5 | The encoder part written in C was not modified. 6 | The Lua part was modified to support Lua mode (see docs for details). 7 | -------------------------------------------------------------------------------- /csrc/dynasm/build-osx64.sh: -------------------------------------------------------------------------------- 1 | [ `uname` = Linux ] && export X=x86_64-apple-darwin11- 2 | P=osx64 C="-arch x86_64" L="-arch x86_64 -install_name @rpath/libdasm_x86.dylib" \ 3 | D=libdasm_x86.dylib A=libdasm_x86.a ./build.sh 4 | -------------------------------------------------------------------------------- /csrc/dynasm/dasm_x86.c: -------------------------------------------------------------------------------- 1 | /* 2 | Encoding engine to use with dasm.lua. 
3 | 4 | Compile with: 5 | 6 | gcc dasm_x86.c -O -fPIC -DDASM_CHECKS -shared -s -o libdasm_x86.so 7 | */ 8 | 9 | #include "dasm_extern.h" 10 | #include "dasm_proto.h" 11 | #include "dasm_x86.h" 12 | -------------------------------------------------------------------------------- /csrc/dynasm/dasm_extern.h: -------------------------------------------------------------------------------- 1 | // DASM_EXTERN_FUNC is a global function pointer to be set at runtime. 2 | // It will be used in place of the DASM_EXTERN() macro. 3 | typedef int (*DASM_EXTERN_TYPE) (void *ctx, unsigned char *addr, int idx, int type); 4 | DASM_EXTERN_TYPE DASM_EXTERN_FUNC; 5 | #define DASM_EXTERN(a,b,c,d) DASM_EXTERN_FUNC(a,b,c,d) 6 | -------------------------------------------------------------------------------- /dasm_x64.lua: -------------------------------------------------------------------------------- 1 | 2 | -- DynASM x64 loader module. 3 | 4 | --unload dasm_x86x64 if it's already loaded. 5 | if not package then package = {loaded = {}} end --for compat. with minilua 6 | package.loaded.dasm_x86x64 = nil 7 | dasm_x64 = true -- Using a global is an ugly, but effective solution. 8 | local dasm = require'dasm_x86x64' 9 | dasm_x64 = nil 10 | package.loaded.dasm_x86x64 = true 11 | return dasm 12 | -------------------------------------------------------------------------------- /dasm_x86.lua: -------------------------------------------------------------------------------- 1 | 2 | -- DynASM x86 loader module. 3 | 4 | --unload dasm_x86x64 if it's already loaded. 5 | if not package then package = {loaded = {}} end --for compat. with minilua 6 | package.loaded.dasm_x86x64 = nil 7 | dasm_x64 = false -- Using a global is an ugly, but effective solution. 
8 | local dasm = require'dasm_x86x64' 9 | dasm_x64 = nil 10 | package.loaded.dasm_x86x64 = true 11 | return dasm 12 | -------------------------------------------------------------------------------- /csrc/dynasm/dasm_proto.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** DynASM encoding engine prototypes. 3 | ** Copyright (C) 2005-2015 Mike Pall. All rights reserved. 4 | ** Released under the MIT license. See dynasm.lua for full copyright notice. 5 | */ 6 | 7 | #ifndef _DASM_PROTO_H 8 | #define _DASM_PROTO_H 9 | 10 | #include <stddef.h> 11 | #include <stdarg.h> 12 | 13 | #define DASM_IDENT "DynASM 1.4.0" 14 | #define DASM_VERSION 10400 /* 1.4.0 */ 15 | 16 | #ifndef Dst_DECL 17 | #define Dst_DECL dasm_State **Dst 18 | #endif 19 | 20 | #ifndef Dst_REF 21 | #define Dst_REF (*Dst) 22 | #endif 23 | 24 | #ifndef DASM_FDEF 25 | #define DASM_FDEF extern 26 | #endif 27 | 28 | #ifndef DASM_M_GROW 29 | #define DASM_M_GROW(ctx, t, p, sz, need) \ 30 | do { \ 31 | size_t _sz = (sz), _need = (need); \ 32 | if (_sz < _need) { \ 33 | if (_sz < 16) _sz = 16; \ 34 | while (_sz < _need) _sz += _sz; \ 35 | (p) = (t *)realloc((p), _sz); \ 36 | if ((p) == NULL) exit(1); \ 37 | (sz) = _sz; \ 38 | } \ 39 | } while(0) 40 | #endif 41 | 42 | #ifndef DASM_M_FREE 43 | #define DASM_M_FREE(ctx, p, sz) free(p) 44 | #endif 45 | 46 | /* Internal DynASM encoder state. */ 47 | typedef struct dasm_State dasm_State; 48 | 49 | 50 | /* Initialize and free DynASM state. */ 51 | DASM_FDEF void dasm_init(Dst_DECL, int maxsection); 52 | DASM_FDEF void dasm_free(Dst_DECL); 53 | 54 | /* Setup global array. Must be called before dasm_setup(). */ 55 | DASM_FDEF void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); 56 | 57 | /* Grow PC label array. Can be called after dasm_setup(), too. */ 58 | DASM_FDEF void dasm_growpc(Dst_DECL, unsigned int maxpc); 59 | 60 | /* Setup encoder. 
*/ 61 | DASM_FDEF void dasm_setup(Dst_DECL, const void *actionlist); 62 | 63 | /* Feed encoder with actions. Calls are generated by pre-processor. */ 64 | DASM_FDEF void dasm_put(Dst_DECL, int start, ...); 65 | 66 | /* Link sections and return the resulting size. */ 67 | DASM_FDEF int dasm_link(Dst_DECL, size_t *szp); 68 | 69 | /* Encode sections into buffer. */ 70 | DASM_FDEF int dasm_encode(Dst_DECL, void *buffer); 71 | 72 | /* Get PC label offset. */ 73 | DASM_FDEF int dasm_getpclabel(Dst_DECL, unsigned int pc); 74 | 75 | #ifdef DASM_CHECKS 76 | /* Optional sanity checker to call between isolated encoding steps. */ 77 | DASM_FDEF int dasm_checkstep(Dst_DECL, int secmatch); 78 | #else 79 | #define dasm_checkstep(a, b) 0 80 | #endif 81 | 82 | 83 | #endif /* _DASM_PROTO_H */ 84 | -------------------------------------------------------------------------------- /dynasm_demo.lua: -------------------------------------------------------------------------------- 1 | io.stdout:setvbuf'no' 2 | io.stderr:setvbuf'no' 3 | 4 | local dynasm = require'dynasm' 5 | local dasm = require'dasm' 6 | 7 | --set the globals returned from the dasl module 8 | 9 | local demo, demos, actions, externnames, globalnames, DASM_MAXSECTION, DASM_MAXGLOBAL 10 | local function set_vars(t) 11 | demo, demos, actions, externnames, globalnames, DASM_MAXSECTION, DASM_MAXGLOBAL = unpack(t) 12 | end 13 | 14 | --load dasl files via loadfile() and via require(). 15 | 16 | --load and run the dasl file from the current directory. 17 | local function load_via_loadfile() 18 | set_vars(assert(dynasm.loadfile'dynasm_demo_x86.dasl')()) 19 | end 20 | 21 | --load the same file via require() from package.path. 22 | local function load_via_require() 23 | set_vars(require'dynasm_demo_x86') 24 | end 25 | 26 | --helpers 27 | 28 | local function hr() return ('-'):rep(60) end 29 | local function printf(...) 
print(string.format(...)) end 30 | 31 | --assemble a demo from the dasl file, dump it and run it 32 | local function run_demo(name) 33 | collectgarbage() --clean up from the last session 34 | 35 | local gencode = demo[name] 36 | 37 | --make a new dasm state 38 | local state, globals = dasm.new(actions, externnames, DASM_MAXSECTION, DASM_MAXGLOBAL) 39 | 40 | --generate the code and get the test function for that code 41 | local runcode = gencode(state) 42 | 43 | --build the code 44 | local buf, size = state:build() 45 | 46 | --show code and size 47 | printf('%-16s %-10s %s', 'code address', '', tostring(buf)) 48 | printf('%-16s %-10d %s', 'code size', size, 'bytes') 49 | 50 | --show the labels from this code 51 | for i = 0, #globalnames do --from .globalnames directive 52 | if globals[i] ~= nil then 53 | printf('%-16s %-10s %s', 'global', globalnames[i], globals[i]) 54 | end 55 | end 56 | 57 | --dump the code 58 | print(hr()) 59 | dasm.dump(buf, size) 60 | 61 | --run the code 62 | if runcode then 63 | runcode(buf) 64 | end 65 | end 66 | 67 | --run all demos 68 | local function run_all_demos() 69 | for i,name in ipairs(demos) do 70 | print() 71 | print(name) 72 | print(hr()) 73 | run_demo(name) 74 | end 75 | end 76 | 77 | local function test_loadstring() 78 | local chunk = dynasm.loadstring[[ 79 | 80 | local ffi = require'ffi' 81 | local dasm = require'dasm' 82 | 83 | |.arch ARCH 84 | |.actionlist actions 85 | 86 | local function gen() 87 | | mov ax, bx 88 | end 89 | 90 | return actions 91 | ]] 92 | local ffi = require'ffi' 93 | local actions = chunk() 94 | print() 95 | print('loadstring test: actionlist for `mov ax, bx`:') 96 | print(hr()) 97 | print(string.byte(ffi.string(actions, ffi.sizeof(actions)), 1, ffi.sizeof(actions))) 98 | end 99 | 100 | local function test_translate_tostring() 101 | print() 102 | print'translate_tostring test:' 103 | print(hr()) 104 | local asm = [[ 105 | 106 | local ffi = require'ffi' 107 | local dasm = require'dasm' 108 | 109 | |.arch 
ARCH 110 | 111 | ]] 112 | print(dynasm.translate_tostring(dynasm.string_infile(asm), {lang = "lua"})) 113 | end 114 | 115 | local function run_default() 116 | load_via_loadfile() 117 | run_all_demos() 118 | --we're loading the same file again to test the reusability of dynasm. 119 | --there's a lot of global state in dynasm which needs to be reset between runs. 120 | load_via_require() 121 | run_all_demos() 122 | --additional tests... 123 | test_loadstring() 124 | test_translate_tostring() 125 | end 126 | 127 | if not ... then 128 | run_default() 129 | else 130 | load_via_require() 131 | run_demo((...)) 132 | end 133 | -------------------------------------------------------------------------------- /dasm_mm.lua: -------------------------------------------------------------------------------- 1 | 2 | --wrappers around mmap to support dynamic code execution. 3 | --Written by Cosmin Apreutesei. Public Domain. 4 | --Tested with Windows, Linux, BSD and OSX, x86 and x86-64. 5 | 6 | local ffi = require'ffi' 7 | local C = ffi.C 8 | 9 | local function checkh(ptr) return assert(ptr ~= nil and ptr) end 10 | local function checkz(ret) assert(ret == 0) end 11 | local function checknz(ret) assert(ret ~= 0) end 12 | 13 | local new, free, protect 14 | 15 | --Using VirtualAlloc allows memory protection, but can only allocate memory in multiple-of-64K chunks. 
16 | local USE_VIRTUALALLOC = false 17 | 18 | if ffi.os == 'Windows' then 19 | 20 | if USE_VIRTUALALLOC then 21 | 22 | ffi.cdef[[ 23 | void* VirtualAlloc(void* lpAddress, size_t dwSize, uint32_t flAllocationType, uint32_t flProtect); 24 | int VirtualFree(void* lpAddress, size_t dwSize, uint32_t dwFreeType); 25 | int VirtualProtect(void* lpAddress, size_t dwSize, uint32_t flNewProtect, uint32_t* lpflOldProtect); 26 | ]] 27 | 28 | local PAGE_READWRITE = 0x04 29 | local PAGE_EXECUTE_READ = 0x20 30 | local MEM_COMMIT = 0x1000 31 | local MEM_RESERVE = 0x2000 32 | local MEM_RELEASE = 0x8000 33 | 34 | function new(size) 35 | return checkh(C.VirtualAlloc(nil, size, bit.bor(MEM_RESERVE, MEM_COMMIT), PAGE_READWRITE)) 36 | end 37 | 38 | function protect(addr, size) 39 | local oldprotect = ffi.new'uint32_t[1]' --because null not accepted 40 | checknz(C.VirtualProtect(addr, size, PAGE_EXECUTE_READ, oldprotect)) 41 | end 42 | 43 | function free(addr, size) 44 | assert(size, 'size required') --on other platforms 45 | checknz(C.VirtualFree(addr, 0, MEM_RELEASE)) 46 | end 47 | 48 | else 49 | 50 | local HEAP_NO_SERIALIZE = 0x00000001 51 | local HEAP_ZERO_MEMORY = 0x00000008 52 | local HEAP_CREATE_ENABLE_EXECUTE = 0x00040000 53 | 54 | ffi.cdef[[ 55 | void* HeapCreate(uint32_t flOptions, size_t dwInitialSize, size_t dwMaximumSize); 56 | void* HeapAlloc(void* hHeap, uint32_t dwFlags, size_t dwBytes); 57 | int HeapFree(void* hHeap, uint32_t dwFlags, void* lpMem); 58 | ]] 59 | 60 | local heap 61 | 62 | function new(size) 63 | heap = heap or checkh(C.HeapCreate(bit.bor(HEAP_NO_SERIALIZE, HEAP_CREATE_ENABLE_EXECUTE), 0, 0)) 64 | return checkh(C.HeapAlloc(heap, HEAP_ZERO_MEMORY, size)) 65 | end 66 | 67 | function protect(addr, size) end 68 | 69 | function free(addr, size) 70 | assert(size, 'size required') --on other platforms 71 | checknz(C.HeapFree(heap, HEAP_NO_SERIALIZE, addr)) 72 | end 73 | 74 | end 75 | 76 | elseif ffi.os == 'Linux' or ffi.os == 'BSD' or ffi.os == 'OSX' then 77 | 78 | 
if ffi.os == 'OSX' then 79 | ffi.cdef'typedef int64_t off_t;' 80 | else 81 | ffi.cdef'typedef long int off_t;' 82 | end 83 | 84 | ffi.cdef[[ 85 | void* mmap(void *addr, size_t length, int prot, int flags, int fd, off_t offset); 86 | int munmap(void *addr, size_t length); 87 | int mprotect(void *addr, size_t len, int prot); 88 | ]] 89 | 90 | local PROT_READ = 1 91 | local PROT_WRITE = 2 92 | local PROT_EXEC = 4 93 | local MAP_PRIVATE = 2 94 | local MAP_ANON = ffi.os == 'Linux' and 0x20 or 0x1000 95 | 96 | function new(size) --map `size` bytes of anonymous, private, read+write memory 97 | local ret = C.mmap(nil, size, bit.bor(PROT_READ, PROT_WRITE), bit.bor(MAP_PRIVATE, MAP_ANON), -1, 0) 98 | if ffi.cast('intptr_t', ret) == ffi.cast('intptr_t', -1) then --mmap reports failure as (void*)-1, not NULL 99 | error(string.format('mmap errno: %d', ffi.errno())) 100 | end 101 | return checkh(ret) 102 | end 103 | 104 | function protect(addr, size) --switch the pages to read+execute (no longer writable) 105 | checkz(C.mprotect(addr, size, bit.bor(PROT_READ, PROT_EXEC))) 106 | end 107 | 108 | function free(addr, size) 109 | checkz(C.munmap(addr, size)) 110 | end 111 | 112 | end 113 | 114 | local new = function(size) --override for hooking to gc 115 | local addr = new(size) 116 | ffi.gc(addr, function(addr) 117 | free(addr, size) 118 | ffi.gc(addr, nil) 119 | end) 120 | return addr 121 | end 122 | local free = function(addr, size) ffi.gc(addr, nil); free(addr, size) end --override: detach the gc hook first, so that memory freed by hand is not freed a second time when addr is collected 123 | if not ... 
then 124 | local function test(size) 125 | local addr = new(size) 126 | print(addr) 127 | addr = ffi.cast('int32_t*', addr) 128 | assert(addr[0] == 0) 129 | addr[0] = 1234 --writable 130 | assert(addr[0] == 1234) 131 | protect(addr, size) 132 | --addr[0] = 4321 --uncomment this to get a crash (r/o memory); TODO: test if executable 133 | --addr = nil; collectgarbage() --enable this to fail the assertion below 134 | return addr 135 | end 136 | local a1 = test(64*1024*1000) --64MB 137 | local a2 = test(16) --16 bytes 138 | assert(a1 ~= a2) --different pages 139 | a1 = nil 140 | a2 = nil 141 | collectgarbage() --TODO: test if finalizer was called 142 | end 143 | 144 | return {new = new, free = free, protect = protect} 145 | -------------------------------------------------------------------------------- /dasm.lua: -------------------------------------------------------------------------------- 1 | 2 | --Binding to the DynASM encoding engine. 3 | --Written by Cosmin Apreutesei. Public Domain. 4 | 5 | local ffi = require'ffi' 6 | local bit = require'bit' 7 | local arch = ffi.arch 8 | if arch == 'x64' then arch = 'x86' end --same linker for x64 9 | local C = ffi.load('dasm_'..arch) 10 | local M = {C = C} 11 | 12 | M._VERSION = 10400 13 | 14 | ffi.cdef[[ 15 | enum { 16 | DASM_S_OK = 0x00000000, 17 | DASM_S_NOMEM = 0x01000000, 18 | DASM_S_PHASE = 0x02000000, 19 | DASM_S_MATCH_SEC = 0x03000000, 20 | DASM_S_RANGE_I = 0x11000000, 21 | DASM_S_RANGE_SEC = 0x12000000, 22 | DASM_S_RANGE_LG = 0x13000000, 23 | DASM_S_RANGE_PC = 0x14000000, 24 | DASM_S_RANGE_VREG = 0x15000000, 25 | DASM_S_UNDEF_L = 0x21000000, 26 | DASM_S_UNDEF_PC = 0x22000000, 27 | }; 28 | 29 | /* Internal DynASM encoder state. */ 30 | typedef struct dasm_State_Ref { struct dasm_State *p; } dasm_State_Ref; 31 | typedef dasm_State_Ref *Dst_DECL; 32 | 33 | /* Initialize and free DynASM state. */ 34 | void dasm_init(Dst_DECL, int maxsection); 35 | void dasm_free(Dst_DECL); 36 | 37 | /* Setup global array. 
Must be called before dasm_setup(). */ 38 | void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl); 39 | 40 | /* Grow PC label array. Can be called after dasm_setup(), too. */ 41 | void dasm_growpc(Dst_DECL, unsigned int maxpc); 42 | 43 | /* Setup encoder. */ 44 | void dasm_setup(Dst_DECL, const void *actionlist); 45 | 46 | /* Feed encoder with actions. Calls are generated by pre-processor. */ 47 | void dasm_put(Dst_DECL, int start, ...); 48 | 49 | /* Link sections and return the resulting size. */ 50 | int dasm_link(Dst_DECL, size_t *szp); 51 | 52 | /* Encode sections into buffer. */ 53 | int dasm_encode(Dst_DECL, void *buffer); 54 | 55 | /* Get PC label offset. */ 56 | int dasm_getpclabel(Dst_DECL, unsigned int pc); 57 | 58 | /* Optional sanity checker to call between isolated encoding steps. */ 59 | int dasm_checkstep(Dst_DECL, int secmatch); 60 | 61 | typedef int (*DASM_EXTERN_TYPE) (void *ctx, unsigned char *addr, int idx, int type); 62 | DASM_EXTERN_TYPE DASM_EXTERN_FUNC; 63 | ]] 64 | 65 | local function err(...) 66 | io.stderr:setvbuf'no' 67 | io.stderr:write('dasm error: ', ...) 68 | io.stderr:write'\n' 69 | os.exit(1) 70 | end 71 | 72 | --status check helper 73 | 74 | local status_map = { 75 | [C.DASM_S_NOMEM] = 'out of memory', 76 | [C.DASM_S_PHASE] = 'phase error', 77 | [C.DASM_S_MATCH_SEC] = 'section not found', 78 | [C.DASM_S_RANGE_I] = 'immediate value out of range', 79 | [C.DASM_S_RANGE_SEC] = 'too many sections', 80 | [C.DASM_S_RANGE_LG] = 'too many global labels', 81 | [C.DASM_S_RANGE_PC] = 'too many pclabels', 82 | [C.DASM_S_RANGE_VREG] = 'variable register out of range', 83 | [C.DASM_S_UNDEF_L] = 'undefined global label', 84 | [C.DASM_S_UNDEF_PC] = 'undefined pclabel', 85 | } 86 | 87 | local function checkst(st) 88 | if st == C.DASM_S_OK then return end 89 | local status, arg = status_map[bit.band(st, 0xff000000)], bit.band(st, 0x00ffffff) 90 | if status then 91 | err(status, '. 
:', arg) 92 | else 93 | err(string.format('0x%08X', st)) 94 | end 95 | end 96 | 97 | --low level API 98 | 99 | M.init = C.dasm_init 100 | M.free = C.dasm_free 101 | M.setupglobal = C.dasm_setupglobal 102 | M.growpc = C.dasm_growpc 103 | M.setup = C.dasm_setup 104 | 105 | local int_ct = ffi.typeof'int' 106 | local function convert_arg(arg) --dasm_put() accepts only int32 varargs. 107 | if type(arg) == "number" then --but we make it accept uint32 too by normalizing the arg. 108 | arg = bit.tobit(arg) --non-number args are converted to int32 according to ffi rules. 109 | end 110 | return ffi.cast(int_ct, arg) 111 | end 112 | local function convert_args(...) --not a tailcall but at least it doesn't make any garbage 113 | if select('#', ...) == 0 then return end 114 | return convert_arg(...), convert_args(select(2, ...)) 115 | end 116 | function M.put(state, start, ...) 117 | C.dasm_put(state, start, convert_args(...)) 118 | end 119 | 120 | function M.link(state, sz) 121 | sz = sz or ffi.new'size_t[1]' 122 | checkst(C.dasm_link(state, sz)) 123 | return tonumber(sz[0]) 124 | end 125 | 126 | function M.encode(state, buf) 127 | checkst(C.dasm_encode(state, buf)) 128 | end 129 | jit.off(M.encode) --calls the DASM_EXTERN_FUNC callback 130 | 131 | local voidptr_ct = ffi.typeof'void*' 132 | local byteptr_ct = ffi.typeof'int8_t*' 133 | function M.getpclabel(state, pc, buf) 134 | local offset = C.dasm_getpclabel(state, pc) 135 | if buf then 136 | return ffi.cast(voidptr_ct, ffi.cast(byteptr_ct, buf) + offset) 137 | end 138 | return offset 139 | end 140 | 141 | function M.checkstep(state, section) 142 | checkst(C.dasm_checkstep(state, section or -1)) 143 | end 144 | 145 | --get the address of a standard symbol. 146 | --TODO: ask Mike to expose clib_getsym() in ffi so we can get the address 147 | --of symbols without having to declare them first. 
148 | local function getsym(name) 149 | return ffi.C[name] 150 | end 151 | local getsym = function(name) 152 | local ok, sym = pcall(getsym, name) 153 | if not ok then --not found or not defined: define it and try again 154 | ffi.cdef(string.format('void %s();', name)) 155 | return getsym(name) 156 | else 157 | return sym 158 | end 159 | end 160 | 161 | --DASM_EXTERN callback plumbing 162 | 163 | local extern_names --t[idx] -> name 164 | local extern_get --f(name) -> ptr 165 | 166 | local byteptr_ct = ffi.typeof'uint8_t*' 167 | 168 | local function DASM_EXTERN_FUNC(ctx, addr, idx, type) 169 | if not extern_names or not extern_get then 170 | err'extern callback not initialized.' 171 | end 172 | local name = extern_names[idx] 173 | local ptr = extern_get(name) 174 | if ptr == nil then 175 | err('extern not found: ', name, '.') 176 | end 177 | if type ~= 0 then 178 | return ffi.cast(byteptr_ct, ptr) - addr - 4 179 | else 180 | return ptr 181 | end 182 | end 183 | 184 | function M.setupextern(_, names, getter) 185 | extern_names = names 186 | extern_get = getter or getsym 187 | if C.DASM_EXTERN_FUNC == nil then 188 | C.DASM_EXTERN_FUNC = DASM_EXTERN_FUNC 189 | end 190 | end 191 | 192 | --hi-level API 193 | 194 | function M.new(actionlist, externnames, sectioncount, globalcount, externget, globals) 195 | local state = ffi.new'dasm_State_Ref' 196 | M.init(state, sectioncount or 1) 197 | globalcount = globalcount or 256 198 | globals = globals or ffi.new('void*[?]', globalcount) 199 | ffi.gc(state, function(state) 200 | local _ = actionlist, externnames, globals, externget --anchor those: don't rely on the user doing so 201 | M.free(state) 202 | end) 203 | M.setupglobal(state, globals, globalcount) 204 | M.setupextern(state, externnames, externget) 205 | M.setup(state, actionlist) 206 | return state, globals 207 | end 208 | 209 | function M.build(state) 210 | state:checkstep(-1) 211 | local sz = state:link() 212 | if sz == 0 then err'no code?' 
end 213 | local mm = require'dasm_mm' --runtime dependency 214 | local buf = mm.new(sz) 215 | state:encode(buf) 216 | mm.protect(buf, sz) 217 | return buf, sz 218 | end 219 | 220 | function M.dump(addr, size, out) 221 | local disass = require('jit.dis_'..jit.arch).disass 222 | disass(ffi.string(addr, size), tonumber(ffi.cast('uintptr_t', addr)), out) 223 | end 224 | 225 | --given the globals array from dasm.new() and the globalnames list 226 | --from the `.globalnames` directive, return a map {global_name -> global_addr}. 227 | function M.globals(globals, globalnames) 228 | local t = {} 229 | for i = 0, #globalnames do 230 | if globals[i] ~= nil then 231 | t[globalnames[i]] = globals[i] 232 | end 233 | end 234 | return t 235 | end 236 | 237 | --object interface 238 | 239 | ffi.metatype('dasm_State_Ref', {__index = { 240 | --low-level API 241 | init = M.init, 242 | free = M.free, 243 | setupglobal = M.setupglobal, 244 | setupextern = M.setupextern, 245 | growpc = M.growpc, 246 | setup = M.setup, 247 | put = M.put, 248 | link = M.link, 249 | encode = M.encode, 250 | getpclabel = M.getpclabel, 251 | checkstep = M.checkstep, 252 | --hi-level API 253 | build = M.build, 254 | }}) 255 | 256 | if not ... 
then --demo 257 | local dasm = M 258 | local actions = ffi.new('const uint8_t[19]', 259 | {254,0,102,184,5,0,254,1,102,187,3,0,254,2,102,187,3,0,255}) 260 | local Dst, globals = dasm.new(actions, nil, 3) 261 | --|.code 262 | dasm.put(Dst, 0) 263 | --| mov ax, 5 264 | --|.sub1 265 | dasm.put(Dst, 2) 266 | --| mov bx, 3 267 | --|.sub2 268 | dasm.put(Dst, 8) 269 | --| mov bx, 3 270 | dasm.put(Dst, 14) 271 | local addr, size = Dst:build() 272 | dasm.dump(addr, size) 273 | end 274 | 275 | return M 276 | -------------------------------------------------------------------------------- /dynasm_demo_x86.dasl: -------------------------------------------------------------------------------- 1 | --go@ luajit dynasm.lua * 2 | 3 | local ffi = require'ffi' -- required 4 | local dasm = require'dasm' -- required 5 | 6 | |.arch ARCH 7 | | 8 | |.if not (X86 or X64) 9 | | .error invalid arch ARCH 10 | |.endif 11 | | 12 | |.actionlist actions // gen. `actions` to pass to dasm.new() 13 | |.externnames externnames // gen `externnames` to pass to dasm.new() to solve extern names 14 | |.section sec1, sec2 // gen. `DASM_SECTION_SEC1`, ..., `DASM_MAXSECTION` 15 | |.globals global_ // gen. `global_`, ..., `DASM_MAXGLOBAL` 16 | |.globalnames globalnames // gen. `globalnames` to get the names of global labels 17 | 18 | local demo = {} --demo table: {name = codegen_func} 19 | local demos = {} --demo list in code order: {name1, ...} 20 | setmetatable(demo, {__newindex = function(t,k,v) rawset(demo, k, v); demos[#demos+1] = k end}) 21 | 22 | --each code generator function generates some code into the Dst argument (which is a dasm state), 23 | --and optionally returns a function that knows how to test the resulting code (if nothing is returned, 24 | --the code will be just dumped for inspection). 25 | 26 | function demo.literals(Dst) 27 | | mov al, 0xff // number literals are translation-time and are range-checked. 28 | | mov eax, 0xffffffff // this is correct and will result in 0xffffffff. 
29 | | mov eax, dword*0x22222222 // size overrides with number literals are also translation-time. 30 | | mov eax, [ebx + ecx * 8 + 0x7fffffff] // this mov form allows +/- 2G literal displacements. 31 | |.if X86 32 | | mov eax, [0xffffffff] // this mov form allows 0..4G literals. 33 | |.else 34 | | mov eax, [0x7fffffff] // this mov form allows only 0..2G literals and you won't get an error! 35 | |.endif 36 | end 37 | 38 | function demo.immediate_subst(Dst) --immediate value substitution 39 | | mov al, 0+0xff // expressions are encoding-time and coerced to int32. 40 | | mov eax, 0+0xffffffff // but expressions are also normalized first, so we can pass a full uint32. 41 | | mov eax, [ebx + ecx * 8 + 0x7fffffff] // this mov form allows +/- 2G expression displacements. 42 | |.if X86 43 | | mov eax, [0+0xffffffff] // this mov form allows 0..4G expressions. 44 | |.else 45 | | mov eax, [0+0x7fffffff] // this mov form allows only 0..2G expressions and you won't get an error! 46 | | mov64 eax, [0+0xdeadbeefdeadbeefULL] // mov64 allows full 64bit expressions (they're never literals). 47 | | mov64 rax, 0+0xdeadbeefdeadbeefULL // mov64 allows full 64bit expressions (they're never literals). 48 | |.endif 49 | end 50 | 51 | function demo.addr(Dst) --address substitution for jump targets 52 | local addr = ffi.arch == 'x86' and 0xdeadbeef or 0xdeadbeefdeadbeefULL 53 | local ptr = ffi.cast("void*", addr) 54 | |.if X86 55 | | jmp &addr // works with numbers 56 | | jmp &ptr // works with pointers too 57 | |.else 58 | | mov64 rax, addr // no `&` on x64 but you can load an address with `mov64` 59 | | mov64 rax, ptr // works with pointers too 60 | |.endif 61 | end 62 | 63 | function demo.var_reg(Dst) --variable register numbers 64 | | mov Rd(2), 0 // Rd(2) = edx. this is always encoding-time. 65 | | fld Rf(2) // Rf(2) = st2. this is always encoding-time. 
66 | end 67 | 68 | function demo.types(Dst) --type macros 69 | 70 | if not rawget(_G, '__TYPEDEFS') then 71 | rawset(_G, '__TYPEDEFS', true) 72 | ffi.cdef'typedef struct Type1 {int f1; int f2;} Type1' 73 | ffi.cdef'typedef Type1 Type2' 74 | end 75 | 76 | local n,m = 1,1 77 | 78 | |.type TP1, Type1, eax 79 | |.type TP2, Type2, ebx 80 | | 81 | | mov eax, TP1:ebx // +0 82 | | mov ebx, TP1 // +0 83 | | mov eax, TP1->f1 // +0 84 | | mov ebx, TP1.f2 // +4 85 | | mov ecx, TP2[n+m]->f1 // +16 86 | | mov edx, TP2[n-2*m].f2 // -4 87 | | mov edx, TP2:ecx[n-2*m].f2 // -4 88 | end 89 | 90 | function demo.code_sections(Dst) 91 | |.sec1; mov ax, 1 92 | |.sec2; mov ax, 2 93 | |.sec1; mov bx, 1 // append code to .sec1 94 | |.sec2; mov bx, 2 // append code to .sec2 95 | end 96 | 97 | function demo.global_labels(Dst) 98 | |->l1: 99 | | jmp ->l2 100 | |->l2: 101 | | jmp ->l1 102 | end 103 | 104 | function demo.local_labels(Dst) 105 | |2: 106 | |1: 107 | | jmp >1 108 | |1: 109 | | jmp <2 110 | end 111 | 112 | function demo.label_subst(Dst) 113 | |5: 114 | |->label_subst: 115 | | mov eax, [<5] // encoded as abs. addr. on x86 and as rip-relative on x64 116 | | mov ebx, [->label_subst] 117 | end 118 | 119 | function demo.align(Dst) 120 | local n = 0x90 121 | | mov eax, 0x22222222 122 | |.byte n 123 | |.align word // puts nop 124 | |.word bit.bor(bit.lshift(n,8), n) // expressions work too 125 | |.align dword // puts nop 126 | |.space 1+1, 0x90 // size can be variable, filler must be constant 127 | | mov eax, 0x22222222 128 | end 129 | 130 | function demo.cond(Dst) --conditional compilation 131 | |.define def1, 1 132 | |.define def2, 1 133 | |.if def1 + def2 == 2 // any lua expression works. note: a 0 result evaluates to false! 
134 | | nop 135 | |.elif def2 136 | | .fatal this won't be parsed 137 | |.else 138 | | .fatal this won't be parsed 139 | |.endif 140 | end 141 | 142 | function demo.macros(Dst) 143 | |.macro saveregs 144 | |.if X86 145 | | push ebp; push edi; push esi; push ebx 146 | |.else 147 | | push rbp; push rdi; push rsi; push rbx 148 | |.endif 149 | |.endmacro 150 | | 151 | |.macro restoreregs 152 | |.if X86 153 | | pop ebx; pop esi; pop edi; pop ebp 154 | |.else 155 | | pop rbx; pop rsi; pop rdi; pop rbp 156 | |.endif 157 | |.endmacro 158 | | 159 | |.macro cat, s1, s2, s3 160 | | s1 .. s2 .. s3 // concatenation like `##` in C 161 | |.endmacro 162 | | 163 | | saveregs 164 | | restoreregs 165 | | mov eax, 0xffffffff 166 | | cat n, o, p 167 | end 168 | 169 | function demo.captures(Dst) 170 | |.capture c1; mov eax, 0x1a; .endcapture 171 | |.capture c2; mov ebx, 0x2a; .endcapture 172 | |.capture c1; mov eax, 0x1b; .endcapture 173 | |.capture c2; mov ebx, 0x2b; .endcapture 174 | |.dumpcapture c1 175 | |.dumpcapture c2 176 | |.dumpcapture c2 // we can dump it again 177 | end 178 | 179 | function demo.extern(Dst) 180 | 181 | local s1, s2 = 'Hello from %s!\n', 'DynASM' 182 | local p1 = ffi.cast('const char*', s1) -- with `const` we can take addr. of a Lua string without making a copy! 183 | local p2 = ffi.cast('const char*', s2) 184 | if ffi.arch == 'x64' then 185 | ffi.cdef'void printf(...)' --we need this to get printf's address 186 | end 187 | 188 | |.if X86 // ptr args in stack in reverse order 189 | | sub esp, 4 // align the stack for OSX: 0 - call:4 - p1:4 - p2:4 = -12 190 | | push &p2 191 | | push &p1 192 | | call extern printf 193 | | add esp, 8 // pop args 194 | | add esp, 4 // dealign 195 | | ret 196 | |.elif WINDOWS // ptr args in rcx, rdx, ... 
197 | | sub rsp, 32 // allocate shadow space for printf 198 | | mov64 rcx, p1 199 | | mov64 rdx, p2 200 | | mov64 rax, ffi.C.printf // extern doesn't work on x64 201 | | call rax 202 | | add rsp, 32 // put it back 203 | | ret 204 | |.else // ptr args in rdi, rsi, ... 205 | | sub rsp, 8 // align the stack for OSX: 0 - call:8 = -8 206 | | mov rdi, &p1 207 | | mov rsi, &p2 208 | | mov64 rdx, ffi.C.printf // extern doesn't work on x64 209 | | call rdx 210 | | add rsp, 8 211 | | ret 212 | |.endif 213 | 214 | return function(buf) 215 | local _ = s1, s2 --pin the strings 216 | local code = ffi.cast('void __cdecl (*) ()', buf) 217 | code() 218 | end 219 | end 220 | 221 | function demo.multiply(Dst) 222 | 223 | |.if X86 // int args in stack in reverse order; int ret in eax 224 | | mov eax, [esp+4] 225 | | imul dword [esp+8] 226 | | ret 227 | |.elif WINDOWS // int args in rcx, rdx, ...; int ret in rax 228 | | mov rax, rcx 229 | | imul rdx 230 | | ret 231 | |.else // int args in rdi, rsi, ...; int ret in rax 232 | | mov rax, rdi 233 | | imul rsi 234 | | ret 235 | |.endif 236 | 237 | return function(buf) 238 | local mul = ffi.cast('int32_t __cdecl (*) (int32_t x, int32_t y)', buf) 239 | assert(mul(7, -5) == -35) 240 | assert(mul(-4, 0x08000000) == -0x20000000) 241 | print'ok' 242 | end 243 | end 244 | 245 | return {demo, demos, actions, externnames, globalnames, DASM_MAXSECTION, DASM_MAXGLOBAL} 246 | -------------------------------------------------------------------------------- /dynasm.md: -------------------------------------------------------------------------------- 1 | --- 2 | tagline: DynASM with Lua mode 3 | --- 4 | 5 | ## `local dynasm = require'dynasm'` 6 | 7 | This is a modified version of [DynASM](http://luajit.org/dynasm.html) that allows generating, 8 | compiling, and running x86 and x86-64 assembly code directly from Lua. It also exposes the DynASM assembler 9 | and linker to be used as Lua modules. 
10 | 11 | Jump To: [Examples](#examples) | [DynASM API](#dynasm-api) | [DASM API](#dasm-api) | 12 | [Changes to DynASM](#changes-to-dynasm) | 13 | [Instructions](http://corsix.github.io/dynasm-doc/instructions.html) | 14 | [Directives](http://corsix.github.io/dynasm-doc/reference.html#directives) 15 | 16 | ## Features 17 | 18 | * translate, compile and run Lua/ASM code from Lua (no C glue) 19 | * load Lua/ASM (.dasl) files with `require()` 20 | * works with file, string and stream inputs and outputs 21 | 22 | ## Before you start 23 | 24 | 1. DynASM is [not an inline assembler](http://www.corsix.org/content/what-is-dynasm), it's a code generator. 25 | The following code: 26 | 27 | ~~~{.lua} 28 | function codegen(Dst) 29 | for i = 1, 3 do 30 | | mov ax, i 31 | end 32 | end 33 | ~~~ 34 | 35 | does _not_ run the assembly instruction 3 times when codegen is called, instead, it merely adds the 36 | instruction sequence `mov ax, 1; mov ax, 2; mov ax, 3` to the dynasm state `Dst` when codegen is called. 37 | Mixing Lua and ASM code like this has the effect of generating code, not running it. 38 | 39 | 2. DynASM has two parts: the assembler/preprocessor, written in Lua, and the linker/encoder, written in C. 40 | `dynasm.lua` is the preprocessor. It takes mixed C/ASM code as input (from a file, string or file-like object) 41 | and generates C code (to a file, string, or file-like object). Alternatively, it can take mixed Lua/ASM 42 | code (like the above example) and generate Lua code, which is what the "Lua mode" part is all about. 43 | `dasm.lua` is the binding to the C part of DynASM (the linker/encoder) which deals with building the code into 44 | executable memory that can be called into. 45 | 46 | 3. `.dasl` files refer to Lua/ASM files, `.dasc` files refer to C/ASM files. dasl files can be used transparently 47 | as Lua modules (they are translated on-the-fly). 48 | 49 | ## Examples 50 | 51 | ### 1.
Self-contained module 52 | 53 | This simple, self-contained module publishes the function multiply(x, y) -> x * y. 54 | 55 | #### `multiply_x86.dasl:` 56 | 57 | ~~~{.lua} 58 | local ffi = require'ffi' --required 59 | local dasm = require'dasm' --required 60 | 61 | |.arch x86 --must be the first instruction 62 | |.actionlist actions --make an action list called `actions` 63 | 64 | local Dst = dasm.new(actions) --make a dasm state 65 | 66 | -- the next chunk of asm code will be added to the action list, and a call 67 | -- to `dasm.put(Dst, 0)` will be generated in its place, which will be copying 68 | -- the code from the start of the action list into the Dst state. 69 | 70 | | mov eax, [esp+4] 71 | | imul dword [esp+8] 72 | | ret 73 | 74 | local code = Dst:build() --check, link and encode the code 75 | local fptr = ffi.cast('int32_t __cdecl (*) (int32_t x, int32_t y)', code) --take a callable pointer to it 76 | 77 | return function(x, y) 78 | local _ = code --pin the code buffer so it doesn't get collected 79 | return fptr(x, y) 80 | end 81 | ~~~ 82 | 83 | The best way to understand how the above code is supposed to work is to translate it: 84 | 85 | > lua dynasm.lua multiply_x86.dasl 86 | 87 | #### `main.lua`: 88 | 89 | ~~~{.lua} 90 | require'dynasm' --hook in the `require` loader for .dasl files 91 | local multiply = require'multiply_x86' --translate, load and run `multiply_x86.dasl` 92 | assert(multiply(-7, 5) == -35) 93 | ~~~ 94 | 95 | ### 2. Code gen / build split 96 | 97 | This is an idea on how you can keep your asm code separated from the plumbing required to build it, 98 | and also how you can make separate functions out of different asm chunks from the same dasl file. 
99 | 100 | #### `funcs_x86.dasl`: 101 | 102 | ~~~{.lua} 103 | local ffi = require'ffi' 104 | local dasm = require'dasm' 105 | 106 | |.arch x86 107 | |.actionlist actions 108 | |.globalnames globalnames 109 | 110 | local gen = {} 111 | 112 | function gen.mul(Dst) --function which generates code into the dynasm state called `Dst` 113 | |->mul: --and returns a "make" function which gets a dasm.globals() map 114 | | mov eax, [esp+4] --and returns a function that knows how to call into its code. 115 | | imul dword [esp+8] 116 | | ret 117 | return function(globals) 118 | return ffi.cast('int32_t __cdecl (*) (int32_t x, int32_t y)', globals.mul) 119 | end 120 | end 121 | 122 | function gen.add(Dst) 123 | |->add: 124 | | mov eax, [esp+4] 125 | | add eax, dword [esp+8] 126 | | ret 127 | return function(globals) 128 | return ffi.cast('int32_t __cdecl (*) (int32_t x, int32_t y)', globals.add) 129 | end 130 | end 131 | 132 | return {gen = gen, actions = actions, globalnames = globalnames} 133 | ~~~ 134 | 135 | #### `funcs.lua`: 136 | 137 | ~~~{.lua} 138 | local dynasm = require'dynasm' 139 | local dasm = require'dasm' 140 | local funcs = require'funcs_x86' 141 | 142 | local state, globals = dasm.new(funcs.actions) --create a dynasm state with the generated action list 143 | 144 | local M = {} --generate the code, collecting the make functions 145 | for name, gen in pairs(funcs.gen) do 146 | M[name] = gen(state) 147 | end 148 | 149 | local buf, size = state:build() --check, link and encode the code 150 | local globals = dasm.globals(globals, funcs.globalnames) --get the map of global_name -> global_addr 151 | 152 | for name, make in pairs(M) do --make the callable functions 153 | M[name] = make(globals) 154 | end 155 | 156 | M.__buf = buf --pin buf so it doesn't get collected 157 | 158 | return M 159 | ~~~ 160 | 161 | #### `main.lua` 162 | 163 | ~~~{.lua} 164 | local funcs = require'funcs' 165 | 166 | assert(funcs.mul(-7, 5) == -35) 167 | assert(funcs.add(-7, 5) == -2) 168 | 
~~~ 169 | 170 | ### 3. Load code from a string 171 | 172 | ~~~{.lua} 173 | local dynasm = require'dynasm' 174 | 175 | local gencode, actions = dynasm.loadstring([[ 176 | local ffi = require'ffi' 177 | local dasm = require'dasm' 178 | 179 | |.arch x86 180 | |.actionlist actions 181 | 182 | local function gencode(Dst) 183 | | mov ax, bx 184 | end 185 | 186 | return gencode, actions 187 | ]])() 188 | ~~~ 189 | 190 | ### 4. Translate from Lua 191 | 192 | ~~~{.lua} 193 | local dynasm = require'dynasm' 194 | print(dynasm.translate_tostring'multiply_x86.dasl') 195 | ~~~ 196 | 197 | The above is equivalent to the command line: 198 | 199 | > lua dynasm.lua multiply_x86.dasl 200 | 201 | > __Tip__: You can pre-assemble `foo.dasl` into `foo.lua` -- `require()` will then choose `foo.lua` 202 | over `foo.dasl`, so you basically get transparent caching for free. This speeds up app loading a bit, 203 | and you can ship your app without the assembler (you still need to ship the linker/encoder for 204 | all the platforms that you support). 205 | 206 | 207 | ### 5. Included demo/tutorial 208 | 209 | Check out the included [dynasm_demo_x86.dasl] and [dynasm_demo.lua] modules for more in-depth knowledge 210 | about DynASM/Lua interaction. It works on Windows, Linux and OSX, both x86 and x64. 211 | 212 | [dynasm_demo.lua]: https://github.com/luapower/dynasm/blob/master/dynasm_demo.lua 213 | [dynasm_demo_x86.dasl]: https://github.com/luapower/dynasm/blob/master/dynasm_demo_x86.dasl 214 | 215 | 216 | ### 6. Brainfuck JIT compiler 217 | 218 | The examples above don't do DynASM enough justice, because DynASM was after all made for building JIT compilers. 219 | The [bf project](https://github.com/luapower/bf) contains a Lua/ASM translation of the code from Josh Haberman's 220 | [tutorial](http://blog.reverberate.org/2012/12/hello-jit-world-joy-of-simple-jits.html) on DynASM and JITs, 221 | and probably the simplest JIT compiler you could write. 
It too works on Windows, Linux and OSX, x86 and x64. 222 | 223 | ## DynASM API 224 | 225 | -------------------------------------------------------- -------------------------------------------------- 226 | __hi-level__ 227 | `dynasm.loadfile(infile[, opt]) -> chunk` load a dasl file and return it as a Lua chunk 228 | `dynasm.loadstring(s[, opt]) -> chunk` load a dasl string and return it as a Lua chunk 229 | __low-level__ 230 | `dynasm.translate(infile, outfile[, opt])` translate a dasc or dasl file 231 | `dynasm.string_infile(s) -> infile` use a string as an infile to translate() 232 | `dynasm.func_outfile(func) -> outfile` make an outfile that calls func(s) for each piece 233 | `dynasm.table_outfile(t) -> outfile` make an outfile that writes pieces to a table 234 | `dynasm.translate_tostring(infile[, opt]) -> s` translate to a string 235 | `dynasm.translate_toiter(infile[, opt]) -> iter() -> s` translate to an iterator of string pieces 236 | -------------------------------------------------------- -------------------------------------------------- 237 | 238 | ## DASM API 239 | 240 | ----------------------------------------------------- -------------------------------------------------- 241 | __hi-level__ 242 | 243 | `dasm.new(` \ make a dasm state for an action list. \ 244 | `actionlist,` \ -> per `.actionlist` directive. \ 245 | `[externnames],` \ -> per `.externnames` directive. \ 246 | `[sectioncount],` \ -> DASM_MAXSECTION from `.sections` directive. \ 247 | `[globalcount],` \ -> DASM_MAXGLOBAL from `.globals` directive. 
\ 248 | `[externget],` \ -> `func(externname) -> addr`, for solving `extern`s \ 249 | `[globals]) -> state, globals` -> `void*[DASM_MAXGLOBAL]`, to hold globals 250 | 251 | `state:build() -> buf, size` check, link, alloc, encode and mprotect the code 252 | 253 | `dasm.dump(buf, size)` dump the code using the included disassembler in luajit 254 | 255 | `dasm.globals(globals, globalnames)->{name -> addr}` given the globals array returned by dasm.new() and 256 | the globalnames list per `.globalnames` directive, 257 | return a table that maps the names to their address. 258 | 259 | __low-level__ 260 | 261 | `state:init(maxsection)` init a state 262 | 263 | `state:free()` free the state 264 | 265 | `state:setupglobal(globals, globalcount)` set up the globals buffer 266 | 267 | `state:growpc(maxpc)` grow the number of available pc labels 268 | 269 | `state:setup(actionlist)` set up the state with an action list 270 | 271 | `state:put(state, ...)` the assembler generates these calls 272 | 273 | `state:link() -> size` link the code and get its size 274 | 275 | `state:encode(buf)` encode the code into a buffer 276 | 277 | `state:getpclabel(pclabel[, buf])` get pc label offset, or pointer if buf is passed 278 | 279 | `state:checkstep(secmatch)` check code before encoding 280 | 281 | `state:setupextern(externnames, getter)` set up a new `extern` handler 282 | ----------------------------------------------------- -------------------------------------------------- 283 | 284 | ## Changes to DynASM 285 | 286 | The [source code changes] made to DynASM were kept to a minimum in order 287 | to preserve DynASM semantics, make it easy to merge back changes 288 | from upstream, and to make it easy to add the Lua mode to other 289 | architectures supported by DynASM in the future. 290 | As for the user-facing changes, the list is again small: 291 | 292 | * added `-l, --lang C|Lua` command line option (set automatically for dasl and dasc files). 
293 | * asm comments can start with both `--` and `//` in Lua mode. 294 | * the defines ARCH, OS, X86, X64, WINDOWS, LINUX, OSX are available by default in Lua mode. 295 | * the `.globals` directive generates DASM_MAXGLOBAL in Lua mode. 296 | * `.type` usage is limited in Lua mode: `FOO.field`, `FOO[expr]` and `FOO[expr].field` 297 | are ok, but arbitrary expressions like `FOO[5].bar[2].baz` are not. 298 | * `extern foo` resolves to `ffi.C.foo` by default; if foo has no cdef, 299 | `ffi.cdef'void foo()'` is called (i.e. a dummy cdef is made for it - caveat emptor). 300 | 301 | [source code changes]: https://github.com/luapower/dynasm/compare/93805cb...master 302 | 303 | ## Assembler tutorials & ref docs 304 | 305 | * [x64 tutorial](https://software.intel.com/en-us/articles/introduction-to-x64-assembly/) 306 | * [SSE quick ref](http://softpixel.com/~cwright/programming/simd/sse.php) 307 | * [FPU tutorial & ref](http://www.website.masmforum.com/tutorials/fptute/index.html) 308 | * [Agner Fog - Calling Conventions](http://www.agner.org/optimize/calling_conventions.pdf) 309 | * [Agner Fog - CPU Internals](http://www.agner.org/optimize/microarchitecture.pdf) 310 | * [Agner Fog - Optimization Guide](http://www.agner.org/optimize/optimizing_assembly.pdf) 311 | * [Agner Fog - Instruction Tables](http://www.agner.org/optimize/instruction_tables.pdf) 312 | 313 | -------------------------------------------------------------------------------- /csrc/dynasm/dasm_x86.h: -------------------------------------------------------------------------------- 1 | /* 2 | ** DynASM x86 encoding engine. 3 | ** Copyright (C) 2005-2015 Mike Pall. All rights reserved. 4 | ** Released under the MIT license. See dynasm.lua for full copyright notice. 5 | */ 6 | 7 | #include <stddef.h> 8 | #include <stdarg.h> 9 | #include <string.h> 10 | #include <stdlib.h> 11 | 12 | #define DASM_ARCH "x86" 13 | 14 | #ifndef DASM_EXTERN 15 | #define DASM_EXTERN(a,b,c,d) 0 16 | #endif 17 | 18 | /* Action definitions. DASM_STOP must be 255.
*/ 19 | enum { 20 | DASM_DISP = 233, 21 | DASM_IMM_S, DASM_IMM_B, DASM_IMM_W, DASM_IMM_D, DASM_IMM_WB, DASM_IMM_DB, 22 | DASM_VREG, DASM_SPACE, DASM_SETLABEL, DASM_REL_A, DASM_REL_LG, DASM_REL_PC, 23 | DASM_IMM_LG, DASM_IMM_PC, DASM_LABEL_LG, DASM_LABEL_PC, DASM_ALIGN, 24 | DASM_EXTERN, DASM_ESC, DASM_MARK, DASM_SECTION, DASM_STOP 25 | }; 26 | 27 | /* Maximum number of section buffer positions for a single dasm_put() call. */ 28 | #define DASM_MAXSECPOS 25 29 | 30 | /* DynASM encoder status codes. Action list offset or number are or'ed in. */ 31 | #define DASM_S_OK 0x00000000 32 | #define DASM_S_NOMEM 0x01000000 33 | #define DASM_S_PHASE 0x02000000 34 | #define DASM_S_MATCH_SEC 0x03000000 35 | #define DASM_S_RANGE_I 0x11000000 36 | #define DASM_S_RANGE_SEC 0x12000000 37 | #define DASM_S_RANGE_LG 0x13000000 38 | #define DASM_S_RANGE_PC 0x14000000 39 | #define DASM_S_RANGE_VREG 0x15000000 40 | #define DASM_S_UNDEF_L 0x21000000 41 | #define DASM_S_UNDEF_PC 0x22000000 42 | 43 | /* Macros to convert positions (8 bit section + 24 bit index). */ 44 | #define DASM_POS2IDX(pos) ((pos)&0x00ffffff) 45 | #define DASM_POS2BIAS(pos) ((pos)&0xff000000) 46 | #define DASM_SEC2POS(sec) ((sec)<<24) 47 | #define DASM_POS2SEC(pos) ((pos)>>24) 48 | #define DASM_POS2PTR(D, pos) (D->sections[DASM_POS2SEC(pos)].rbuf + (pos)) 49 | 50 | /* Action list type. */ 51 | typedef const unsigned char *dasm_ActList; 52 | 53 | /* Per-section structure. */ 54 | typedef struct dasm_Section { 55 | int *rbuf; /* Biased buffer pointer (negative section bias). */ 56 | int *buf; /* True buffer pointer. */ 57 | size_t bsize; /* Buffer size in bytes. */ 58 | int pos; /* Biased buffer position. */ 59 | int epos; /* End of biased buffer position - max single put. */ 60 | int ofs; /* Byte offset into section. */ 61 | } dasm_Section; 62 | 63 | /* Core structure holding the DynASM encoding state. */ 64 | struct dasm_State { 65 | size_t psize; /* Allocated size of this structure. 
*/ 66 | dasm_ActList actionlist; /* Current actionlist pointer. */ 67 | int *lglabels; /* Local/global chain/pos ptrs. */ 68 | size_t lgsize; 69 | int *pclabels; /* PC label chains/pos ptrs. */ 70 | size_t pcsize; 71 | void **globals; /* Array of globals (bias -10). */ 72 | dasm_Section *section; /* Pointer to active section. */ 73 | size_t codesize; /* Total size of all code sections. */ 74 | int maxsection; /* 0 <= sectionidx < maxsection. */ 75 | int status; /* Status code. */ 76 | dasm_Section sections[1]; /* All sections. Alloc-extended. */ 77 | }; 78 | 79 | /* The size of the core structure depends on the max. number of sections. */ 80 | #define DASM_PSZ(ms) (sizeof(dasm_State)+(ms-1)*sizeof(dasm_Section)) 81 | 82 | 83 | /* Initialize DynASM state. */ 84 | void dasm_init(Dst_DECL, int maxsection) 85 | { 86 | dasm_State *D; 87 | size_t psz = 0; 88 | int i; 89 | Dst_REF = NULL; 90 | DASM_M_GROW(Dst, struct dasm_State, Dst_REF, psz, DASM_PSZ(maxsection)); 91 | D = Dst_REF; 92 | D->psize = psz; 93 | D->lglabels = NULL; 94 | D->lgsize = 0; 95 | D->pclabels = NULL; 96 | D->pcsize = 0; 97 | D->globals = NULL; 98 | D->maxsection = maxsection; 99 | for (i = 0; i < maxsection; i++) { 100 | D->sections[i].buf = NULL; /* Need this for pass3. */ 101 | D->sections[i].rbuf = D->sections[i].buf - DASM_SEC2POS(i); 102 | D->sections[i].bsize = 0; 103 | D->sections[i].epos = 0; /* Wrong, but is recalculated after resize. */ 104 | } 105 | } 106 | 107 | /* Free DynASM state. */ 108 | void dasm_free(Dst_DECL) 109 | { 110 | dasm_State *D = Dst_REF; 111 | int i; 112 | for (i = 0; i < D->maxsection; i++) 113 | if (D->sections[i].buf) 114 | DASM_M_FREE(Dst, D->sections[i].buf, D->sections[i].bsize); 115 | if (D->pclabels) DASM_M_FREE(Dst, D->pclabels, D->pcsize); 116 | if (D->lglabels) DASM_M_FREE(Dst, D->lglabels, D->lgsize); 117 | DASM_M_FREE(Dst, D, D->psize); 118 | } 119 | 120 | /* Setup global label array. Must be called before dasm_setup(). 
*/ 121 | void dasm_setupglobal(Dst_DECL, void **gl, unsigned int maxgl) 122 | { 123 | dasm_State *D = Dst_REF; 124 | D->globals = gl - 10; /* Negative bias to compensate for locals. */ 125 | DASM_M_GROW(Dst, int, D->lglabels, D->lgsize, (10+maxgl)*sizeof(int)); 126 | } 127 | 128 | /* Grow PC label array. Can be called after dasm_setup(), too. */ 129 | void dasm_growpc(Dst_DECL, unsigned int maxpc) 130 | { 131 | dasm_State *D = Dst_REF; 132 | size_t osz = D->pcsize; 133 | DASM_M_GROW(Dst, int, D->pclabels, D->pcsize, maxpc*sizeof(int)); 134 | memset((void *)(((unsigned char *)D->pclabels)+osz), 0, D->pcsize-osz); 135 | } 136 | 137 | /* Setup encoder. */ 138 | void dasm_setup(Dst_DECL, const void *actionlist) 139 | { 140 | dasm_State *D = Dst_REF; 141 | int i; 142 | D->actionlist = (dasm_ActList)actionlist; 143 | D->status = DASM_S_OK; 144 | D->section = &D->sections[0]; 145 | memset((void *)D->lglabels, 0, D->lgsize); 146 | if (D->pclabels) memset((void *)D->pclabels, 0, D->pcsize); 147 | for (i = 0; i < D->maxsection; i++) { 148 | D->sections[i].pos = DASM_SEC2POS(i); 149 | D->sections[i].ofs = 0; 150 | } 151 | } 152 | 153 | 154 | #ifdef DASM_CHECKS 155 | #define CK(x, st) \ 156 | do { if (!(x)) { \ 157 | D->status = DASM_S_##st|(int)(p-D->actionlist-1); return; } } while (0) 158 | #define CKPL(kind, st) \ 159 | do { if ((size_t)((char *)pl-(char *)D->kind##labels) >= D->kind##size) { \ 160 | D->status=DASM_S_RANGE_##st|(int)(p-D->actionlist-1); return; } } while (0) 161 | #else 162 | #define CK(x, st) ((void)0) 163 | #define CKPL(kind, st) ((void)0) 164 | #endif 165 | 166 | /* Pass 1: Store actions and args, link branches/labels, estimate offsets. */ 167 | void dasm_put(Dst_DECL, int start, ...) 
168 | { 169 | va_list ap; 170 | dasm_State *D = Dst_REF; 171 | dasm_ActList p = D->actionlist + start; 172 | dasm_Section *sec = D->section; 173 | int pos = sec->pos, ofs = sec->ofs, mrm = -1; 174 | int *b; 175 | 176 | if (pos >= sec->epos) { 177 | DASM_M_GROW(Dst, int, sec->buf, sec->bsize, 178 | sec->bsize + 2*DASM_MAXSECPOS*sizeof(int)); 179 | sec->rbuf = sec->buf - DASM_POS2BIAS(pos); 180 | sec->epos = (int)sec->bsize/sizeof(int) - DASM_MAXSECPOS+DASM_POS2BIAS(pos); 181 | } 182 | 183 | b = sec->rbuf; 184 | b[pos++] = start; 185 | 186 | va_start(ap, start); 187 | while (1) { 188 | int action = *p++; 189 | if (action < DASM_DISP) { 190 | ofs++; 191 | } else if (action <= DASM_REL_A) { 192 | int n = va_arg(ap, int); 193 | b[pos++] = n; 194 | switch (action) { 195 | case DASM_DISP: 196 | if (n == 0) { if (mrm < 0) mrm = p[-2]; if ((mrm&7) != 5) break; } 197 | case DASM_IMM_DB: if (((n+128)&-256) == 0) goto ob; 198 | case DASM_REL_A: /* Assumes ptrdiff_t is int. !x64 */ 199 | case DASM_IMM_D: ofs += 4; break; 200 | case DASM_IMM_S: CK(((n+128)&-256) == 0, RANGE_I); goto ob; 201 | case DASM_IMM_B: CK((n&-256) == 0, RANGE_I); ob: ofs++; break; 202 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto ob; 203 | case DASM_IMM_W: CK((n&-65536) == 0, RANGE_I); ofs += 2; break; 204 | case DASM_SPACE: p++; ofs += n; break; 205 | case DASM_SETLABEL: b[pos-2] = -0x40000000; break; /* Neg. label ofs. */ 206 | case DASM_VREG: CK((n&-16) == 0 && (n != 4 || (*p>>5) != 2), RANGE_VREG); 207 | if (*p < 0x40 && p[1] == DASM_DISP) mrm = n; 208 | if (*p < 0x20 && (n&7) == 4) ofs++; 209 | switch ((*p++ >> 3) & 3) { 210 | case 3: n |= b[pos-3]; 211 | case 2: n |= b[pos-2]; 212 | case 1: if (n <= 7) { b[pos-1] |= 0x10; ofs--; } 213 | } 214 | continue; 215 | } 216 | mrm = -1; 217 | } else { 218 | int *pl, n; 219 | switch (action) { 220 | case DASM_REL_LG: 221 | case DASM_IMM_LG: 222 | n = *p++; pl = D->lglabels + n; 223 | /* Bkwd rel or global. 
*/ 224 | if (n <= 246) { CK(n>=10||*pl<0, RANGE_LG); CKPL(lg, LG); goto putrel; } 225 | pl -= 246; n = *pl; 226 | if (n < 0) n = 0; /* Start new chain for fwd rel if label exists. */ 227 | goto linkrel; 228 | case DASM_REL_PC: 229 | case DASM_IMM_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); 230 | putrel: 231 | n = *pl; 232 | if (n < 0) { /* Label exists. Get label pos and store it. */ 233 | b[pos] = -n; 234 | } else { 235 | linkrel: 236 | b[pos] = n; /* Else link to rel chain, anchored at label. */ 237 | *pl = pos; 238 | } 239 | pos++; 240 | ofs += 4; /* Maximum offset needed. */ 241 | if (action == DASM_REL_LG || action == DASM_REL_PC) 242 | b[pos++] = ofs; /* Store pass1 offset estimate. */ 243 | break; 244 | case DASM_LABEL_LG: pl = D->lglabels + *p++; CKPL(lg, LG); goto putlabel; 245 | case DASM_LABEL_PC: pl = D->pclabels + va_arg(ap, int); CKPL(pc, PC); 246 | putlabel: 247 | n = *pl; /* n > 0: Collapse rel chain and replace with label pos. */ 248 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = pos; } 249 | *pl = -pos; /* Label exists now. */ 250 | b[pos++] = ofs; /* Store pass1 offset estimate. */ 251 | break; 252 | case DASM_ALIGN: 253 | ofs += *p++; /* Maximum alignment needed (arg is 2**n-1). */ 254 | b[pos++] = ofs; /* Store pass1 offset estimate. */ 255 | break; 256 | case DASM_EXTERN: p += 2; ofs += 4; break; 257 | case DASM_ESC: p++; ofs++; break; 258 | case DASM_MARK: mrm = p[-2]; break; 259 | case DASM_SECTION: 260 | n = *p; CK(n < D->maxsection, RANGE_SEC); D->section = &D->sections[n]; 261 | case DASM_STOP: goto stop; 262 | } 263 | } 264 | } 265 | stop: 266 | va_end(ap); 267 | sec->pos = pos; 268 | sec->ofs = ofs; 269 | } 270 | #undef CK 271 | 272 | /* Pass 2: Link sections, shrink branches/aligns, fix label offsets. 
*/ 273 | int dasm_link(Dst_DECL, size_t *szp) 274 | { 275 | dasm_State *D = Dst_REF; 276 | int secnum; 277 | int ofs = 0; 278 | 279 | #ifdef DASM_CHECKS 280 | *szp = 0; 281 | if (D->status != DASM_S_OK) return D->status; 282 | { 283 | int pc; 284 | for (pc = 0; pc*sizeof(int) < D->pcsize; pc++) 285 | if (D->pclabels[pc] > 0) return DASM_S_UNDEF_PC|pc; 286 | } 287 | #endif 288 | 289 | { /* Handle globals not defined in this translation unit. */ 290 | int idx; 291 | for (idx = 10; idx*sizeof(int) < D->lgsize; idx++) { 292 | int n = D->lglabels[idx]; 293 | /* Undefined label: Collapse rel chain and replace with marker (< 0). */ 294 | while (n > 0) { int *pb = DASM_POS2PTR(D, n); n = *pb; *pb = -idx; } 295 | } 296 | } 297 | 298 | /* Combine all code sections. No support for data sections (yet). */ 299 | for (secnum = 0; secnum < D->maxsection; secnum++) { 300 | dasm_Section *sec = D->sections + secnum; 301 | int *b = sec->rbuf; 302 | int pos = DASM_SEC2POS(secnum); 303 | int lastpos = sec->pos; 304 | 305 | while (pos != lastpos) { 306 | dasm_ActList p = D->actionlist + b[pos++]; 307 | while (1) { 308 | int op, action = *p++; 309 | switch (action) { 310 | case DASM_REL_LG: p++; op = p[-3]; goto rel_pc; 311 | case DASM_REL_PC: op = p[-2]; rel_pc: { 312 | int shrink = op == 0xe9 ? 3 : ((op&0xf0) == 0x80 ? 4 : 0); 313 | if (shrink) { /* Shrinkable branch opcode? */ 314 | int lofs, lpos = b[pos]; 315 | if (lpos < 0) goto noshrink; /* Ext global? */ 316 | lofs = *DASM_POS2PTR(D, lpos); 317 | if (lpos > pos) { /* Fwd label: add cumulative section offsets. */ 318 | int i; 319 | for (i = secnum; i < DASM_POS2SEC(lpos); i++) 320 | lofs += D->sections[i].ofs; 321 | } else { 322 | lofs -= ofs; /* Bkwd label: unfix offset. */ 323 | } 324 | lofs -= b[pos+1]; /* Short branch ok? */ 325 | if (lofs >= -128-shrink && lofs <= 127) ofs -= shrink; /* Yes. */ 326 | else { noshrink: shrink = 0; } /* No, cannot shrink op. 
*/ 327 | } 328 | b[pos+1] = shrink; 329 | pos += 2; 330 | break; 331 | } 332 | case DASM_SPACE: case DASM_IMM_LG: case DASM_VREG: p++; 333 | case DASM_DISP: case DASM_IMM_S: case DASM_IMM_B: case DASM_IMM_W: 334 | case DASM_IMM_D: case DASM_IMM_WB: case DASM_IMM_DB: 335 | case DASM_SETLABEL: case DASM_REL_A: case DASM_IMM_PC: pos++; break; 336 | case DASM_LABEL_LG: p++; 337 | case DASM_LABEL_PC: b[pos++] += ofs; break; /* Fix label offset. */ 338 | case DASM_ALIGN: ofs -= (b[pos++]+ofs)&*p++; break; /* Adjust ofs. */ 339 | case DASM_EXTERN: p += 2; break; 340 | case DASM_ESC: p++; break; 341 | case DASM_MARK: break; 342 | case DASM_SECTION: case DASM_STOP: goto stop; 343 | } 344 | } 345 | stop: (void)0; 346 | } 347 | ofs += sec->ofs; /* Next section starts right after current section. */ 348 | } 349 | 350 | D->codesize = ofs; /* Total size of all code sections */ 351 | *szp = ofs; 352 | return DASM_S_OK; 353 | } 354 | 355 | #define dasmb(x) *cp++ = (unsigned char)(x) 356 | #ifndef DASM_ALIGNED_WRITES 357 | #define dasmw(x) \ 358 | do { *((unsigned short *)cp) = (unsigned short)(x); cp+=2; } while (0) 359 | #define dasmd(x) \ 360 | do { *((unsigned int *)cp) = (unsigned int)(x); cp+=4; } while (0) 361 | #else 362 | #define dasmw(x) do { dasmb(x); dasmb((x)>>8); } while (0) 363 | #define dasmd(x) do { dasmw(x); dasmw((x)>>16); } while (0) 364 | #endif 365 | 366 | /* Pass 3: Encode sections. */ 367 | int dasm_encode(Dst_DECL, void *buffer) 368 | { 369 | dasm_State *D = Dst_REF; 370 | unsigned char *base = (unsigned char *)buffer; 371 | unsigned char *cp = base; 372 | int secnum; 373 | 374 | /* Encode all code sections. No support for data sections (yet). 
*/ 375 | for (secnum = 0; secnum < D->maxsection; secnum++) { 376 | dasm_Section *sec = D->sections + secnum; 377 | int *b = sec->buf; 378 | int *endb = sec->rbuf + sec->pos; 379 | 380 | while (b != endb) { 381 | dasm_ActList p = D->actionlist + *b++; 382 | unsigned char *mark = NULL; 383 | while (1) { 384 | int action = *p++; 385 | int n = (action >= DASM_DISP && action <= DASM_ALIGN) ? *b++ : 0; 386 | switch (action) { 387 | case DASM_DISP: if (!mark) mark = cp; { 388 | unsigned char *mm = mark; 389 | if (*p != DASM_IMM_DB && *p != DASM_IMM_WB) mark = NULL; 390 | if (n == 0) { int mrm = mm[-1]&7; if (mrm == 4) mrm = mm[0]&7; 391 | if (mrm != 5) { mm[-1] -= 0x80; break; } } 392 | if (((n+128) & -256) != 0) goto wd; else mm[-1] -= 0x40; 393 | } 394 | case DASM_IMM_S: case DASM_IMM_B: wb: dasmb(n); break; 395 | case DASM_IMM_DB: if (((n+128)&-256) == 0) { 396 | db: if (!mark) mark = cp; mark[-2] += 2; mark = NULL; goto wb; 397 | } else mark = NULL; 398 | case DASM_IMM_D: wd: dasmd(n); break; 399 | case DASM_IMM_WB: if (((n+128)&-256) == 0) goto db; else mark = NULL; 400 | case DASM_IMM_W: dasmw(n); break; 401 | case DASM_VREG: { 402 | int t = *p++; 403 | unsigned char *ex = cp - (t&7); 404 | if ((n & 8) && t < 0xa0) { 405 | if (*ex & 0x80) ex[1] ^= 0x20 << (t>>6); else *ex ^= 1 << (t>>6); 406 | n &= 7; 407 | } else if (n & 0x10) { 408 | if (*ex & 0x80) { 409 | *ex = 0xc5; ex[1] = (ex[1] & 0x80) | ex[2]; ex += 2; 410 | } 411 | while (++ex < cp) ex[-1] = *ex; 412 | if (mark) mark--; 413 | cp--; 414 | n &= 7; 415 | } 416 | if (t >= 0xc0) n <<= 4; 417 | else if (t >= 0x40) n <<= 3; 418 | else if (n == 4 && t < 0x20) { cp[-1] ^= n; *cp++ = 0x20; } 419 | cp[-1] ^= n; 420 | break; 421 | } 422 | case DASM_REL_LG: p++; if (n >= 0) goto rel_pc; 423 | b++; n = (int)(ptrdiff_t)D->globals[-n]; 424 | case DASM_REL_A: rel_a: n -= (int)(ptrdiff_t)(cp+4); goto wd; /* !x64 */ 425 | case DASM_REL_PC: rel_pc: { 426 | int shrink = *b++; 427 | int *pb = DASM_POS2PTR(D, n); if (*pb < 0) { 
n = pb[1]; goto rel_a; } 428 | n = *pb - ((int)(cp-base) + 4-shrink); 429 | if (shrink == 0) goto wd; 430 | if (shrink == 4) { cp--; cp[-1] = *cp-0x10; } else cp[-1] = 0xeb; 431 | goto wb; 432 | } 433 | case DASM_IMM_LG: 434 | p++; if (n < 0) { n = (int)(ptrdiff_t)D->globals[-n]; goto wd; } 435 | case DASM_IMM_PC: { 436 | int *pb = DASM_POS2PTR(D, n); 437 | n = *pb < 0 ? pb[1] : (*pb + (int)(ptrdiff_t)base); 438 | goto wd; 439 | } 440 | case DASM_LABEL_LG: { 441 | int idx = *p++; 442 | if (idx >= 10) 443 | D->globals[idx] = (void *)(base + (*p == DASM_SETLABEL ? *b : n)); 444 | break; 445 | } 446 | case DASM_LABEL_PC: case DASM_SETLABEL: break; 447 | case DASM_SPACE: { int fill = *p++; while (n--) *cp++ = fill; break; } 448 | case DASM_ALIGN: 449 | n = *p++; 450 | while (((cp-base) & n)) *cp++ = 0x90; /* nop */ 451 | break; 452 | case DASM_EXTERN: n = DASM_EXTERN(Dst, cp, p[1], *p); p += 2; goto wd; 453 | case DASM_MARK: mark = cp; break; 454 | case DASM_ESC: action = *p++; 455 | default: *cp++ = action; break; 456 | case DASM_SECTION: case DASM_STOP: goto stop; 457 | } 458 | } 459 | stop: (void)0; 460 | } 461 | } 462 | 463 | if (base + D->codesize != cp) /* Check for phase errors. */ 464 | return DASM_S_PHASE; 465 | return DASM_S_OK; 466 | } 467 | 468 | /* Get PC label offset. */ 469 | int dasm_getpclabel(Dst_DECL, unsigned int pc) 470 | { 471 | dasm_State *D = Dst_REF; 472 | if (pc*sizeof(int) < D->pcsize) { 473 | int pos = D->pclabels[pc]; 474 | if (pos < 0) return *DASM_POS2PTR(D, -pos); 475 | if (pos > 0) return -1; /* Undefined. */ 476 | } 477 | return -2; /* Unused or out of range. */ 478 | } 479 | 480 | #ifdef DASM_CHECKS 481 | /* Optional sanity checker to call between isolated encoding steps. 
*/ 482 | int dasm_checkstep(Dst_DECL, int secmatch) 483 | { 484 | dasm_State *D = Dst_REF; 485 | if (D->status == DASM_S_OK) { 486 | int i; 487 | for (i = 1; i <= 9; i++) { 488 | if (D->lglabels[i] > 0) { D->status = DASM_S_UNDEF_L|i; break; } 489 | D->lglabels[i] = 0; 490 | } 491 | } 492 | if (D->status == DASM_S_OK && secmatch >= 0 && 493 | D->section != &D->sections[secmatch]) 494 | D->status = DASM_S_MATCH_SEC|(int)(D->section-D->sections); 495 | return D->status; 496 | } 497 | #endif 498 | 499 | -------------------------------------------------------------------------------- /dynasm.lua: -------------------------------------------------------------------------------- 1 | ------------------------------------------------------------------------------ 2 | -- DynASM. A dynamic assembler for code generation engines. 3 | -- Originally designed and implemented for LuaJIT. 4 | -- 5 | -- Copyright (C) 2005-2015 Mike Pall. All rights reserved. 6 | -- See below for full copyright notice. 7 | ------------------------------------------------------------------------------ 8 | 9 | -- Application information. 10 | local _info = { 11 | name = "DynASM", 12 | description = "A dynamic assembler for code generation engines", 13 | version = "1.4.0_luamode", 14 | vernum = 10400, 15 | release = "2015-10-18", 16 | author = "Mike Pall", 17 | url = "http://luajit.org/dynasm.html", 18 | license = "MIT", 19 | copyright = [[ 20 | Copyright (C) 2005-2015 Mike Pall. All rights reserved. 
21 | 22 | Permission is hereby granted, free of charge, to any person obtaining 23 | a copy of this software and associated documentation files (the 24 | "Software"), to deal in the Software without restriction, including 25 | without limitation the rights to use, copy, modify, merge, publish, 26 | distribute, sublicense, and/or sell copies of the Software, and to 27 | permit persons to whom the Software is furnished to do so, subject to 28 | the following conditions: 29 | 30 | The above copyright notice and this permission notice shall be 31 | included in all copies or substantial portions of the Software. 32 | 33 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 34 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 35 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 36 | IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY 37 | CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, 38 | TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE 39 | SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 40 | 41 | [ MIT license: http://www.opensource.org/licenses/mit-license.php ] 42 | ]], 43 | } 44 | 45 | -- Cache library functions. 46 | local type, pairs, ipairs = type, pairs, ipairs 47 | local pcall, error, assert = pcall, error, assert 48 | local select, tostring = select, tostring 49 | local _s = string 50 | local sub, match, gmatch, gsub = _s.sub, _s.match, _s.gmatch, _s.gsub 51 | local format, rep, upper = _s.format, _s.rep, _s.upper 52 | local _t = table 53 | local insert, remove, concat, sort = _t.insert, _t.remove, _t.concat, _t.sort 54 | local exit = os.exit 55 | local io = io 56 | local stdin, stdout, stderr = io.stdin, io.stdout, io.stderr 57 | 58 | -- Helper to update a table with the elements another table. 
-- Copy all key/value pairs of `src` into `dst` (a nil `src` is a no-op).
-- Returns `dst` so calls can be chained.
local function update(dst, src)
  if not src then return dst end
  for key, value in pairs(src) do
    dst[key] = value
  end
  return dst
end

------------------------------------------------------------------------------

-- Program options.
local g_opt

-- Global state for current file.
local g_fname, g_curline, g_indent, g_lineno, g_synclineno, g_arch
local g_errcount

-- Write buffer for output file.
local g_wbuffer, g_capbuffer

-- Map for defines (initially empty, chains to arch-specific map).
local map_def

-- Section names.
local map_sections

-- The opcode map. Begins as an empty map set to inherit from map_initop.
-- It's later changed to inherit from the arch-specific map, which itself is
-- set to inherit from map_coreop.
local map_op

-- The initial opcode map to initialize map_op with on each global reset.
local map_initop = {}

-- Dummy action flush function. Replaced with arch-specific function later.
local function dummy_wflush(term) end
local wflush

-- Init/reset the global state for processing a new file.
-- Every piece of per-file state declared above is put back to its default.
local function reset()
  g_opt = {
    dumpdef = 0,
    include = { "" },
    lang = nil,
    comment = "//|",
    endcomment = "",
    cpp = true, -- Write `#line` directives
  }
  g_fname, g_curline = nil, nil
  g_indent = ""
  g_lineno, g_synclineno = 0, -1
  g_errcount = 0
  g_arch = nil
  g_wbuffer, g_capbuffer = {}, nil
  map_def, map_sections = {}, {}
  map_op = setmetatable({}, { __index = map_initop })
  wflush = dummy_wflush
end

------------------------------------------------------------------------------

-- Write an output line (or callback function) to the buffer.
-- Append `line` to the active output buffer (the capture buffer while a
-- .capture is open, the main write buffer otherwise). `line` is either a
-- string or a callback function that is invoked when the buffer is dumped.
-- With `needindent` set, the current source indentation is prepended.
local function wline(line, needindent)
  local buf = g_capbuffer or g_wbuffer
  buf[#buf+1] = needindent and g_indent..line or line
  g_synclineno = g_synclineno + 1
end

-- Write assembler line as a comment, if requested.
local function wcomment(aline)
  if g_opt.comment then
    wline(g_opt.comment..aline..g_opt.endcomment, true)
  end
end

-- Resync CPP line numbers.
-- Emits a `#line` directive when the output line counter has drifted from
-- the input line number. Lua output never gets `#line` directives.
local function wsync()
  if g_synclineno ~= g_lineno and g_opt.cpp then
    -- Was `not g_opt.lang == "lua"`, which parses as
    -- `(not g_opt.lang) == "lua"` and is therefore never true.
    if g_opt.lang ~= "lua" then
      wline("#line "..g_lineno..' "'..g_fname..'"')
    end
    g_synclineno = g_lineno
  end
end

-- Dump all buffered output lines to the file handle `out`.
local function wdumplines(out, buf)
  for _,line in ipairs(buf) do
    if type(line) == "string" then
      assert(out:write(line, "\n"))
    else
      -- Special callback to dynamically insert lines after end of processing.
      line(out)
    end
  end
end

------------------------------------------------------------------------------

-- Emit an error. Processing continues with next statement.
-- Raised with level 0 so that no Lua position info is added to the message.
local function werror(msg)
  error(format("%s:%s: error: %s:\n%s", g_fname, g_lineno, msg, g_curline), 0)
end

-- Emit a fatal error. Processing stops.
local function wfatal(msg)
  g_errcount = "fatal"
  werror(msg)
end

-- Print a warning. Processing continues.
local function wwarn(msg)
  stderr:write(format("%s:%s: warning: %s:\n%s\n",
    g_fname, g_lineno, msg, g_curline))
end

-- Print caught error message. But suppress excessive errors.
-- Returns true only for a fatal error, telling the caller to stop.
local function wprinterr(...)
  if type(g_errcount) == "number" then
    -- Regular error.
    g_errcount = g_errcount + 1
    if g_errcount < 21 then -- Seems to be a reasonable limit.
      stderr:write(...)
    elseif g_errcount == 21 then
      stderr:write(g_fname,
	":*: warning: too many errors (suppressed further messages).\n")
    end
  else
    -- Fatal error.
    stderr:write(...)
    return true -- Stop processing.
  end
end

------------------------------------------------------------------------------

-- Map holding all option handlers.
local opt_map = {}
local opt_current

-- Print error and exit with error status.
local function opterror(...)
  stderr:write("dynasm.lua: ERROR: ", ...)
  stderr:write("\n")
  exit(1)
end

-- Get option parameter.
-- Consumes and returns the next entry of `args` (tracked by `args.argn`);
-- reports an error for the current option if there is none.
local function optparam(args)
  local argn = args.argn
  local p = args[argn]
  if not p then
    opterror("missing parameter for option `", opt_current, "'.")
  end
  args.argn = argn + 1
  return p
end

------------------------------------------------------------------------------

-- Core pseudo-opcodes.
local map_coreop = {}

-- Forward declarations.
local dostmt
local readfile

------------------------------------------------------------------------------

-- Pseudo-opcode to define a substitution.
-- Called without `params` it returns the parameter description instead.
map_coreop[".define_2"] = function(params, nparams)
  if not params then return nparams == 1 and "name" or "name, subst" end
  local name, def = params[1], params[2] or "1"
  if not match(name, "^[%a_][%w_]*$") then werror("bad or duplicate define") end
  map_def[name] = def
end
map_coreop[".define_1"] = map_coreop[".define_2"]

-- Define a substitution on the command line.
-- Command line option -D: accepts NAME=SUBST or a bare NAME (defined as "1").
function opt_map.D(args)
  local spec = optparam(args)
  local name, subst = match(spec, "^([%a_][%w_]*)=(.*)$")
  if name then
    map_def[name] = subst
    return
  end
  if match(spec, "^[%a_][%w_]*$") then
    map_def[spec] = "1"
    return
  end
  opterror("bad define")
end

-- Undefine a substitution on the command line.
function opt_map.U(args)
  local name = optparam(args)
  if not match(name, "^[%a_][%w_]*$") then
    opterror("bad define")
  else
    map_def[name] = nil
  end
end

-- Helper for definesubst.
-- Holds the last substituted word of the current pass (false if none).
local gotsubst

-- gsub callback: replace `word` by its define, recording that we did so.
local function definesubst_one(word)
  local replacement = map_def[word]
  if not replacement then return word end
  gotsubst = word
  return replacement
end

-- Iteratively substitute defines.
-- Passes are repeated until nothing is replaced anymore; a statement that
-- still changes after 100 passes is reported as a recursive define.
local function definesubst(stmt)
  local passes = 0
  repeat
    passes = passes + 1
    gotsubst = false
    stmt = gsub(stmt, "#?[%w_]+", definesubst_one)
  until not gotsubst or passes >= 100
  if gotsubst then wfatal("recursive define involving `"..gotsubst.."'") end
  return stmt
end

-- Dump all defines, sorted by name.
local function dumpdefines(out, lvl)
  local names, n = {}, 0
  for name in pairs(map_def) do
    n = n + 1
    names[n] = name
  end
  sort(names)
  out:write("Defines:\n")
  for i = 1, n do
    local name = names[i]
    local subst = map_def[name]
    if g_arch then subst = g_arch.revdef(subst) end
    out:write(format("  %-20s %s\n", name, subst))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Support variables for conditional assembly.
local condlevel = 0
local condstack = {}

-- Evaluate condition with a Lua expression. Substitutions already performed.
-- Compile `cond` as a Lua expression and run it in an empty environment,
-- so every unknown identifier evaluates to nil. Returns a boolean; the
-- number 0 counts as false. Compile or runtime errors are fatal.
local function cond_eval(cond)
  local chunk = "return "..cond
  local func, err
  if setfenv then
    func, err = loadstring(chunk, "=expr")
    -- No globals. All unknown identifiers evaluate to nil.
    if func then setfenv(func, {}) end
  else
    -- No globals. All unknown identifiers evaluate to nil.
    func, err = load(chunk, "=expr", "t", {})
  end
  if func then
    local ok, res = pcall(func)
    if not ok then
      err = res
    elseif res == 0 then
      return false -- Oh well.
    else
      return not not res
    end
  end
  wfatal("bad condition: "..err)
end

-- Skip statements until next conditional pseudo-opcode at the same level.
-- Temporarily replaces `dostmt` with a filter that tracks nested .if/.endif
-- pairs and hands control back on the matching .elif, .else or .endif.
local function stmtskip()
  local resume = dostmt
  local depth = 0
  dostmt = function(stmt)
    local op = match(stmt, "^%s*(%S+)")
    if op == ".if" then
      depth = depth + 1
      return
    end
    if depth ~= 0 then
      -- Inside a nested conditional: only its .endif is of interest.
      if op == ".endif" then depth = depth - 1 end
      return
    end
    if op == ".elif" or op == ".else" or op == ".endif" then
      dostmt = resume
      dostmt(stmt)
    end
  end
end

-- Pseudo-opcodes for conditional assembly.
-- .if: open a new conditional level; skip its body if the condition fails.
map_coreop[".if_1"] = function(params)
  if not params then return "condition" end
  -- Evaluate first: a bad condition must not open a new level.
  local taken = cond_eval(params[1])
  local lvl = condlevel + 1
  condlevel = lvl
  condstack[lvl] = taken
  if not taken then stmtskip() end
end

-- .elif: only evaluated if no earlier branch of this level was taken.
map_coreop[".elif_1"] = function(params)
  if not params then return "condition" end
  if condlevel == 0 then wfatal(".elif without .if") end
  local lvl = condlevel
  local state = condstack[lvl]
  if state == "else" then wfatal(".elif after .else") end
  if not state then
    local taken = cond_eval(params[1])
    if taken then
      condstack[lvl] = taken
      return
    end
  end
  stmtskip()
end

-- .else: taken only if no earlier branch of this level was taken.
map_coreop[".else_0"] = function(params)
  if condlevel == 0 then wfatal(".else without .if") end
  local lvl = condlevel
  local state = condstack[lvl]
  condstack[lvl] = "else"
  if not state then return end
  if state == "else" then wfatal(".else after .else") end
  stmtskip()
end

-- .endif: close the innermost conditional level.
map_coreop[".endif_0"] = function(params)
  if condlevel == 0 then wfatal(".endif without .if") end
  condlevel = condlevel - 1
end

-- Check for unfinished conditionals.
local function checkconds()
  if g_errcount == "fatal" or condlevel == 0 then return end
  wprinterr(g_fname, ":*: error: unbalanced conditional\n")
end

------------------------------------------------------------------------------

-- Search for a file in the given path and open it for reading.
-- On success g_fname is set to the name that was opened and the file handle
-- is returned; nothing is returned if no directory in `path` has the file.
local function pathopen(path, name)
  local dirsep = "/"
  if package and match(package.path, "\\") then dirsep = "\\" end
  for _, dir in ipairs(path) do
    local fullname = name
    if dir ~= "" then fullname = dir..dirsep..name end
    local fin = io.open(fullname, "r")
    if fin then
      g_fname = fullname
      return fin
    end
  end
end

-- Include a file.
-- .include: process another file in place, then restore the file state.
map_coreop[".include_1"] = function(params)
  if not params then return "filename" end
  local name = params[1]
  -- Save state. Ugly, I know. but upvalues are fast.
  local gf, gl, gcl, gi = g_fname, g_lineno, g_curline, g_indent
  -- Read the included file.
  local fatal = readfile(pathopen(g_opt.include, name) or
			 wfatal("include file `"..name.."' not found"))
  -- Restore state.
  g_synclineno = -1
  g_fname, g_lineno, g_curline, g_indent = gf, gl, gcl, gi
  if fatal then wfatal("in include file") end
end

-- Make .include and conditionals initially available, too.
map_initop[".include_1"] = map_coreop[".include_1"]
map_initop[".if_1"] = map_coreop[".if_1"]
map_initop[".elif_1"] = map_coreop[".elif_1"]
map_initop[".else_0"] = map_coreop[".else_0"]
map_initop[".endif_0"] = map_coreop[".endif_0"]

------------------------------------------------------------------------------

-- Support variables for macros.
local mac_capture, mac_lineno, mac_name
local mac_active = {}
local mac_list = {}

-- Pseudo-opcode to define a macro.
-- The first parameter is the macro name, the rest are its parameter names.
-- All following statements up to .endmacro are captured verbatim and
-- replayed (with parameter and define substitution) on each invocation.
map_coreop[".macro_*"] = function(mparams)
  if not mparams then return "name [, params...]" end
  -- Split off and validate macro name.
  local name = remove(mparams, 1)
  if not name then werror("missing macro name") end
  if not (match(name, "^[%a_][%w_%.]*$") or match(name, "^%.[%w_%.]*$")) then
    wfatal("bad macro name `"..name.."'")
  end
  -- Validate macro parameter names.
  local mdup = {}
  for _,mp in ipairs(mparams) do
    if not match(mp, "^[%a_][%w_]*$") then
      wfatal("bad macro parameter name `"..mp.."'")
    end
    if mdup[mp] then wfatal("duplicate macro parameter name `"..mp.."'") end
    mdup[mp] = true
  end
  -- Check for duplicate or recursive macro definitions.
  local opname = name.."_"..#mparams
  if map_op[opname] or map_op[name.."_*"] then
    wfatal("duplicate macro `"..name.."' ("..#mparams.." parameters)")
  end
  if mac_capture then wfatal("recursive macro definition") end

  -- Enable statement capture.
  local lines = {}
  mac_lineno = g_lineno
  mac_name = name
  mac_capture = function(stmt) -- Statement capture function.
    -- Stop macro definition with .endmacro pseudo-opcode.
    -- The dot is escaped: a bare `.` would match any character.
    if not match(stmt, "^%s*%.endmacro%s*$") then
      lines[#lines+1] = stmt
      return
    end
    mac_capture = nil
    mac_lineno = nil
    mac_name = nil
    mac_list[#mac_list+1] = opname
    -- Add macro-op definition.
    map_op[opname] = function(params)
      -- Without params: return the definition (used by dumpmacros).
      if not params then return mparams, lines end
      -- Protect against recursive macro invocation.
      if mac_active[opname] then wfatal("recursive macro invocation") end
      mac_active[opname] = true
      -- Setup substitution map.
      local subst = {}
      for i,mp in ipairs(mparams) do subst[mp] = params[i] end
      local mcom
      if g_opt.maccomment and g_opt.comment then
	mcom = " MACRO "..name.." ("..#mparams..")"
	wcomment("{"..mcom)
      end
      -- Loop through all captured statements
      for _,stmt in ipairs(lines) do
	-- Substitute macro parameters.
	local st = gsub(stmt, "[%w_]+", subst)
	st = definesubst(st)
	st = gsub(st, "%s*%.%.%s*", "") -- Token paste a..b.
	if mcom and sub(st, 1, 1) ~= "|" then wcomment(st) end
	-- Emit statement. Use a protected call for better diagnostics.
	local ok, err = pcall(dostmt, st)
	if not ok then
	  -- Add the captured statement to the error.
	  wprinterr(err, "\n", g_indent, "|  ", stmt,
		    "\t[MACRO ", name, " (", #mparams, ")]\n")
	end
      end
      if mcom then wcomment("}"..mcom) end
      mac_active[opname] = nil
    end
  end
end

-- An .endmacro pseudo-opcode outside of a macro definition is an error.
map_coreop[".endmacro_0"] = function(params)
  wfatal(".endmacro without .macro")
end

-- Dump all macros and their contents (with -PP only).
local function dumpmacros(out, lvl)
  sort(mac_list)
  out:write("Macros:\n")
  for _,opname in ipairs(mac_list) do
    -- opname is name.."_"..count; the count can have two digits, so strip
    -- the whole numeric suffix instead of a fixed two characters.
    local name = match(opname, "^(.*)_%d+$") or sub(opname, 1, -3)
    local params, lines = map_op[opname]()
    out:write(format("  %-20s %s\n", name, concat(params, ", ")))
    if lvl > 1 then
      for _,line in ipairs(lines) do
	out:write("  |", line, "\n")
      end
      out:write("\n")
    end
  end
  out:write("\n")
end

-- Check for unfinished macro definitions.
local function checkmacros()
  if mac_capture then
    wprinterr(g_fname, ":", mac_lineno,
	      ": error: unfinished .macro `", mac_name ,"'\n")
  end
end

------------------------------------------------------------------------------

-- Support variables for captures.
local cap_lineno, cap_name
local cap_buffers = {}
local cap_used = {}

-- Start a capture.
-- Output lines are redirected into the named buffer until .endcapture.
map_coreop[".capture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local name = params[1]
  if not match(name, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..name.."'")
  end
  if cap_name then
    wfatal("already capturing to `"..cap_name.."' since line "..cap_lineno)
  end
  cap_name = name
  cap_lineno = g_lineno
  -- Create or continue a capture buffer and start the output line capture.
  local buf = cap_buffers[name]
  if not buf then buf = {}; cap_buffers[name] = buf end
  g_capbuffer = buf
  g_synclineno = 0
end

-- Stop a capture.
map_coreop[".endcapture_0"] = function(params)
  wflush()
  if not cap_name then wfatal(".endcapture without a valid .capture") end
  cap_name = nil
  cap_lineno = nil
  g_capbuffer = nil
  g_synclineno = 0
end

-- Dump a capture buffer.
-- The buffer is looked up when the output is written, not here, so text
-- captured after this statement is included as well.
map_coreop[".dumpcapture_1"] = function(params)
  if not params then return "name" end
  wflush()
  local name = params[1]
  if not match(name, "^[%a_][%w_]*$") then
    wfatal("bad capture name `"..name.."'")
  end
  cap_used[name] = true
  wline(function(out)
    local buf = cap_buffers[name]
    if buf then wdumplines(out, buf) end
  end)
  g_synclineno = 0
end

-- Dump all captures and their buffers (with -PP only).
local function dumpcaptures(out, lvl)
  out:write("Captures:\n")
  for name,buf in pairs(cap_buffers) do
    out:write(format("  %-20s %4s)\n", name, "("..#buf))
    if lvl > 1 then
      local bar = rep("=", 76)
      out:write("  ", bar, "\n")
      for _,line in ipairs(buf) do
	-- Buffers may also hold callback functions (see wline); those
	-- cannot be passed to write(), so only plain lines are listed.
	if type(line) == "string" then
	  out:write("  ", line, "\n")
	end
      end
      out:write("  ", bar, "\n\n")
    end
  end
  out:write("\n")
end

-- Check for unfinished or unused captures.
local function checkcaptures()
  if cap_name then
    wprinterr(g_fname, ":", cap_lineno,
	      ": error: unfinished .capture `", cap_name,"'\n")
    return
  end
  for name in pairs(cap_buffers) do
    if not cap_used[name] then
      wprinterr(g_fname, ":*: error: missing .dumpcapture ", name ,"\n")
    end
  end
end

------------------------------------------------------------------------------

-- Pseudo-opcode to define code sections.
-- TODO: Data sections, BSS sections.
-- Needs extra C code and API.
-- .section: declare all code sections at once. For every section a
-- DASM_SECTION_<NAME> constant is written to the output and a `.<name>`
-- pseudo-opcode is registered that switches to that section.
map_coreop[".section_*"] = function(params)
  if not params then return "name..." end
  if #map_sections > 0 then werror("duplicate section definition") end
  wflush()
  local lua_out = g_opt.lang == "lua"
  local section_fmt = lua_out and "local DASM_SECTION_%s\t= %d"
			       or "#define DASM_SECTION_%s\t%d"
  for sn, name in ipairs(params) do
    local opname = "."..name.."_0"
    local valid = match(name, "^[%a][%w_]*$")
    if not valid or map_op[opname] or map_op["."..name.."_*"] then
      werror("bad section name `"..name.."'")
    end
    map_sections[#map_sections+1] = name
    local index = sn - 1 -- Section numbers are zero-based.
    wline(format(section_fmt, upper(name), index))
    map_op[opname] = function(params) g_arch.section(index) end
  end
  local count = #map_sections
  if lua_out then
    wline(format("local DASM_MAXSECTION\t= %d", count))
  else
    wline(format("#define DASM_MAXSECTION\t\t%d", count))
  end
end

-- Dump all sections.
local function dumpsections(out, lvl)
  out:write("Sections:\n")
  for i = 1, #map_sections do
    out:write(format("  %s\n", map_sections[i]))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Replacement for customized Lua, which lacks the package library.
-- Only installed when no global `require` exists: loads <prefix><name>.lua,
-- runs it and returns its result (without any caching).
local prefix = ""
if not require then
  require = function(name)
    local filename = name..".lua"
    local fp = assert(io.open(prefix..filename))
    local source = fp:read("*a")
    assert(fp:close())
    local chunk = assert(loadstring(source, "@"..filename))
    return chunk()
  end
end

-- Load architecture-specific module.
-- Load the module "dasm_<arch>" and hook it into the global state.
-- Returns an error message string on failure and nothing on success.
local function loadarch(arch)
  if not match(arch, "^[%w_]+$") then return "bad arch name" end
  local ok, m_arch = pcall(require, "dasm_"..arch)
  if not ok then return "cannot load module: "..m_arch end
  g_arch = m_arch
  wflush = m_arch.passcb(wline, werror, wfatal, wwarn)
  m_arch.setup(arch, g_opt)
  local arch_map_op
  arch_map_op, map_def = m_arch.mergemaps(map_coreop, map_def)
  -- map_op keeps its own entries and now inherits from the arch map.
  map_op = setmetatable(map_op, { __index = arch_map_op })
end

-- Dump architecture description.
-- Lists the arch description followed by all pseudo-opcodes and opcodes
-- with their parameter templates, then exits.
function opt_map.dumparch(args)
  local name = optparam(args)
  if not g_arch then
    local err = loadarch(name)
    if err then opterror(err) end
  end

  -- Collect all opcode names. map_op only inherits the arch opcodes via
  -- its metatable and pairs() does not follow __index, so walk the whole
  -- chain of __index tables. `seen` avoids listing a name twice.
  local t, seen = {}, {}
  local function collect(map)
    for opname in pairs(map) do
      if not seen[opname] then
	seen[opname] = true
	t[#t+1] = opname
      end
    end
  end
  collect(map_coreop)
  local visited = {}
  local map = map_op
  while type(map) == "table" and not visited[map] do
    visited[map] = true
    collect(map)
    local mt = getmetatable(map)
    map = mt and mt.__index
  end
  sort(t)

  local out = stdout
  out:write(format("%s version %s, released %s, %s\n",
    _info.name, _info.version, _info.release, _info.url))
  g_arch.dumparch(out)

  -- Names starting with "." sort first, so the list begins with the
  -- pseudo-opcodes; switch the heading at the first other name.
  local pseudo = true
  out:write("Pseudo-Opcodes:\n")
  for _,sname in ipairs(t) do
    local name, nparam = match(sname, "^(.+)_([0-9%*])$")
    if name then
      if pseudo and sub(name, 1, 1) ~= "." then
	out:write("\nOpcodes:\n")
	pseudo = false
      end
      local f = map_op[sname]
      local s
      if nparam ~= "*" then nparam = nparam + 0 end
      if nparam == 0 then
	s = ""
      elseif type(f) == "string" then
	-- Template strings are described by the arch template handler.
	s = map_op[".template__"](nil, f, nparam)
      else
	-- Handlers return their parameter description when params is nil.
	s = f(nil, nparam)
      end
      if type(s) == "table" then
	for _,s2 in ipairs(s) do
	  out:write(format("  %-12s %s\n", name, s2))
	end
      else
	out:write(format("  %-12s %s\n", name, s))
      end
    end
  end
  out:write("\n")
  exit(0)
end

-- Pseudo-opcode to set the architecture.
-- Only initially available (map_op is replaced when called).
-- Loads the arch module and writes a version check for the encoding engine
-- to the output, in the syntax of the selected output language.
map_initop[".arch_1"] = function(params)
  if not params then return "name" end
  local err = loadarch(params[1])
  if err then wfatal(err) end
  if g_opt.lang == "lua" then
    wline(format("if dasm._VERSION ~= %d then", _info.vernum))
    wline('  error("Version mismatch between DynASM and included encoding engine")')
    wline("end")
  else
    wline(format("#if DASM_VERSION != %d", _info.vernum))
    wline('#error "Version mismatch between DynASM and included encoding engine"')
    wline("#endif")
  end
end

-- Dummy .arch pseudo-opcode to improve the error report.
-- Repeating the already loaded arch is accepted; any other name is fatal.
map_coreop[".arch_1"] = function(params)
  if not params then return "name" end
  if g_arch._info.arch ~= params[1] then
    -- wfatal takes a single message string, so build it by concatenation
    -- (extra arguments would be dropped silently).
    wfatal("invalid .arch statement. arch already loaded: `"..
	   tostring(g_arch._info.arch).."`.")
  end
end

------------------------------------------------------------------------------

-- Dummy pseudo-opcode. Don't confuse '.nop' with 'nop'.
map_coreop[".nop_*"] = function(params)
  if not params then return "[ignored...]" end
end

-- Pseudo-opcodes to raise errors.
map_coreop[".error_1"] = function(params)
  if not params then return "message" end
  werror(params[1])
end

map_coreop[".fatal_1"] = function(params)
  if not params then return "message" end
  wfatal(params[1])
end

-- Dump all user defined elements.
-- Does nothing unless -P/--dumpdef was given at least once.
local function dumpdef(out)
  local lvl = g_opt.dumpdef
  if lvl == 0 then return end
  dumpsections(out, lvl)
  dumpdefines(out, lvl)
  if g_arch then g_arch.dumpdef(out, lvl) end
  dumpmacros(out, lvl)
  dumpcaptures(out, lvl)
end

------------------------------------------------------------------------------

-- Helper for splitstmt.
-- Stack of closing brackets still expected, innermost first.
local splitlvl

-- gsub callback for splitstmt: tracks bracket nesting and turns commas at
-- the top level into a "\0" separator. Everything else is kept as-is.
local function splitstmt_one(c)
  if c == "(" then
    splitlvl = ")"..splitlvl
  elseif c == "[" then
    splitlvl = "]"..splitlvl
  elseif c == "{" then
    splitlvl = "}"..splitlvl
  elseif c == ")" or c == "]" or c == "}" then
    if sub(splitlvl, 1, 1) ~= c then werror("unbalanced (), [] or {}") end
    splitlvl = sub(splitlvl, 2)
  elseif splitlvl == "" then
    return " \0 "
  end
  return c
end

-- Split statement into (pseudo-)opcode and params.
-- Returns the opcode and a list of parameter strings (with `.op` set).
local function splitstmt(stmt)
  -- Convert label with trailing-colon into .label statement.
  local label = match(stmt, "^%s*(.+):%s*$")
  if label then return ".label", {label} end

  -- Split at commas, but obey parentheses, brackets and braces.
  splitlvl = ""
  stmt = gsub(stmt, "[,%(%)%[%]{}]", splitstmt_one)
  if splitlvl ~= "" then werror("unbalanced (), [] or {}") end

  -- Split off opcode.
  local op, other = match(stmt, "^%s*([^%s%z]+)%s*(.*)$")
  if not op then werror("bad statement syntax") end

  -- Split parameters.
  local params = {}
  for p in gmatch(other, "%s*(%Z+)%z?") do
    params[#params+1] = gsub(p, "%s+$", "")
  end
  if #params > 16 then werror("too many parameters") end

  params.op = op
  return op, params
end

-- Process a single statement.
dostmt = function(stmt)
  -- Ignore empty statements.
  if match(stmt, "^%s*$") then return end

  -- Capture macro defs before substitution.
  if mac_capture then return mac_capture(stmt) end
  stmt = definesubst(stmt)

  -- Emit C code without parsing the line.
  if sub(stmt, 1, 1) == "|" then
    local tail = sub(stmt, 2)
    wflush()
    if sub(tail, 1, 2) == "//" then wcomment(tail) else wline(tail, true) end
    return
  end

  -- Split into (pseudo-)opcode and params.
  local op, params = splitstmt(stmt)

  -- Get opcode handler (matching # of parameters or generic handler).
  local f = map_op[op.."_"..#params] or map_op[op.."_*"]
  if not f then
    if not g_arch then wfatal("first statement must be .arch") end
    -- Improve error report.
    for i=0,9 do
      if map_op[op.."_"..i] then
	werror("wrong number of parameters for `"..op.."'")
      end
    end
    werror("unknown statement `"..op.."'")
  end

  -- Call opcode handler or special handler for template strings.
  if type(f) == "string" then
    map_op[".template__"](params, f)
  else
    f(params)
  end
end

-- Process a single line.
-- Lines starting with `|` (after optional whitespace) are assembler lines;
-- everything else is copied to the output unchanged.
local function doline(line)
  if g_opt.flushline then wflush() end

  -- Assembler line?
  local indent, aline = match(line, "^(%s*)%|(.*)$")
  if not aline then
    -- No, plain C code line, need to flush first.
    wflush()
    wsync()
    wline(line, false)
    return
  end

  g_indent = indent -- Remember current line indentation.

  -- Emit C code (even from macros). Avoids echo and line parsing.
  if sub(aline, 1, 1) == "|" then
    if not mac_capture then
      wsync()
    elseif g_opt.comment then
      wsync()
      wcomment(aline)
    end
    dostmt(aline)
    return
  end

  -- Echo assembler line as a comment.
  if g_opt.comment then
    wsync()
    wcomment(aline)
  end

  -- Strip assembler comments.
  aline = gsub(aline, "//.*$", "")
  if g_opt.lang == "lua" then
    aline = gsub(aline, "%-%-.*$", "")
  end

  -- Split line into statements at semicolons.
  if match(aline, ";") then
    for stmt in gmatch(aline, "[^;]+") do dostmt(stmt) end
  else
    dostmt(aline)
  end
end

------------------------------------------------------------------------------

-- Write DynASM header.
-- Buffered as a callback, so it runs when the output is finally written.
local function dasmhead(out)
  if not g_opt.comment then return end
  if g_opt.lang == "lua" then
    out:write(format([[
--
-- This file has been pre-processed with DynASM.
-- %s
-- DynASM version %s, DynASM %s version %s
-- DO NOT EDIT! The original file is in "%s".
--

]], _info.url,
      _info.version, g_arch._info.arch, g_arch._info.version,
      g_fname))
  else
    out:write(format([[
/*
** This file has been pre-processed with DynASM.
** %s
** DynASM version %s, DynASM %s version %s
** DO NOT EDIT! The original file is in "%s".
*/

]], _info.url,
      _info.version, g_arch._info.arch, g_arch._info.version,
      g_fname))
  end
end

-- Read input file.
-- Processes `fin` line by line and closes it (unless it is stdin).
-- Returns true if processing was stopped by a fatal error.
readfile = function(fin)
  -- Process all lines.
  for line in fin:lines() do
    g_lineno = g_lineno + 1
    g_curline = line
    local ok, err = pcall(doline, line)
    if not ok and wprinterr(err, "\n") then
      -- Fatal error: stop, but don't leak the input file handle.
      if fin ~= stdin then fin:close() end
      return true
    end
  end
  wflush()

  -- Close input file.
  assert(fin == stdin or fin:close())
end

-- Write output file.
-- `outfile` is a file name, "-" or nil for stdout, or an open file handle.
local function writefile(outfile)
  local fout

  -- Open output file.
  if outfile == nil or outfile == "-" then
    fout = stdout
  elseif type(outfile) == "string" then
    fout = assert(io.open(outfile, "w"))
  else
    fout = outfile
  end

  -- Write all buffered lines
  wdumplines(fout, g_wbuffer)

  -- Close output file.
  assert(fout == stdout or fout:close())

  -- Optionally dump definitions.
  -- Goes to stderr if the generated code itself went to stdout.
  dumpdef(fout == stdout and stderr or stdout)
end

-- Translate an input file to an output file.
-- `infile` is a file name, "-" for stdin, or an open file handle.
-- Exits with status 1 (without writing output) if there were errors.
local function translate(infile, outfile)

  -- Put header.
  wline(dasmhead)

  -- Read input file.
  local fin
  if infile == "-" then
    g_fname = "(stdin)"
    fin = stdin
  elseif type(infile) == "string" then
    g_fname = infile
    fin = assert(io.open(infile, "r"))
  else
    g_fname = "=(translate)"
    fin = infile
  end
  readfile(fin)

  -- Check for errors.
  if not g_arch then
    wprinterr(g_fname, ":*: error: missing .arch directive\n")
  end
  checkconds()
  checkmacros()
  checkcaptures()

  if g_errcount ~= 0 then
    stderr:write(g_fname, ":*: info: ", g_errcount, " error",
      (type(g_errcount) == "number" and g_errcount > 1) and "s" or "",
      " in input file -- no output file generated.\n")
    dumpdef(stderr)
    exit(1)
  end

  -- Write output file.
  writefile(outfile)
end

------------------------------------------------------------------------------

-- Print help text.
function opt_map.help()
  stdout:write("DynASM -- ", _info.description, ".\n")
  stdout:write("DynASM ", _info.version, " ", _info.release, "  ", _info.url, "\n")
  stdout:write[[

Usage: dynasm [OPTION]... INFILE.dasc|INFILE.dasl|-

  -h, --help           Display this help text.
  -V, --version        Display version and copyright information.

  -o, --outfile FILE   Output file name (default is stdout).
  -I, --include DIR    Add directory to the include search path.

  -l, --lang C|Lua     Generate C or Lua code (default C for dasc, Lua for dasl).

  -c, --ccomment       Use /* */ comments for assembler lines.
  -C, --cppcomment     Use // comments for assembler lines (default).
  -N, --nocomment      Suppress assembler lines in output.
  -M, --maccomment     Show macro expansions as comments (default off).

  -L, --nolineno       Suppress CPP line number information in output.
  -F, --flushline      Flush action list for every line.

  -D NAME[=SUBST]      Define a substitution.
  -U NAME              Undefine a substitution.

  -P, --dumpdef        Dump defines, macros, etc. Repeat for more output.
  -A, --dumparch ARCH  Load architecture ARCH and dump description.
]]
  exit(0)
end

-- Print version information.
function opt_map.version()
  stdout:write(format("%s version %s, released %s\n%s\n\n%s",
    _info.name, _info.version, _info.release, _info.url, _info.copyright))
  exit(0)
end

-- Misc. options.
function opt_map.lang(args) g_opt.lang = optparam(args):lower() end
function opt_map.outfile(args) g_opt.outfile = optparam(args) end
function opt_map.include(args) insert(g_opt.include, 1, optparam(args)) end
function opt_map.ccomment() g_opt.comment = "/*|"; g_opt.endcomment = " */" end
function opt_map.cppcomment() g_opt.comment = "//|"; g_opt.endcomment = "" end
function opt_map.nocomment() g_opt.comment = false end
function opt_map.maccomment() g_opt.maccomment = true end
function opt_map.nolineno() g_opt.cpp = false end
function opt_map.flushline() g_opt.flushline = true end
function opt_map.dumpdef() g_opt.dumpdef = g_opt.dumpdef + 1 end

------------------------------------------------------------------------------

-- Short aliases for long options.
local opt_alias = {
  -- Informational.
  h = "help", ["?"] = "help", V = "version",
  -- Output file and include path.
  o = "outfile", I = "include",
  -- Target language.
  l = "lang",
  -- Comment style for assembler lines.
  c = "ccomment", C = "cppcomment", N = "nocomment", M = "maccomment",
  -- Output formatting.
  L = "nolineno", F = "flushline",
  -- Diagnostics.
  P = "dumpdef", A = "dumparch",
}

-- Dispatch one option (without its leading dashes) to its handler.
-- `opt` is looked up in opt_map directly, then through opt_alias.
-- The handler receives `args` so it can read further arguments from it.
local function parseopt(opt, args)
  -- Record the option as it was spelled; it is quoted in the error below.
  if #opt == 1 then
    opt_current = "-"..opt
  else
    opt_current = "--"..opt
  end
  local handler = opt_map[opt]
  if not handler then handler = opt_map[opt_alias[opt]] end
  if not handler then
    opterror("unrecognized option `", opt_current, "'. Try `--help'.\n")
  end
  handler(args)
end

local languages = {c = true, lua = true}
local langext = {dasc = "c", dasl = "lua"}

-- Set language options (Lua or C code gen) based on file extension.
-- An explicitly set g_opt.lang always wins; otherwise the extension of a
-- string `infile` selects the language, with C as the fallback.
local function setlang(infile)
  local lang = g_opt.lang

  -- Infer the language only if none was requested and we have a file name.
  if not lang and type(infile) == "string" then
    local ext = match(infile, "%.([^%.]+)$")
    lang = langext[ext] or "c"
    g_opt.lang = lang
  end

  -- Reject anything that is not a known target language.
  if not languages[lang] then
    opterror("invalid language `", tostring(lang), "`.")
  end

  if lang ~= "lua" then return end

  -- Lua mode: assembler comments become Lua comments, no CPP line info.
  if g_opt.comment then
    g_opt.cpp = false
    g_opt.comment = "--|"
    g_opt.endcomment = ""
  end

  -- Initial defines that are only available in Lua mode.
  local ffi = require("ffi")
  local arch, osname = ffi.arch, ffi.os
  map_def.ARCH = arch --for `.arch ARCH`
  map_def[upper(arch)] = 1 --for `.if X86 ...`
  map_def.OS = osname --for `.if OS == 'Windows'`
  map_def[upper(osname)] = 1 --for `.if WINDOWS ...`
end

-- Parse the command line and run the translation.
-- `args` is the argument array; args.argn is used as the read cursor.
local function parseargs(args)

  -- Reset globals.
  reset()

  -- Consume leading option arguments. Stop at the first argument that
  -- does not look like `-x...` or `--name`.
  args.argn = 1
  while true do
    local word = args[args.argn]
    if not word then break end
    local dash2, body = match(word, "^%-(%-?)(.+)")
    if not body then break end
    -- Step past the option before dispatching it.
    args.argn = args.argn + 1
    if dash2 ~= "" then
      -- Long option.
      parseopt(body, args)
    else
      -- Bundle of short options: handle each character separately.
      for ch in gmatch(body, ".") do parseopt(ch, args) end
    end
  end

  -- Exactly one non-option argument (the input file) is expected.
  local remaining = #args - args.argn + 1
  if remaining == 0 and g_opt.dumpdef > 0 then
    -- No input file, but a dump was requested: just dump to stdout.
    return dumpdef(stdout)
  end
  if remaining ~= 1 then
    opt_map.help()
  end

  local infile = args[args.argn]

  -- Set language options.
  setlang(infile)

  -- Translate a single input file to a single output file
  -- TODO: Handle multiple files?
  translate(infile, g_opt.outfile)
end

------------------------------------------------------------------------------

if ... == "dynasm" then -- use as module

  -- Make a reusable translate() function with support for setting options.
  -- NOTE: this must stay `local translate = function`, not `local function
  -- translate`: the call inside the body has to resolve to the file-level
  -- translate(), not to this wrapper.
  local translate = function(infile, outfile, opt)
    reset()
    update(g_opt, opt)
    setlang(infile)
    local substs = g_opt.subst
    if substs then
      for name, value in pairs(substs) do
        map_def[name] = tostring(value)
      end
    end
    translate(infile, outfile)
  end

  -- Dummy file:close() method.
  local function dummyclose()
    return true
  end

  -- Create a pseudo-file object that implements the file:lines() method
  -- which reads data from a string.
-- Lines may end in "\n", "\r\n" or "\r" (also mixed within one string).
-- A final line without a terminator is still returned, and an empty string
-- yields no lines at all.
local function string_infile(s)
  local lines = function()
    local pos = 1
    return function()
      if pos > #s then return nil end
      -- Anchored at `pos`: the line body, an optional terminator, and the
      -- position right after it. At least one character is consumed on
      -- every call, so the iteration always makes progress.
      local line, nextpos = match(s, "^([^\r\n]*)\r?\n?()", pos)
      pos = nextpos
      return line
    end
  end
  return {lines = lines, close = dummyclose}
end

-- Create a pseudo-file object that implements the file:write() method
-- which forwards each non-empty value to a function.
-- Only strings and numbers are accepted; numbers are converted with
-- tostring(). write() returns true.
local function func_outfile(func)
  local function write(_, ...)
    for i = 1, select('#', ...) do
      local v = select(i, ...)
      assert(type(v) == "string" or type(v) == "number", "invalid value")
      local s = tostring(v)
      if #s > 0 then
        func(s)
      end
    end
    return true
  end
  return {write = write, close = dummyclose}
end

-- Create a pseudo-file object that accumulates writes to a table.
-- Each non-empty piece is appended to the array part of `t`.
local function table_outfile(t)
  return func_outfile(function(s)
    t[#t+1] = s
  end)
end

-- Translate an input file to a string.
-- `infile` and `opt` are passed through to translate().
local function translate_tostring(infile, opt)
  local t = {}
  translate(infile, table_outfile(t), opt)
  return table.concat(t)
end

-- Create an iterator that translates an input file
-- and returns the translated file in chunks.
-- The translation runs inside a coroutine; every non-empty piece written
-- to the output is yielded as one chunk.
local function translate_toiter(infile, opt)
  return coroutine.wrap(function()
    translate(infile, func_outfile(coroutine.yield), opt)
  end)
end

-- Load a dasl file and return it as a Lua chunk.
-- `lang = "lua"` is the default and can be overridden through `opt`.
-- Returns the result of load() on the translated source.
local function dasl_loadfile(infile, opt)
  local opt = update({lang = "lua"}, opt)
  local read = translate_toiter(infile, opt)
  local filename = type(infile) == "string" and infile or "=(load)"
  return load(read, filename)
end

-- Load a dasl string and return it as a Lua chunk.
-- The string is wrapped with string_infile() and handed to dasl_loadfile().
local function dasl_loadstring(s, opt)
  return dasl_loadfile(string_infile(s), opt)
end

-- Register a module loader for *.dasl files.
-- The search path is package.path with every `.lua` template suffix
-- replaced by `.dasl`. On failure the loader returns the reason string
-- from package.searchpath().
insert(package.loaders, function(modname)
  local daslpath = gsub(gsub(package.path, "%.lua;", ".dasl;"), "%.lua$", ".dasl")
  local path, reason = package.searchpath(modname, daslpath)
  if not path then return reason end
  return function()
    -- Assembler-line comments are disabled for modules loaded this way.
    local chunk = assert(dasl_loadfile(path, {comment = false}))
    return chunk(modname)
  end
end)

-- Make and return the DynASM API.
return {
  --low-level intf.
  translate = translate,
  string_infile = string_infile,
  func_outfile = func_outfile,
  table_outfile = table_outfile,
  translate_tostring = translate_tostring,
  translate_toiter = translate_toiter,
  --hi-level intf.
  loadfile = dasl_loadfile,
  loadstring = dasl_loadstring,
}

else -- use as standalone script

  -- Add the directory dynasm.lua resides in to the Lua module search path.
  local arg = arg
  if arg and arg[0] then
    -- Keep `prefix` local: a bare assignment here would create a global.
    local prefix = match(arg[0], "^(.*[/\\])")
    if package and prefix then package.path = prefix.."?.lua;"..package.path end
  end

  -- Start DynASM.
  parseargs{...}

end

------------------------------------------------------------------------------
--------------------------------------------------------------------------------
/dasm_x86x64.lua:
--------------------------------------------------------------------------------
------------------------------------------------------------------------------
-- DynASM x86/x64 module.
--
-- Copyright (C) 2005-2017 Mike Pall. All rights reserved.
-- See dynasm.lua for full copyright notice.
------------------------------------------------------------------------------

-- `dasm_x64` is a global flag expected to be set by the loading module:
-- a true value selects x64, false selects x86, nil means "not loaded by
-- an arch module" and aborts loading.
local x64 = rawget(_G, "dasm_x64") --rawget so it works with strict.lua
if x64 == nil then return end --must be loaded by dasm_x86.lua or dasm_x64.lua

-- Module information:
local _info = {
  arch = x64 and "x64" or "x86",
  description = "DynASM x86/x64 module",
  version = "1.4.0_luamode",
  vernum = 10400,
  release = "2015-10-18",
  author = "Mike Pall",
  license = "MIT",
}

-- Exported glue functions for the arch-specific module.
local _M = { _info = _info }

-- Cache library functions.
local type, tonumber, pairs, ipairs = type, tonumber, pairs, ipairs
-- `unpack` is a global in Lua 5.1/LuaJIT and lives in `table` from 5.2 on.
local assert, unpack, setmetatable = assert, unpack or table.unpack, setmetatable
local _s = string
local sub, format, byte, char = _s.sub, _s.format, _s.byte, _s.char
local find, match, gmatch, gsub = _s.find, _s.match, _s.gmatch, _s.gsub
local concat, sort, remove = table.concat, table.sort, table.remove
-- Use an already present global `bit` table, else load the "bit" module.
local bit = bit or require("bit")
local band, bxor, shl, shr = bit.band, bit.bxor, bit.lshift, bit.rshift

-- Inherited tables and callbacks.
local g_opt, g_arch, g_map_def
local wline, werror, wfatal, wwarn

-- Global flag to generate Lua code instead of C code.
local luamode

-- Action name list.
-- CHECK: Keep this in sync with the C code!
-- The position of a name in this list determines its action number
-- (actfirst + index - 1), so the order must not be changed.
local action_names = {
  -- int arg, 1 buffer pos:
  "DISP", "IMM_S", "IMM_B", "IMM_W", "IMM_D", "IMM_WB", "IMM_DB",
  -- action arg (1 byte), int arg, 1 buffer pos (reg/num):
  "VREG", "SPACE",
  -- ptrdiff_t arg, 1 buffer pos (address): !x64
  "SETLABEL", "REL_A",
  -- action arg (1 byte) or int arg, 2 buffer pos (link, offset):
  "REL_LG", "REL_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (link):
  "IMM_LG", "IMM_PC",
  -- action arg (1 byte) or int arg, 1 buffer pos (offset):
  "LABEL_LG", "LABEL_PC",
  -- action arg (1 byte), 1 buffer pos (offset):
  "ALIGN",
  -- action args (2 bytes), no buffer pos.
  "EXTERN",
  -- action arg (1 byte), no buffer pos.
  "ESC",
  -- no action arg, no buffer pos.
  "MARK",
  -- action arg (1 byte), no buffer pos, terminal action:
  "SECTION",
  -- no args, no buffer pos, terminal action:
  "STOP"
}

-- Maximum number of section buffer positions for dasm_put().
-- CHECK: Keep this in sync with the C code!
local maxsecpos = 25 -- Keep this low, to avoid excessively long C lines.

-- Action name -> action number (dynamically generated below).
local map_action = {}
-- First action number. Everything below does not need to be escaped.
-- The action numbers occupy the top of the byte range and end at 255.
local actfirst = 256-#action_names

-- Action list buffer and string (only used to remove dupes).
local actlist, actstr

-- Argument list for next dasm_put().
local actargs

-- Current number of section buffer positions for dasm_put().
local secpos

-- (Re)initialize the action list state: empty buffer, empty dedupe string,
-- an argument list holding only the action list offset, one used position.
local function init_actionlist()
  actlist = {}
  actstr = ""
  actargs = { 0 } -- Start with offset 0 into the action list.
  secpos = 1
end

-- VREG kind encodings, pre-shifted by 5 bits.
local map_vreg = {
  ["modrm.rm.m"] = 0x00,
  ["modrm.rm.r"] = 0x20,
  ["opcode"] = 0x20,
  ["sib.base"] = 0x20,
  ["sib.index"] = 0x40,
  ["modrm.reg"] = 0x80,
  ["vex.v"] = 0xa0,
  ["imm.hi"] = 0xc0,
}

-- Current number of VREG actions contributing to REX/VEX shrinkage.
local vreg_shrink_count = 0

------------------------------------------------------------------------------

-- Compute action numbers for action names.
-- The n-th name gets the number actfirst + n - 1.
for n,name in ipairs(action_names) do
  map_action[name] = actfirst + n - 1
end

-- Dump action names and numbers.
-- Writes one line per action (name, hex number, decimal number) to `out`.
local function dumpactions(out)
  out:write("DynASM encoding engine action codes:\n")
  for _,name in ipairs(action_names) do
    local num = map_action[name]
    out:write(format(" %-10s %02X %d\n", name, num, num))
  end
  out:write("\n")
end

-- Write action list buffer as a huge static C array.
-- In Lua mode an ffi.new('const uint8_t[N]', {...}) initializer is written
-- instead. `name` is the identifier of the emitted array. An empty action
-- list is written as a one-element array holding 255.
-- The action list itself is left untouched, so the function may be called
-- more than once and always emits the complete list.
local function writeactions(out, name)
  local nn = #actlist
  -- The last byte is written separately, without a trailing comma.
  local last = actlist[nn] or 255
  local count = nn == 0 and 1 or nn
  if luamode then
    out:write("local ", name, " = ffi.new('const uint8_t[", count, "]', {\n")
  else
    out:write("static const unsigned char ", name, "[", count, "] = {\n")
  end
  local s = " "
  for i = 1, nn - 1 do
    s = s..actlist[i]..","
    -- Start a new output line once the current one is long enough.
    if #s >= 75 then
      assert(out:write(s, "\n"))
      s = " "
    end
  end
  if luamode then
    out:write(s, last, "\n})\n\n")
  else
    out:write(s, last, "\n};\n\n")
  end
end

------------------------------------------------------------------------------

-- Add byte to action list.
-- `n` must be an integer in the range 0..255; it is appended unescaped.
local function wputxb(n)
  assert(n >= 0 and n <= 255 and n % 1 == 0, "byte out of range")
  actlist[#actlist+1] = n
end

-- Add action to list with optional arg. Advance buffer pos, too.
-- `action` is an action name from action_names. `a` (if given) is appended
-- to the argument list of the next dasm_put(). The buffer position count
-- advances by `num` (default 1) when either `a` or `num` is given.
local function waction(action, a, num)
  wputxb(assert(map_action[action], "bad action name `"..action.."'"))
  if a then actargs[#actargs+1] = a end
  if a or num then secpos = secpos + (num or 1) end
end

-- Optionally add a VREG action.
-- Does nothing if `vreg` is nil/false. Otherwise emits a VREG action with
-- `vreg` as its argument, followed by one byte built from the kind code in
-- map_vreg plus `psz` (default 0).
-- If the kind code is below `sk`, the shrink counter is incremented. Unless
-- `defer` is set, the counter is folded into the byte (times 8) and reset.
local function wvreg(kind, vreg, psz, sk, defer)
  if not vreg then return end
  waction("VREG", vreg)
  -- Report the offending kind (not the register expression) on failure.
  local b = assert(map_vreg[kind], "bad vreg kind `"..tostring(kind).."'")
  if b < (sk or 0) then
    vreg_shrink_count = vreg_shrink_count + 1
  end
  if not defer then
    b = b + vreg_shrink_count * 8
    vreg_shrink_count = 0
  end
  wputxb(b + (psz or 0))
end

-- Add call to embedded DynASM C code.
-- Emits `dasm_<func>(Dst, args...);` or, in Lua mode, `dasm.<func>(Dst,
-- args...)` through wline().
local function wcall(func, args)
  if luamode then
    wline(format("dasm.%s(Dst, %s)", func, concat(args, ", ")), true)
  else
    wline(format("dasm_%s(Dst, %s);", func, concat(args, ", ")), true)
  end
end

-- Delete duplicate action list chunks. A tad slow, but so what.
-- The chunk is everything in the action list after `offset`. If the same
-- byte sequence already occurs in actstr, the chunk is removed again and
-- the first dasm_put() argument is pointed at the earlier copy. Otherwise
-- the chunk is appended to actstr.
local function dedupechunk(offset)
  local al, as = actlist, actstr
  local chunk = char(unpack(al, offset+1, #al))
  local orig = find(as, chunk, 1, true) -- Plain (non-pattern) search.
  if orig then
    actargs[1] = orig-1 -- Replace with original offset.
    for i=offset+1,#al do al[i] = nil end -- Kill dupe.
  else
    actstr = as..chunk
  end
end

-- Flush action list (intervening C code or buffer pos overflow).
-- A STOP action is appended unless `term` is set. Then the pending chunk is
-- deduplicated, the dasm_put() call is emitted and the argument list is
-- restarted for the next chunk.
local function wflush(term)
  local offset = actargs[1]
  if #actlist == offset then return end -- Nothing to flush.
  if not term then waction("STOP") end -- Terminate action list.
  dedupechunk(offset)
  wcall("put", actargs) -- Add call to dasm_put().
  actargs = { #actlist } -- Actionlist offset is 1st arg to next dasm_put().
  secpos = 1 -- The actionlist offset occupies a buffer position, too.
end

-- Put escaped byte.
-- Byte values at or above actfirst collide with action numbers and are
-- preceded by an ESC action.
local function wputb(n)
  if n >= actfirst then waction("ESC") end -- Need to escape byte.
  wputxb(n)
end

------------------------------------------------------------------------------

-- Global label name -> global label number. With auto assignment on 1st use.
local next_global, map_global

-- Looking up an unknown name validates it, assigns the next free number
-- and stores it in the table. Numbers above 246 are rejected.
local globals_meta = { __index = function(t, name)
  if not match(name, "^[%a_][%w_@]*$") then werror("bad global label") end
  local n = next_global
  if n > 246 then werror("too many global labels") end
  next_global = n + 1
  t[name] = n
  return n
end}

-- Reset the global label map; numbering starts at 10.
local function init_map_global()
  next_global = 10
  map_global = setmetatable({}, globals_meta)
end

-- Dump global labels.
-- Writes the label names to `out` in order of their numbers.
local function dumpglobals(out, lvl)
  local t = {}
  for name, n in pairs(map_global) do t[n] = name end
  out:write("Global labels:\n")
  for i=10,next_global-1 do
    out:write(format(" %s\n", t[i]))
  end
  out:write("\n")
end

-- Write global label enum.
-- C mode emits an `enum { <prefix><label>, ..., <prefix>_MAX };`.
-- Lua mode emits one `local <prefix><label> = <index>` per label (indexes
-- start at 0), followed by `<prefix>_MAX` and `DASM_MAXGLOBAL`.
-- Anything from the first `@` on is stripped from the label name.
local function writeglobals(out, prefix)
  -- Invert name -> number so that labels can be emitted in numeric order.
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  local last = next_global - 1

  if not luamode then
    out:write("enum {\n")
    for num = 10, last do
      local ident = gsub(by_num[num], "@.*", "")
      out:write(" ", prefix, ident, ",\n")
    end
    out:write(" ", prefix, "_MAX\n};\n")
    return
  end

  local index = 0
  for num = 10, last do
    local ident = gsub(by_num[num], "@.*", "")
    out:write("local ", prefix, ident, "\t= ", index, "\n")
    index = index + 1
  end
  out:write("local ", prefix, "_MAX\t= ", index, "\n") --for compatibility with the C protocol
  out:write("local DASM_MAXGLOBAL\t= ", index, "\n")
end

-- Write global label names.
-- C mode emits a NULL-terminated `static const char *const <name>[]`.
-- Lua mode emits a table constructor whose first entry is placed at
-- index 0.
local function writeglobalnames(out, name)
  local by_num = {}
  for label, num in pairs(map_global) do by_num[num] = label end
  local last = next_global - 1

  if not luamode then
    out:write("static const char *const ", name, "[] = {\n")
    for num = 10, last do
      out:write(" \"", by_num[num], "\",\n")
    end
    out:write(" (const char *)0\n};\n")
    return
  end

  out:write("local ", name, " = {\n")
  for num = 10, last do
    local lead = ""
    if num == 10 then lead = "[0] = " end
    out:write(" ", lead, "\"", by_num[num], "\",\n")
  end
  out:write("}\n")
end

------------------------------------------------------------------------------

-- Extern label name -> extern label number. With auto assignment on 1st use.
local next_extern, map_extern

-- Extern numbers are negative and count downwards. An unknown name gets
-- the next free number, which is stored in the table and returned.
local extern_meta = {}
function extern_meta.__index(t, name)
  -- No restrictions on the name for now.
  local num = next_extern
  if num < -256 then werror("too many extern labels") end
  next_extern = num - 1
  t[name] = num
  return num
end

-- Reset the extern label map; numbering starts at -1.
local function init_map_extern()
  next_extern = -1
  map_extern = setmetatable({}, extern_meta)
end

-- Dump extern labels.
-- Writes the extern label names to `out`, in order of assignment.
local function dumpexterns(out, lvl)
  -- Extern numbers are negative; negate them to get ascending positions.
  local by_pos = {}
  for label, num in pairs(map_extern) do by_pos[-num] = label end
  local count = -next_extern - 1

  out:write("Extern labels:\n")
  for pos = 1, count do
    out:write(format(" %s\n", by_pos[pos]))
  end
  out:write("\n")
end

-- Write extern label names.
-- C mode emits a NULL-terminated `static const char *const <name>[]`.
-- Lua mode emits a table constructor whose first entry is placed at
-- index 0.
local function writeexternnames(out, name)
  local by_pos = {}
  for label, num in pairs(map_extern) do by_pos[-num] = label end
  local count = -next_extern - 1

  if not luamode then
    out:write("static const char *const ", name, "[] = {\n")
    for pos = 1, count do
      out:write(" \"", by_pos[pos], "\",\n")
    end
    out:write(" (const char *)0\n};\n")
    return
  end

  out:write("local ", name, " = {\n")
  for pos = 1, count do
    local lead = ""
    if pos == 1 then lead = "[0] = " end
    out:write(lead, "\"", by_pos[pos], "\",\n")
  end
  out:write("}\n")
end

------------------------------------------------------------------------------

-- Arch-specific maps.
local map_archdef = {} -- Ext. register name -> int. name.
local map_reg_rev = {} -- Int. register name -> ext. name.
local map_reg_num = {} -- Int. register name -> register number.
local map_reg_opsize = {} -- Int. register name -> operand size.
local map_reg_valid_base = {} -- Int. register name -> valid base register?
local map_reg_valid_index = {} -- Int. register name -> valid index register?
local map_reg_needrex = {} -- Int. register name -> need rex vs. no rex.
local reg_list = {} -- Canonical list of int. register names.

local map_type -- Type name -> { ctype, reg }
local ctypenum -- Type number (for _PTx macros).

-- Reset the type map and the type number counter.
local function init_map_type()
  ctypenum = 0
  map_type = {}
end

-- Size for address operands: qword on x64, dword on x86.
local addrsize = "d"
if x64 then addrsize = "q" end

-- Helper functions to fill register maps.
-- Register one class of registers in the arch maps.
--   sz:    operand size code of the class ("q", "d", "w", "b", "f", ...).
--   cl:    external name of the class itself (e.g. "Rd"); it maps to the
--          internal name "@<sz>" with register number -1.
--   names: optional list of legacy register names; the n-th name maps to
--          "@<sz><hex n-1>".
-- Numbered names are added as well: "<cl><i>" for sizes "o"/"y", "st<i>"
-- for size "f", and "r<i>" plus a size suffix (omitted for the address
-- size) otherwise. They run up to 15 on x64, except for size "f", and up to
-- 7 otherwise. A "" entry is appended to reg_list after each class.
local function mkrmap(sz, cl, names)
  -- Only dword and address-sized registers may be used as base or index.
  local addressable = (sz == addrsize or sz == "d")

  -- Enter one internal name into the list and the reverse/number/size maps.
  local function define(iname, extname, num)
    reg_list[#reg_list+1] = iname
    map_reg_rev[iname] = extname
    map_reg_num[iname] = num
    map_reg_opsize[iname] = sz
    if addressable then
      map_reg_valid_base[iname] = true
      map_reg_valid_index[iname] = true
    end
  end

  -- The class itself.
  local cname = format("@%s", sz)
  map_archdef[cl] = cname
  define(cname, cl, -1)

  -- Legacy register names.
  if names then
    for n, extname in ipairs(names) do
      local iname = format("@%s%x", sz, n-1)
      map_archdef[extname] = iname
      define(iname, extname, n-1)
      -- Byte registers 5..8 of the list are marked as "no rex".
      if sz == "b" and n > 4 then map_reg_needrex[iname] = false end
    end
  end

  -- Numbered register names.
  local top = (x64 and sz ~= "f") and 15 or 7
  for i = 0, top do
    -- Numbered byte registers above 3 get a distinct "R"-suffixed internal
    -- name and are marked as needing rex.
    local needrex = sz == "b" and i > 3
    local iname = format("@%s%x%s", sz, i, needrex and "R" or "")
    if needrex then map_reg_needrex[iname] = true end

    local extname
    if sz == "o" or sz == "y" then
      extname = format("%s%d", cl, i)
    elseif sz == "f" then
      extname = format("st%d", i)
    else
      extname = format("r%d%s", i, sz == addrsize and "" or sz)
    end
    map_archdef[extname] = iname

    -- Keep the first external name registered for an internal name.
    if not map_reg_rev[iname] then
      define(iname, extname, i)
    end
  end

  -- Class separator.
  reg_list[#reg_list+1] = ""
end

-- Integer registers (qword, dword, word and byte sized).
-- The qword class only exists on x64.
if x64 then
  mkrmap("q", "Rq", {"rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi"})
end
mkrmap("d", "Rd", {"eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi"})
mkrmap("w", "Rw", {"ax", "cx", "dx", "bx", "sp", "bp", "si", "di"})
mkrmap("b", "Rb", {"al", "cl", "dl", "bl", "ah", "ch", "dh", "bh"})
-- The stack pointer is not a valid index register.
map_reg_valid_index[map_archdef.esp] = false
if x64 then map_reg_valid_index[map_archdef.rsp] = false end
-- On x64 the variable byte register class is marked as needing rex.
if x64 then map_reg_needrex[map_archdef.Rb] = true end
-- "Ra" is an alias for the address-sized register class.
map_archdef["Ra"] = "@"..addrsize

-- FP registers (internally tword sized, but use "f" as operand size).
mkrmap("f", "Rf")

-- SSE registers (oword sized, but qword and dword accessible).
mkrmap("o", "xmm")

-- AVX registers (yword sized, but oword, qword and dword accessible).
mkrmap("y", "ymm")

-- Operand size prefixes to codes.
-- "aword" resolves to the address size of the target.
local map_opsize = {
  byte = "b", word = "w", dword = "d", qword = "q", oword = "o", yword = "y",
  tword = "t", aword = addrsize,
}

-- Operand size code to number.
local map_opsizenum = {
  b = 1, w = 2, d = 4, q = 8, o = 16, y = 32, t = 10,
}

-- Operand size code to name.
local map_opsizename = {
  b = "byte", w = "word", d = "dword", q = "qword", o = "oword", y = "yword",
  t = "tword", f = "fpword",
}

-- Valid index register scale factors.
-- Maps the scale as written in the source to its stored code.
local map_xsc = {
  ["1"] = 0, ["2"] = 1, ["4"] = 2, ["8"] = 3,
}

-- Condition codes.
-- The first two rows list the codes 0..15 in order; the remaining rows are
-- alternative names that map to the same numbers.
local map_cc = {
  o = 0, no = 1, b = 2, nb = 3, e = 4, ne = 5, be = 6, nbe = 7,
  s = 8, ns = 9, p = 10, np = 11, l = 12, nl = 13, le = 14, nle = 15,
  c = 2, nae = 2, nc = 3, ae = 3, z = 4, nz = 5, na = 6, a = 7,
  pe = 10, po = 11, nge = 12, ge = 13, ng = 14, g = 15,
}


-- Reverse defines for registers.
-- Replaces every internal register name ("@" followed by alphanumerics)
-- that is known to map_reg_rev with its external name. Returns the results
-- of gsub(): the new string and the number of matches.
function _M.revdef(s)
  return gsub(s, "@%w+", map_reg_rev)
end

-- Dump register names and numbers
-- Writes one line per register to `out`, with a blank line between classes.
local function dumpregs(out)
  out:write("Register names, sizes and internal numbers:\n")
  for _, iname in ipairs(reg_list) do
    if iname ~= "" then
      local extname = map_reg_rev[iname]
      local sizename = map_opsizename[map_reg_opsize[iname]]
      local num = map_reg_num[iname]
      -- Register classes carry a negative number.
      local shown = num < 0 and "(variable)" or num
      out:write(format(" %-5s %-8s %s\n", extname, sizename, shown))
    else
      -- An empty entry separates two register classes.
      out:write("\n")
    end
  end
end

------------------------------------------------------------------------------

-- Put action for label arg (IMM_LG, IMM_PC, REL_LG, REL_PC).
--   aprefix: "IMM_" or "REL_".
--   imm:     a label number, or any non-number value, which is passed on
--            as the argument of a <aprefix>PC action.
--   num:     number of buffer positions, passed on to waction().
-- Negative numbers denote extern labels. They are emitted as an EXTERN
-- action, a byte that is 0 for "IMM_" and 1 otherwise, and the label index
-- -imm-1.
local function wputlabel(aprefix, imm, num)
  if type(imm) ~= "number" then
    waction(aprefix.."PC", imm, num)
    return
  end
  if imm < 0 then
    waction("EXTERN")
    wputxb(aprefix == "IMM_" and 0 or 1)
    imm = -imm-1
  else
    waction(aprefix.."LG", nil, num)
  end
  wputxb(imm)
end

-- Put signed byte or arg.
-- Numbers are range-checked and emitted in two's complement form; anything
-- else becomes the argument of an IMM_S action.
local function wputsbarg(n)
  if type(n) ~= "number" then
    waction("IMM_S", n)
    return
  end
  if n < -128 or n > 127 then
    werror("signed immediate byte out of range")
  end
  if n < 0 then n = n + 256 end
  wputb(n)
end

-- Put unsigned byte or arg.
-- Numbers are range-checked and emitted directly; anything else becomes
-- the argument of an IMM_B action.
local function wputbarg(n)
  if type(n) ~= "number" then
    waction("IMM_B", n)
    return
  end
  if n < 0 or n > 255 then
    werror("unsigned immediate byte out of range")
  end
  wputb(n)
end

-- Put unsigned word or arg.
-- Numbers are range-checked and emitted low byte first; anything else
-- becomes the argument of an IMM_W action.
local function wputwarg(n)
  if type(n) ~= "number" then
    waction("IMM_W", n)
    return
  end
  if shr(n, 16) ~= 0 then
    werror("unsigned immediate word out of range")
  end
  wputb(band(n, 255))
  wputb(shr(n, 8))
end

-- Put signed or unsigned dword or arg.
-- Numbers are emitted as four bytes, low byte first. A table is treated as
-- a label reference ({ label }) and emitted via wputlabel("IMM_", ...).
-- Anything else becomes the argument of an IMM_D action.
local function wputdarg(n)
  local tn = type(n)
  if tn == "number" then
    wputb(band(n, 255))
    wputb(band(shr(n, 8), 255))
    wputb(band(shr(n, 16), 255))
    wputb(shr(n, 24))
  elseif tn == "table" then
    wputlabel("IMM_", n[1], 1)
  else
    waction("IMM_D", n)
  end
end

-- Put operand-size dependent number or arg (defaults to dword).
-- `sz` is nil, "d" or "q" (dword), "w" (word), "b" (unsigned byte) or
-- "s" (signed byte). Any other size is reported with werror().
local function wputszarg(sz, n)
  if not sz or sz == "d" or sz == "q" then wputdarg(n)
  elseif sz == "w" then wputwarg(n)
  elseif sz == "b" then wputbarg(n)
  elseif sz == "s" then wputsbarg(n)
  else werror("bad operand size") end
end

-- Put multi-byte opcode with operand-size dependent modifications.
--   sz:     operand size code; "w" emits byte 102 (0x66) first, "b"
--           decrements the final opcode byte, "y" sets a bit in the VEX tail.
--   op:     opcode number, emitted most significant byte first.
--   rex:    REX bits; non-zero values are only accepted on x64.
--   vex:    optional table with the fields m, p, l and v (operand with
--           reg/vreg) describing a VEX prefix.
--   vregr, vregxb: set if variable registers are involved -- TODO confirm
--           the exact meaning against the callers.
-- Returns psz and sk. Both are presumably forwarded by the caller to
-- wputmrmsib(), which passes them on to wvreg().
local function wputop(sz, op, rex, vex, vregr, vregxb)
  local psz, sk = 0, nil
  if vex then
    local tail
    -- Short form (0xc5 prefix) only if vex.m is 1 and none of the rex bits
    -- 1, 2 and 8 are set. It is not used on x64 with vregxb set.
    if vex.m == 1 and band(rex, 11) == 0 then
      if x64 and vregxb then
        sk = map_vreg["modrm.reg"]
      else
        wputb(0xc5)
        tail = shl(bxor(band(rex, 4), 4), 5)
        psz = 3
      end
    end
    -- Long form (0xc4 prefix) otherwise.
    if not tail then
      wputb(0xc4)
      wputb(shl(bxor(band(rex, 7), 7), 5) + vex.m)
      tail = shl(band(rex, 8), 4)
      psz = 4
    end
    local reg, vreg = 0, nil
    if vex.v then
      reg = vex.v.reg
      if not reg then werror("bad vex operand") end
      -- A negative register number denotes a variable register.
      if reg < 0 then reg = 0; vreg = vex.v.vreg end
    end
    if sz == "y" or vex.l then tail = tail + 4 end
    wputb(tail + shl(bxor(reg, 15), 3) + vex.p)
    wvreg("vex.v", vreg)
    rex = 0 -- The rex bits have been folded into the VEX prefix.
    if op >= 256 then werror("bad vex opcode") end
  else
    if rex ~= 0 then
      if not x64 then werror("bad operand size") end
    elseif (vregr or vregxb) and x64 then
      rex = 0x10
      sk = map_vreg["vex.v"]
    end
  end
  local r
  if sz == "w" then wputb(102) end
  -- Needs >32 bit numbers, but only for crc32 eax, word [ebx]
  if op >= 4294967296 then r = op%4294967296 wputb((op-r)/4294967296) op = r end
  if op >= 16777216 then wputb(shr(op, 24)); op = band(op, 0xffffff) end
  if op >= 65536 then
    if rex ~= 0 then
      -- For 0x0f3a/0x0f38 opcodes the REX byte is emitted at this point.
      local opc3 = band(op, 0xffff00)
      if opc3 == 0x0f3a00 or opc3 == 0x0f3800 then
        wputb(64 + band(rex, 15)); rex = 0; psz = 2
      end
    end
    wputb(shr(op, 16)); op = band(op, 0xffff); psz = psz + 1
  end
  if op >= 256 then
    local b = shr(op, 8)
    -- Same for a 0x0f opcode byte: the REX byte goes in front of it.
    if b == 15 and rex ~= 0 then wputb(64 + band(rex, 15)); rex = 0; psz = 2 end
    wputb(b); op = band(op, 255); psz = psz + 1
  end
  -- Otherwise the REX byte directly precedes the last opcode byte.
  if rex ~= 0 then wputb(64 + band(rex, 15)); psz = 2 end
  if sz == "b" then op = op - 1 end
  wputb(op)
  return psz, sk
end

-- Put ModRM or SIB formatted byte.
-- Layout: m in bits 6-7, the low 3 bits of s in bits 3-5, the low 3 bits
-- of rm in bits 0-2. The parameters vs and vrm are unused.
local function wputmodrm(m, s, rm, vs, vrm)
  assert(m < 4 and s < 16 and rm < 16, "bad modrm operands")
  wputb(shl(m, 6) + shl(band(s, 7), 3) + band(rm, 7))
end

-- Put ModRM/SIB plus optional displacement.
--   t:       operand table; the fields mode, reg, vreg, xreg, vxreg, xsc
--            and disp are read.
--   imark:   immediate marker; "I" requests a MARK action in most forms.
--   s:       value for the reg field of the ModRM byte (negative -> 0).
--   vsreg:   variable register expression for the reg field, if any.
--   psz, sk: passed on to wvreg() (psz with an offset of 1 or 2).
local function wputmrmsib(t, imark, s, vsreg, psz, sk)
  local vreg, vxreg
  local reg, xreg = t.reg, t.xreg
  -- Negative register numbers denote variable registers: encode 0 and
  -- patch the real number in through a VREG action.
  if reg and reg < 0 then reg = 0; vreg = t.vreg end
  if xreg and xreg < 0 then xreg = 0; vxreg = t.vxreg end
  if s < 0 then s = 0 end

  -- Register mode.
  if sub(t.mode, 1, 1) == "r" then
    wputmodrm(3, s, reg)
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.r", vreg, psz+1, sk)
    return
  end

  local disp = t.disp
  local tdisp = type(disp)
  -- No base register?
  if not reg then
    local riprel = false
    if xreg then
      -- Indexed mode with index register only.
      -- [xreg*xsc+disp] -> (0, s, esp) (xsc, xreg, ebp)
      wputmodrm(0, s, 4)
      if imark == "I" then waction("MARK") end
      wvreg("modrm.reg", vsreg, psz+1, sk, vxreg)
      wputmodrm(t.xsc, xreg, 5)
      wvreg("sib.index", vxreg, psz+2, sk)
    else
      -- Pure 32 bit displacement.
      if x64 and tdisp ~= "table" then
        wputmodrm(0, s, 4) -- [disp] -> (0, s, esp) (0, esp, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
        wputmodrm(0, 4, 5)
      else
        -- On x64 a label displacement (table) is emitted rip-relative.
        riprel = x64
        wputmodrm(0, s, 5) -- [disp|rip-label] -> (0, s, ebp)
        wvreg("modrm.reg", vsreg, psz+1, sk)
        if imark == "I" then waction("MARK") end
      end
    end
    if riprel then -- Emit rip-relative displacement.
      if match("UWSiI", imark) then
        werror("NYI: rip-relative displacement followed by immediate")
      end
      -- The previous byte in the action buffer cannot be 0xe9 or 0x80-0x8f.
      wputlabel("REL_", disp[1], 2)
    else
      wputdarg(disp)
    end
    return
  end

  -- m is the ModRM mode: 0 = no displacement, 1 = byte, 2 = dword. It stays
  -- nil if the displacement size is not known here; a DISP action is
  -- emitted in that case (see below).
  local m
  if tdisp == "number" then -- Check displacement size at assembly time.
    if disp == 0 and band(reg, 7) ~= 5 then -- [ebp] -> [ebp+0] (in SIB, too)
      if not vreg then m = 0 end -- Force DISP to allow [Rd(5)] -> [ebp+0]
    elseif disp >= -128 and disp <= 127 then m = 1
    else m = 2 end
  elseif tdisp == "table" then
    m = 2
  end

  -- Index register present or esp as base register: need SIB encoding.
  if xreg or band(reg, 7) == 4 then
    wputmodrm(m or 2, s, 4) -- ModRM.
    if m == nil or imark == "I" then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vxreg or vreg)
    wputmodrm(t.xsc or 0, xreg or 4, reg) -- SIB.
    wvreg("sib.index", vxreg, psz+2, sk, vreg)
    wvreg("sib.base", vreg, psz+2, sk)
  else
    wputmodrm(m or 2, s, reg) -- ModRM.
    if (imark == "I" and (m == 1 or m == 2)) or
       (m == nil and (vsreg or vreg)) then waction("MARK") end
    wvreg("modrm.reg", vsreg, psz+1, sk, vreg)
    wvreg("modrm.rm.m", vreg, psz+1, sk)
  end

  -- Put displacement.
  if m == 1 then wputsbarg(disp)
  elseif m == 2 then wputdarg(disp)
  elseif m == nil then waction("DISP", disp) end
end

------------------------------------------------------------------------------

-- Return human-readable operand mode string.
-- The result is the opcode name followed by one entry per operand: the
-- first character of its mode plus its operand size ("?" if unknown).
local function opmodestr(op, args)
  local m = {}
  for i=1,#args do
    local a = args[i]
    m[#m+1] = sub(a.mode, 1, 1)..(a.opsize or "?")
  end
  return op.." "..concat(m, ",")
end

-- Convert number to valid integer or nil.
-- Returns nil (no value) if `expr` is not convertible with tonumber().
-- Values with a fractional part or outside -2^31 .. 2^32-1 are reported
-- with werror().
local function toint(expr)
  local n = tonumber(expr)
  if n then
    if n % 1 ~= 0 or n < -2147483648 or n > 4294967295 then
      werror("bad integer number `"..expr.."'")
    end
    return n
  end
end

-- Parse immediate expression.
-- Returns a mode string and a value:
--   "iPJ", cast expression        for &expr
--   "iJ",  expression or number   for label references
--   "iI",  expr                   for everything else
local function immexpr(expr)
  -- &expr (pointer)
  if sub(expr, 1, 1) == "&" then
    return "iPJ", format(luamode and "(%s)" or "(ptrdiff_t)(%s)", sub(expr,2))
  end

  local prefix = sub(expr, 1, 2)
  -- =>expr (pc label reference)
  if prefix == "=>" then
    return "iJ", sub(expr, 3)
  end
  -- ->name (global label reference)
  if prefix == "->" then
    return "iJ", map_global[sub(expr, 3)]
  end

  -- [<>][1-9] (local label reference)
  local dir, lnum = match(expr, "^([<>])([1-9])$")
  if dir then -- Fwd: 247-255, Bkwd: 1-9.
    return "iJ", lnum + (dir == ">" and 246 or 0)
  end

  -- extern name (extern label reference, numbered on first use)
  local extname = match(expr, "^extern%s+(%S+)$")
  if extname then
    return "iJ", map_extern[extname]
  end

  -- expr (interpreted as immediate)
  return "iI", expr
end

-- Parse displacement expression: +-num, +-expr, +-opsize*num
-- Returns one of:
--   a number       for "", a plain number or +-opsize*num,
--   { label }      for a label reference (only with a "+" sign),
--   a string       for any other expression. With a "+" sign the sign is
--                  stripped, with a "-" sign the expression is returned
--                  unchanged.
local function dispexpr(expr)
  local disp = expr == "" and 0 or toint(expr)
  if disp then return disp end
  local c, dispt = match(expr, "^([+-])%s*(.+)$")
  if c == "+" then
    expr = dispt
  elseif not c then
    werror("bad displacement expression `"..expr.."'")
  end
  -- +-opsize*num: scale the number by the byte size of the operand size.
  local opsize, tailops = match(dispt, "^(%w+)%s*%*%s*(.+)$")
  local ops, imm = map_opsize[opsize], toint(tailops)
  if ops and imm then
    if c == "-" then imm = -imm end
    return imm*map_opsizenum[ops]
  end
  local mode, iexpr = immexpr(dispt)
  if mode == "iJ" then
    if c == "-" then werror("cannot invert label reference") end
    return { iexpr }
  end
  return expr -- Need to return original signed expression.
end

-- Parse register or type expression.
-- Accepts an internal register name, a type name or `type:@reg` (a type
-- with a register override).
-- Returns the register name and its number, plus the type entry if the
-- expression names a type. Returns nothing if `expr` is nil. For a plain
-- register expression the number is nil if the register is unknown.
local function rtexpr(expr)
  if not expr then return end
  local tname, ovreg = match(expr, "^([%w_]+):(@[%w_]+)$")
  local tp = map_type[tname or expr]
  if tp then
    local reg = ovreg or tp.reg
    local rnum = map_reg_num[reg]
    if not rnum then
      werror("type `"..(tname or expr).."' needs a register override")
    end
    if not map_reg_valid_base[reg] then
      werror("bad base register override `"..(map_reg_rev[reg] or reg).."'")
    end
    return reg, rnum, tp
  end
  return expr, map_reg_num[expr]
end

-- Parse operand and return { mode, opsize, reg, xreg, xsc, disp, imm }.
-- Turn one operand string into a descriptor table.
-- Depending on the operand form the table carries some of: mode, opsize,
-- reg, vreg, xreg, vxreg, xsc, disp, imm, needrex.
--   "[...]"            memory operand, mode "xm" ("xmO" for a pure offset
--                      when not x64)
--   integer            immediate, mode "i" plus "1" and/or "S" flags
--   register           mode "rm"/"rmR"/"rmC" or "f"/"fF"
--   type expression    memory operand whose disp comes from the type's
--                      ctypefmt
--   anything else      whatever immexpr() classifies it as
-- An optional leading operand-size keyword (a key of map_opsize) is split
-- off first and stored in opsize.
local parseoperand
do
  -- Split a leading "(...)" variable-register argument off `tail`.
  -- Returns the parenthesized text and the remainder.
  local function takevreg(tail)
    local vexpr, rest = match(tail, "^(%b())(.*)$")
    if not vexpr then werror("bad variable register expression") end
    return vexpr, rest
  end

  -- Fill `t` for a bracketed operand; `inner` is the text between [ and ].
  local function parsemem(t, inner)
    t.mode = "xm"

    -- [disp]
    local ndisp = toint(inner)
    t.disp = ndisp
    if ndisp then
      t.mode = x64 and "xm" or "xmO"
      return
    end

    -- [reg...]
    local rname, rest = match(inner, "^([@%w_:]+)%s*(.*)$")
    local base, basenum = rtexpr(rname)
    t.reg = basenum
    if not basenum then
      -- [expr]
      t.mode = x64 and "xm" or "xmO"
      t.disp = dispexpr("+"..inner)
      return
    end

    if basenum == -1 then
      t.vreg, rest = takevreg(rest)
    end

    -- [xreg*xsc] or [xreg*xsc+-disp] or [xreg*xsc+-expr]
    local scale, afterscale = match(rest, "^%*%s*([1248])%s*(.*)$")
    if scale then
      if not map_reg_valid_index[base] then
        werror("bad index register `"..map_reg_rev[base].."'")
      end
      -- The register parsed so far is really the index, not the base.
      t.xsc = map_xsc[scale]
      t.xreg = t.reg
      t.vxreg = t.vreg
      t.reg = nil
      t.vreg = nil
      t.disp = dispexpr(afterscale)
      return
    end
    if not map_reg_valid_base[base] then
      werror("bad base register `"..map_reg_rev[base].."'")
    end

    -- [reg] or [reg+-disp]
    t.disp = toint(rest) or (rest == "" and 0)
    if t.disp then return end

    -- [reg+xreg...]
    local xname, xrest = match(rest, "^+%s*([@%w_:]+)%s*(.*)$")
    local index, indexnum = rtexpr(xname)
    t.xreg = indexnum
    if not indexnum then
      -- [reg+-expr]
      t.disp = dispexpr(rest)
      return
    end
    if not map_reg_valid_index[index] then
      werror("bad index register `"..map_reg_rev[index].."'")
    end

    if indexnum == -1 then
      t.vxreg, xrest = takevreg(xrest)
    end

    -- [reg+xreg*xsc...]
    local xscale, xafter = match(xrest, "^%*%s*([1248])%s*(.*)$")
    if xscale then
      t.xsc = map_xsc[xscale]
      xrest = xafter
    end

    -- [...] or [...+-disp] or [...+-expr]
    t.disp = dispexpr(xrest)
  end

  -- Fill `t` for a non-bracketed operand. `expr` is the operand text after
  -- any size keyword, `param` the untouched operand (used in messages).
  local function parsevalue(t, expr, param)
    -- imm or opsize*imm
    local imm = toint(expr)
    if not imm and sub(expr, 1, 1) == "*" and t.opsize then
      imm = toint(sub(expr, 2))
      if imm then
        imm = imm * map_opsizenum[t.opsize]
        t.opsize = nil
      end
    end
    if imm then
      if t.opsize then werror("bad operand size override") end
      local mode = "i"
      if imm == 1 then mode = mode.."1" end
      -- Fold 0xffffff80..0xffffffff onto -128..-1.
      if imm >= 4294967168 and imm <= 4294967295 then imm = imm-4294967296 end
      if imm >= -128 and imm <= 127 then mode = mode.."S" end
      t.imm = imm
      t.mode = mode
      return
    end

    local rname, rest = match(expr, "^([@%w_:]+)%s*(.*)$")
    local reg, regnum, tp = rtexpr(rname)
    t.reg = regnum
    if not regnum then
      -- Neither register nor type: label or immediate expression.
      t.mode, t.imm = immexpr(expr)
      if sub(t.mode, -1) == "J" then
        if t.opsize and t.opsize ~= addrsize then
          werror("bad operand size override")
        end
        t.opsize = addrsize
      end
      return
    end

    if regnum == -1 then
      t.vreg, rest = takevreg(rest)
    end

    -- reg
    if rest == "" then
      if t.opsize then werror("bad operand size override") end
      local rsize = map_reg_opsize[reg]
      t.opsize = rsize
      if rsize == "f" then
        if regnum == 0 then t.mode = "fF" else t.mode = "f" end
      else
        if reg == "@w4" or (x64 and reg == "@d4") then
          wwarn("bad idea, try again with `"..(x64 and "rsp'" or "esp'"))
        end
        if regnum == 0 then
          t.mode = "rmR"
        elseif reg == "@b1" then
          t.mode = "rmC"
        else
          t.mode = "rm"
        end
      end
      t.needrex = map_reg_needrex[reg]
      return
    end

    -- type[idx], type[idx].field, type->field -> [reg+offset_expr]
    if not tp then werror("bad operand `"..param.."'") end
    t.mode = "xm"
    if luamode then
      t.disp = tp.ctypefmt(rest)
    else
      t.disp = format(tp.ctypefmt, rest)
    end
  end

  function parseoperand(param)
    local t = {}

    -- Optional operand size keyword in front of the operand proper.
    local expr = param
    local opsize, tailops = match(param, "^(%w+)%s*(.+)$")
    if opsize then
      t.opsize = map_opsize[opsize]
      if t.opsize then expr = tailops end
    end

    local br = match(expr, "^%[%s*(.-)%s*%]$")
    if br then
      parsemem(t, br)
    else
      parsevalue(t, expr, param)
    end
    return t
  end
end

------------------------------------------------------------------------------
-- x86 Template String Description
-- ===============================
--
-- Each template string is a list of [match:]pattern pairs,
-- separated by "|". The first match wins. No match means a
-- bad or unsupported combination of operand modes or sizes.
--
-- The match part and the ":" is omitted if the operation has
-- no operands. Otherwise the first N characters are matched
-- against the mode strings of each of the N operands.
--
-- The mode string for each operand type is (see parseoperand()):
--   Integer register: "rm", +"R" for eax, ax, al, +"C" for cl
--   FP register:      "f",  +"F" for st0
--   Index operand:    "xm", +"O" for [disp] (pure offset)
--   Immediate:        "i",  +"S" for signed 8 bit, +"1" for 1,
--                     +"I" for arg, +"P" for pointer
--   Any:              +"J" for valid jump targets
--
-- So a match character "m" (mixed) matches both an integer register
-- and an index operand (to be encoded with the ModRM/SIB scheme).
-- But "r" matches only a register and "x" only an index operand
-- (e.g. for FP memory access operations).
994 | -- 995 | -- The operand size match string starts right after the mode match 996 | -- characters and ends before the ":". "dwb" or "qdwb" is assumed, if empty. 997 | -- The effective data size of the operation is matched against this list. 998 | -- 999 | -- If only the regular "b", "w", "d", "q", "t" operand sizes are 1000 | -- present, then all operands must be the same size. Unspecified sizes 1001 | -- are ignored, but at least one operand must have a size or the pattern 1002 | -- won't match (use the "byte", "word", "dword", "qword", "tword" 1003 | -- operand size overrides. E.g.: mov dword [eax], 1). 1004 | -- 1005 | -- If the list has a "1" or "2" prefix, the operand size is taken 1006 | -- from the respective operand and any other operand sizes are ignored. 1007 | -- If the list contains only ".", all operand sizes are ignored. 1008 | -- If the list has a "/" prefix, the concatenated (mixed) operand sizes 1009 | -- are compared to the match. 1010 | -- 1011 | -- E.g. "rrdw" matches for either two dword registers or two word 1012 | -- registers. "Fx2dq" matches an st0 operand plus an index operand 1013 | -- pointing to a dword (float) or qword (double). 1014 | -- 1015 | -- Every character after the ":" is part of the pattern string: 1016 | -- Hex chars are accumulated to form the opcode (left to right). 1017 | -- "n" disables the standard opcode mods 1018 | -- (otherwise: -1 for "b", o16 prefix for "w", rex.w for "q") 1019 | -- "X" Force REX.W. 1020 | -- "r"/"R" adds the reg. number from the 1st/2nd operand to the opcode. 1021 | -- "m"/"M" generates ModRM/SIB from the 1st/2nd operand. 1022 | -- The spare 3 bits are either filled with the last hex digit or 1023 | -- the result from a previous "r"/"R". The opcode is restored. 1024 | -- "u" Use VEX encoding, vvvv unused. 1025 | -- "v"/"V" Use VEX encoding, vvvv from 1st/2nd operand (the operand is 1026 | -- removed from the list used by future characters). 
1027 | -- "w" Use VEX encoding, vvvv from 3rd operand. 1028 | -- "L" Force VEX.L 1029 | -- 1030 | -- All of the following characters force a flush of the opcode: 1031 | -- "o"/"O" stores a pure 32 bit disp (offset) from the 1st/2nd operand. 1032 | -- "s" stores a 4 bit immediate from the last register operand, 1033 | -- followed by 4 zero bits. 1034 | -- "S" stores a signed 8 bit immediate from the last operand. 1035 | -- "U" stores an unsigned 8 bit immediate from the last operand. 1036 | -- "W" stores an unsigned 16 bit immediate from the last operand. 1037 | -- "i" stores an operand sized immediate from the last operand. 1038 | -- "I" ditto, but generates an action code to optionally modify 1039 | -- the opcode (+2) for a signed 8 bit immediate. 1040 | -- "J" generates one of the REL action codes from the last operand. 1041 | -- 1042 | ------------------------------------------------------------------------------ 1043 | 1044 | -- Template strings for x86 instructions. Ordered by first opcode byte. 1045 | -- Unimplemented opcodes (deliberate omissions) are marked with *. 1046 | local map_op = { 1047 | -- 00-05: add... 1048 | -- 06: *push es 1049 | -- 07: *pop es 1050 | -- 08-0D: or... 1051 | -- 0E: *push cs 1052 | -- 0F: two byte opcode prefix 1053 | -- 10-15: adc... 1054 | -- 16: *push ss 1055 | -- 17: *pop ss 1056 | -- 18-1D: sbb... 1057 | -- 1E: *push ds 1058 | -- 1F: *pop ds 1059 | -- 20-25: and... 1060 | es_0 = "26", 1061 | -- 27: *daa 1062 | -- 28-2D: sub... 1063 | cs_0 = "2E", 1064 | -- 2F: *das 1065 | -- 30-35: xor... 1066 | ss_0 = "36", 1067 | -- 37: *aaa 1068 | -- 38-3D: cmp... 
1069 | ds_0 = "3E", 1070 | -- 3F: *aas 1071 | inc_1 = x64 and "m:FF0m" or "rdw:40r|m:FF0m", 1072 | dec_1 = x64 and "m:FF1m" or "rdw:48r|m:FF1m", 1073 | push_1 = (x64 and "rq:n50r|rw:50r|mq:nFF6m|mw:FF6m" or 1074 | "rdw:50r|mdw:FF6m").."|S.:6AS|ib:n6Ai|i.:68i", 1075 | pop_1 = x64 and "rq:n58r|rw:58r|mq:n8F0m|mw:8F0m" or "rdw:58r|mdw:8F0m", 1076 | -- 60: *pusha, *pushad, *pushaw 1077 | -- 61: *popa, *popad, *popaw 1078 | -- 62: *bound rdw,x 1079 | -- 63: x86: *arpl mw,rw 1080 | movsxd_2 = x64 and "rm/qd:63rM", 1081 | fs_0 = "64", 1082 | gs_0 = "65", 1083 | o16_0 = "66", 1084 | a16_0 = not x64 and "67" or nil, 1085 | a32_0 = x64 and "67", 1086 | -- 68: push idw 1087 | -- 69: imul rdw,mdw,idw 1088 | -- 6A: push ib 1089 | -- 6B: imul rdw,mdw,S 1090 | -- 6C: *insb 1091 | -- 6D: *insd, *insw 1092 | -- 6E: *outsb 1093 | -- 6F: *outsd, *outsw 1094 | -- 70-7F: jcc lb 1095 | -- 80: add... mb,i 1096 | -- 81: add... mdw,i 1097 | -- 82: *undefined 1098 | -- 83: add... mdw,S 1099 | test_2 = "mr:85Rm|rm:85rM|Ri:A9ri|mi:F70mi", 1100 | -- 86: xchg rb,mb 1101 | -- 87: xchg rdw,mdw 1102 | -- 88: mov mb,r 1103 | -- 89: mov mdw,r 1104 | -- 8A: mov r,mb 1105 | -- 8B: mov r,mdw 1106 | -- 8C: *mov mdw,seg 1107 | lea_2 = "rx1dq:8DrM", 1108 | -- 8E: *mov seg,mdw 1109 | -- 8F: pop mdw 1110 | nop_0 = "90", 1111 | xchg_2 = "Rrqdw:90R|rRqdw:90r|rm:87rM|mr:87Rm", 1112 | cbw_0 = "6698", 1113 | cwde_0 = "98", 1114 | cdqe_0 = "4898", 1115 | cwd_0 = "6699", 1116 | cdq_0 = "99", 1117 | cqo_0 = "4899", 1118 | -- 9A: *call iw:idw 1119 | wait_0 = "9B", 1120 | fwait_0 = "9B", 1121 | pushf_0 = "9C", 1122 | pushfd_0 = not x64 and "9C", 1123 | pushfq_0 = x64 and "9C", 1124 | popf_0 = "9D", 1125 | popfd_0 = not x64 and "9D", 1126 | popfq_0 = x64 and "9D", 1127 | sahf_0 = "9E", 1128 | lahf_0 = "9F", 1129 | mov_2 = "OR:A3o|RO:A1O|mr:89Rm|rm:8BrM|rib:nB0ri|ridw:B8ri|mi:C70mi", 1130 | movsb_0 = "A4", 1131 | movsw_0 = "66A5", 1132 | movsd_0 = "A5", 1133 | cmpsb_0 = "A6", 1134 | cmpsw_0 = "66A7", 1135 | cmpsd_0 = 
"A7", 1136 | -- A8: test Rb,i 1137 | -- A9: test Rdw,i 1138 | stosb_0 = "AA", 1139 | stosw_0 = "66AB", 1140 | stosd_0 = "AB", 1141 | lodsb_0 = "AC", 1142 | lodsw_0 = "66AD", 1143 | lodsd_0 = "AD", 1144 | scasb_0 = "AE", 1145 | scasw_0 = "66AF", 1146 | scasd_0 = "AF", 1147 | -- B0-B7: mov rb,i 1148 | -- B8-BF: mov rdw,i 1149 | -- C0: rol... mb,i 1150 | -- C1: rol... mdw,i 1151 | ret_1 = "i.:nC2W", 1152 | ret_0 = "C3", 1153 | -- C4: *les rdw,mq 1154 | -- C5: *lds rdw,mq 1155 | -- C6: mov mb,i 1156 | -- C7: mov mdw,i 1157 | -- C8: *enter iw,ib 1158 | leave_0 = "C9", 1159 | -- CA: *retf iw 1160 | -- CB: *retf 1161 | int3_0 = "CC", 1162 | int_1 = "i.:nCDU", 1163 | into_0 = "CE", 1164 | -- CF: *iret 1165 | -- D0: rol... mb,1 1166 | -- D1: rol... mdw,1 1167 | -- D2: rol... mb,cl 1168 | -- D3: rol... mdw,cl 1169 | -- D4: *aam ib 1170 | -- D5: *aad ib 1171 | -- D6: *salc 1172 | -- D7: *xlat 1173 | -- D8-DF: floating point ops 1174 | -- E0: *loopne 1175 | -- E1: *loope 1176 | -- E2: *loop 1177 | -- E3: *jcxz, *jecxz 1178 | -- E4: *in Rb,ib 1179 | -- E5: *in Rdw,ib 1180 | -- E6: *out ib,Rb 1181 | -- E7: *out ib,Rdw 1182 | call_1 = x64 and "mq:nFF2m|J.:E8nJ" or "md:FF2m|J.:E8J", 1183 | jmp_1 = x64 and "mq:nFF4m|J.:E9nJ" or "md:FF4m|J.:E9J", -- short: EB 1184 | -- EA: *jmp iw:idw 1185 | -- EB: jmp ib 1186 | -- EC: *in Rb,dx 1187 | -- ED: *in Rdw,dx 1188 | -- EE: *out dx,Rb 1189 | -- EF: *out dx,Rdw 1190 | lock_0 = "F0", 1191 | int1_0 = "F1", 1192 | repne_0 = "F2", 1193 | repnz_0 = "F2", 1194 | rep_0 = "F3", 1195 | repe_0 = "F3", 1196 | repz_0 = "F3", 1197 | -- F4: *hlt 1198 | cmc_0 = "F5", 1199 | -- F6: test... mb,i; div... mb 1200 | -- F7: test... mdw,i; div... mdw 1201 | clc_0 = "F8", 1202 | stc_0 = "F9", 1203 | -- FA: *cli 1204 | cld_0 = "FC", 1205 | std_0 = "FD", 1206 | -- FE: inc... mb 1207 | -- FF: inc... 
mdw 1208 | 1209 | -- misc ops 1210 | not_1 = "m:F72m", 1211 | neg_1 = "m:F73m", 1212 | mul_1 = "m:F74m", 1213 | imul_1 = "m:F75m", 1214 | div_1 = "m:F76m", 1215 | idiv_1 = "m:F77m", 1216 | 1217 | imul_2 = "rmqdw:0FAFrM|rIqdw:69rmI|rSqdw:6BrmS|riqdw:69rmi", 1218 | imul_3 = "rmIqdw:69rMI|rmSqdw:6BrMS|rmiqdw:69rMi", 1219 | 1220 | movzx_2 = "rm/db:0FB6rM|rm/qb:|rm/wb:0FB6rM|rm/dw:0FB7rM|rm/qw:", 1221 | movsx_2 = "rm/db:0FBErM|rm/qb:|rm/wb:0FBErM|rm/dw:0FBFrM|rm/qw:", 1222 | 1223 | bswap_1 = "rqd:0FC8r", 1224 | bsf_2 = "rmqdw:0FBCrM", 1225 | bsr_2 = "rmqdw:0FBDrM", 1226 | bt_2 = "mrqdw:0FA3Rm|miqdw:0FBA4mU", 1227 | btc_2 = "mrqdw:0FBBRm|miqdw:0FBA7mU", 1228 | btr_2 = "mrqdw:0FB3Rm|miqdw:0FBA6mU", 1229 | bts_2 = "mrqdw:0FABRm|miqdw:0FBA5mU", 1230 | 1231 | shld_3 = "mriqdw:0FA4RmU|mrC/qq:0FA5Rm|mrC/dd:|mrC/ww:", 1232 | shrd_3 = "mriqdw:0FACRmU|mrC/qq:0FADRm|mrC/dd:|mrC/ww:", 1233 | 1234 | rdtsc_0 = "0F31", -- P1+ 1235 | rdpmc_0 = "0F33", -- P6+ 1236 | cpuid_0 = "0FA2", -- P1+ 1237 | 1238 | -- floating point ops 1239 | fst_1 = "ff:DDD0r|xd:D92m|xq:nDD2m", 1240 | fstp_1 = "ff:DDD8r|xd:D93m|xq:nDD3m|xt:DB7m", 1241 | fld_1 = "ff:D9C0r|xd:D90m|xq:nDD0m|xt:DB5m", 1242 | 1243 | fpop_0 = "DDD8", -- Alias for fstp st0. 
1244 | 1245 | fist_1 = "xw:nDF2m|xd:DB2m", 1246 | fistp_1 = "xw:nDF3m|xd:DB3m|xq:nDF7m", 1247 | fild_1 = "xw:nDF0m|xd:DB0m|xq:nDF5m", 1248 | 1249 | fxch_0 = "D9C9", 1250 | fxch_1 = "ff:D9C8r", 1251 | fxch_2 = "fFf:D9C8r|Fff:D9C8R", 1252 | 1253 | fucom_1 = "ff:DDE0r", 1254 | fucom_2 = "Fff:DDE0R", 1255 | fucomp_1 = "ff:DDE8r", 1256 | fucomp_2 = "Fff:DDE8R", 1257 | fucomi_1 = "ff:DBE8r", -- P6+ 1258 | fucomi_2 = "Fff:DBE8R", -- P6+ 1259 | fucomip_1 = "ff:DFE8r", -- P6+ 1260 | fucomip_2 = "Fff:DFE8R", -- P6+ 1261 | fcomi_1 = "ff:DBF0r", -- P6+ 1262 | fcomi_2 = "Fff:DBF0R", -- P6+ 1263 | fcomip_1 = "ff:DFF0r", -- P6+ 1264 | fcomip_2 = "Fff:DFF0R", -- P6+ 1265 | fucompp_0 = "DAE9", 1266 | fcompp_0 = "DED9", 1267 | 1268 | fldenv_1 = "x.:D94m", 1269 | fnstenv_1 = "x.:D96m", 1270 | fstenv_1 = "x.:9BD96m", 1271 | fldcw_1 = "xw:nD95m", 1272 | fstcw_1 = "xw:n9BD97m", 1273 | fnstcw_1 = "xw:nD97m", 1274 | fstsw_1 = "Rw:n9BDFE0|xw:n9BDD7m", 1275 | fnstsw_1 = "Rw:nDFE0|xw:nDD7m", 1276 | fclex_0 = "9BDBE2", 1277 | fnclex_0 = "DBE2", 1278 | 1279 | fnop_0 = "D9D0", 1280 | -- D9D1-D9DF: unassigned 1281 | 1282 | fchs_0 = "D9E0", 1283 | fabs_0 = "D9E1", 1284 | -- D9E2: unassigned 1285 | -- D9E3: unassigned 1286 | ftst_0 = "D9E4", 1287 | fxam_0 = "D9E5", 1288 | -- D9E6: unassigned 1289 | -- D9E7: unassigned 1290 | fld1_0 = "D9E8", 1291 | fldl2t_0 = "D9E9", 1292 | fldl2e_0 = "D9EA", 1293 | fldpi_0 = "D9EB", 1294 | fldlg2_0 = "D9EC", 1295 | fldln2_0 = "D9ED", 1296 | fldz_0 = "D9EE", 1297 | -- D9EF: unassigned 1298 | 1299 | f2xm1_0 = "D9F0", 1300 | fyl2x_0 = "D9F1", 1301 | fptan_0 = "D9F2", 1302 | fpatan_0 = "D9F3", 1303 | fxtract_0 = "D9F4", 1304 | fprem1_0 = "D9F5", 1305 | fdecstp_0 = "D9F6", 1306 | fincstp_0 = "D9F7", 1307 | fprem_0 = "D9F8", 1308 | fyl2xp1_0 = "D9F9", 1309 | fsqrt_0 = "D9FA", 1310 | fsincos_0 = "D9FB", 1311 | frndint_0 = "D9FC", 1312 | fscale_0 = "D9FD", 1313 | fsin_0 = "D9FE", 1314 | fcos_0 = "D9FF", 1315 | 1316 | -- SSE, SSE2 1317 | andnpd_2 = "rmo:660F55rM", 1318 | 
andnps_2 = "rmo:0F55rM", 1319 | andpd_2 = "rmo:660F54rM", 1320 | andps_2 = "rmo:0F54rM", 1321 | clflush_1 = "x.:0FAE7m", 1322 | cmppd_3 = "rmio:660FC2rMU", 1323 | cmpps_3 = "rmio:0FC2rMU", 1324 | cmpsd_3 = "rrio:F20FC2rMU|rxi/oq:", 1325 | cmpss_3 = "rrio:F30FC2rMU|rxi/od:", 1326 | comisd_2 = "rro:660F2FrM|rx/oq:", 1327 | comiss_2 = "rro:0F2FrM|rx/od:", 1328 | cvtdq2pd_2 = "rro:F30FE6rM|rx/oq:", 1329 | cvtdq2ps_2 = "rmo:0F5BrM", 1330 | cvtpd2dq_2 = "rmo:F20FE6rM", 1331 | cvtpd2ps_2 = "rmo:660F5ArM", 1332 | cvtpi2pd_2 = "rx/oq:660F2ArM", 1333 | cvtpi2ps_2 = "rx/oq:0F2ArM", 1334 | cvtps2dq_2 = "rmo:660F5BrM", 1335 | cvtps2pd_2 = "rro:0F5ArM|rx/oq:", 1336 | cvtsd2si_2 = "rr/do:F20F2DrM|rr/qo:|rx/dq:|rxq:", 1337 | cvtsd2ss_2 = "rro:F20F5ArM|rx/oq:", 1338 | cvtsi2sd_2 = "rm/od:F20F2ArM|rm/oq:F20F2ArXM", 1339 | cvtsi2ss_2 = "rm/od:F30F2ArM|rm/oq:F30F2ArXM", 1340 | cvtss2sd_2 = "rro:F30F5ArM|rx/od:", 1341 | cvtss2si_2 = "rr/do:F30F2DrM|rr/qo:|rxd:|rx/qd:", 1342 | cvttpd2dq_2 = "rmo:660FE6rM", 1343 | cvttps2dq_2 = "rmo:F30F5BrM", 1344 | cvttsd2si_2 = "rr/do:F20F2CrM|rr/qo:|rx/dq:|rxq:", 1345 | cvttss2si_2 = "rr/do:F30F2CrM|rr/qo:|rxd:|rx/qd:", 1346 | fxsave_1 = "x.:0FAE0m", 1347 | fxrstor_1 = "x.:0FAE1m", 1348 | ldmxcsr_1 = "xd:0FAE2m", 1349 | lfence_0 = "0FAEE8", 1350 | maskmovdqu_2 = "rro:660FF7rM", 1351 | mfence_0 = "0FAEF0", 1352 | movapd_2 = "rmo:660F28rM|mro:660F29Rm", 1353 | movaps_2 = "rmo:0F28rM|mro:0F29Rm", 1354 | movd_2 = "rm/od:660F6ErM|rm/oq:660F6ErXM|mr/do:660F7ERm|mr/qo:", 1355 | movdqa_2 = "rmo:660F6FrM|mro:660F7FRm", 1356 | movdqu_2 = "rmo:F30F6FrM|mro:F30F7FRm", 1357 | movhlps_2 = "rro:0F12rM", 1358 | movhpd_2 = "rx/oq:660F16rM|xr/qo:n660F17Rm", 1359 | movhps_2 = "rx/oq:0F16rM|xr/qo:n0F17Rm", 1360 | movlhps_2 = "rro:0F16rM", 1361 | movlpd_2 = "rx/oq:660F12rM|xr/qo:n660F13Rm", 1362 | movlps_2 = "rx/oq:0F12rM|xr/qo:n0F13Rm", 1363 | movmskpd_2 = "rr/do:660F50rM", 1364 | movmskps_2 = "rr/do:0F50rM", 1365 | movntdq_2 = "xro:660FE7Rm", 1366 | movnti_2 = 
"xrqd:0FC3Rm", 1367 | movntpd_2 = "xro:660F2BRm", 1368 | movntps_2 = "xro:0F2BRm", 1369 | movq_2 = "rro:F30F7ErM|rx/oq:|xr/qo:n660FD6Rm", 1370 | movsd_2 = "rro:F20F10rM|rx/oq:|xr/qo:nF20F11Rm", 1371 | movss_2 = "rro:F30F10rM|rx/od:|xr/do:F30F11Rm", 1372 | movupd_2 = "rmo:660F10rM|mro:660F11Rm", 1373 | movups_2 = "rmo:0F10rM|mro:0F11Rm", 1374 | orpd_2 = "rmo:660F56rM", 1375 | orps_2 = "rmo:0F56rM", 1376 | pause_0 = "F390", 1377 | pextrw_3 = "rri/do:660FC5rMU|xri/wo:660F3A15nRmU", -- Mem op: SSE4.1 only. 1378 | pinsrw_3 = "rri/od:660FC4rMU|rxi/ow:", 1379 | pmovmskb_2 = "rr/do:660FD7rM", 1380 | prefetchnta_1 = "xb:n0F180m", 1381 | prefetcht0_1 = "xb:n0F181m", 1382 | prefetcht1_1 = "xb:n0F182m", 1383 | prefetcht2_1 = "xb:n0F183m", 1384 | pshufd_3 = "rmio:660F70rMU", 1385 | pshufhw_3 = "rmio:F30F70rMU", 1386 | pshuflw_3 = "rmio:F20F70rMU", 1387 | pslld_2 = "rmo:660FF2rM|rio:660F726mU", 1388 | pslldq_2 = "rio:660F737mU", 1389 | psllq_2 = "rmo:660FF3rM|rio:660F736mU", 1390 | psllw_2 = "rmo:660FF1rM|rio:660F716mU", 1391 | psrad_2 = "rmo:660FE2rM|rio:660F724mU", 1392 | psraw_2 = "rmo:660FE1rM|rio:660F714mU", 1393 | psrld_2 = "rmo:660FD2rM|rio:660F722mU", 1394 | psrldq_2 = "rio:660F733mU", 1395 | psrlq_2 = "rmo:660FD3rM|rio:660F732mU", 1396 | psrlw_2 = "rmo:660FD1rM|rio:660F712mU", 1397 | rcpps_2 = "rmo:0F53rM", 1398 | rcpss_2 = "rro:F30F53rM|rx/od:", 1399 | rsqrtps_2 = "rmo:0F52rM", 1400 | rsqrtss_2 = "rmo:F30F52rM", 1401 | sfence_0 = "0FAEF8", 1402 | shufpd_3 = "rmio:660FC6rMU", 1403 | shufps_3 = "rmio:0FC6rMU", 1404 | stmxcsr_1 = "xd:0FAE3m", 1405 | ucomisd_2 = "rro:660F2ErM|rx/oq:", 1406 | ucomiss_2 = "rro:0F2ErM|rx/od:", 1407 | unpckhpd_2 = "rmo:660F15rM", 1408 | unpckhps_2 = "rmo:0F15rM", 1409 | unpcklpd_2 = "rmo:660F14rM", 1410 | unpcklps_2 = "rmo:0F14rM", 1411 | xorpd_2 = "rmo:660F57rM", 1412 | xorps_2 = "rmo:0F57rM", 1413 | 1414 | -- SSE3 ops 1415 | fisttp_1 = "xw:nDF1m|xd:DB1m|xq:nDD1m", 1416 | addsubpd_2 = "rmo:660FD0rM", 1417 | addsubps_2 = "rmo:F20FD0rM", 1418 | 
haddpd_2 = "rmo:660F7CrM",
  haddps_2 = "rmo:F20F7CrM",
  hsubpd_2 = "rmo:660F7DrM",
  hsubps_2 = "rmo:F20F7DrM",
  lddqu_2 = "rxo:F20FF0rM",
  movddup_2 = "rmo:F20F12rM",
  movshdup_2 = "rmo:F30F16rM",
  movsldup_2 = "rmo:F30F12rM",

  -- SSSE3 ops
  pabsb_2 = "rmo:660F381CrM",
  pabsd_2 = "rmo:660F381ErM",
  pabsw_2 = "rmo:660F381DrM",
  palignr_3 = "rmio:660F3A0FrMU",
  phaddd_2 = "rmo:660F3802rM",
  phaddsw_2 = "rmo:660F3803rM",
  phaddw_2 = "rmo:660F3801rM",
  phsubd_2 = "rmo:660F3806rM",
  phsubsw_2 = "rmo:660F3807rM",
  phsubw_2 = "rmo:660F3805rM",
  pmaddubsw_2 = "rmo:660F3804rM",
  pmulhrsw_2 = "rmo:660F380BrM",
  pshufb_2 = "rmo:660F3800rM",
  psignb_2 = "rmo:660F3808rM",
  psignd_2 = "rmo:660F380ArM",
  psignw_2 = "rmo:660F3809rM",

  -- SSE4.1 ops
  blendpd_3 = "rmio:660F3A0DrMU",
  blendps_3 = "rmio:660F3A0CrMU",
  blendvpd_3 = "rmRo:660F3815rM",
  blendvps_3 = "rmRo:660F3814rM",
  dppd_3 = "rmio:660F3A41rMU",
  dpps_3 = "rmio:660F3A40rMU",
  extractps_3 = "mri/do:660F3A17RmU|rri/qo:660F3A17RXmU",
  -- insertps is 66 0F 3A 21 /r ib. The previous value, 660F3A41, is the
  -- dppd opcode, so "insertps" silently assembled a dot product.
  insertps_3 = "rrio:660F3A21rMU|rxi/od:",
  movntdqa_2 = "rxo:660F382ArM",
  mpsadbw_3 = "rmio:660F3A42rMU",
  packusdw_2 = "rmo:660F382BrM",
  pblendvb_3 = "rmRo:660F3810rM",
  pblendw_3 = "rmio:660F3A0ErMU",
  pcmpeqq_2 = "rmo:660F3829rM",
  pextrb_3 = "rri/do:660F3A14nRmU|rri/qo:|xri/bo:",
  pextrd_3 = "mri/do:660F3A16RmU",
  pextrq_3 = "mri/qo:660F3A16RmU",
  -- pextrw is SSE2, mem operand is SSE4.1 only
  phminposuw_2 = "rmo:660F3841rM",
  pinsrb_3 = "rri/od:660F3A20nrMU|rxi/ob:",
  pinsrd_3 = "rmi/od:660F3A22rMU",
  pinsrq_3 = "rmi/oq:660F3A22rXMU",
  pmaxsb_2 = "rmo:660F383CrM",
  pmaxsd_2 = "rmo:660F383DrM",
  pmaxud_2 = "rmo:660F383FrM",
  pmaxuw_2 = "rmo:660F383ErM",
  pminsb_2 =
"rmo:660F3838rM", 1473 | pminsd_2 = "rmo:660F3839rM", 1474 | pminud_2 = "rmo:660F383BrM", 1475 | pminuw_2 = "rmo:660F383ArM", 1476 | pmovsxbd_2 = "rro:660F3821rM|rx/od:", 1477 | pmovsxbq_2 = "rro:660F3822rM|rx/ow:", 1478 | pmovsxbw_2 = "rro:660F3820rM|rx/oq:", 1479 | pmovsxdq_2 = "rro:660F3825rM|rx/oq:", 1480 | pmovsxwd_2 = "rro:660F3823rM|rx/oq:", 1481 | pmovsxwq_2 = "rro:660F3824rM|rx/od:", 1482 | pmovzxbd_2 = "rro:660F3831rM|rx/od:", 1483 | pmovzxbq_2 = "rro:660F3832rM|rx/ow:", 1484 | pmovzxbw_2 = "rro:660F3830rM|rx/oq:", 1485 | pmovzxdq_2 = "rro:660F3835rM|rx/oq:", 1486 | pmovzxwd_2 = "rro:660F3833rM|rx/oq:", 1487 | pmovzxwq_2 = "rro:660F3834rM|rx/od:", 1488 | pmuldq_2 = "rmo:660F3828rM", 1489 | pmulld_2 = "rmo:660F3840rM", 1490 | ptest_2 = "rmo:660F3817rM", 1491 | roundpd_3 = "rmio:660F3A09rMU", 1492 | roundps_3 = "rmio:660F3A08rMU", 1493 | roundsd_3 = "rrio:660F3A0BrMU|rxi/oq:", 1494 | roundss_3 = "rrio:660F3A0ArMU|rxi/od:", 1495 | 1496 | -- SSE4.2 ops 1497 | crc32_2 = "rmqd:F20F38F1rM|rm/dw:66F20F38F1rM|rm/db:F20F38F0rM|rm/qb:", 1498 | pcmpestri_3 = "rmio:660F3A61rMU", 1499 | pcmpestrm_3 = "rmio:660F3A60rMU", 1500 | pcmpgtq_2 = "rmo:660F3837rM", 1501 | pcmpistri_3 = "rmio:660F3A63rMU", 1502 | pcmpistrm_3 = "rmio:660F3A62rMU", 1503 | popcnt_2 = "rmqdw:F30FB8rM", 1504 | 1505 | -- SSE4a 1506 | extrq_2 = "rro:660F79rM", 1507 | extrq_3 = "riio:660F780mUU", 1508 | insertq_2 = "rro:F20F79rM", 1509 | insertq_4 = "rriio:F20F78rMUU", 1510 | lzcnt_2 = "rmqdw:F30FBDrM", 1511 | movntsd_2 = "xr/qo:nF20F2BRm", 1512 | movntss_2 = "xr/do:F30F2BRm", 1513 | -- popcnt is also in SSE4.2 1514 | 1515 | -- AES-NI 1516 | aesdec_2 = "rmo:660F38DErM", 1517 | aesdeclast_2 = "rmo:660F38DFrM", 1518 | aesenc_2 = "rmo:660F38DCrM", 1519 | aesenclast_2 = "rmo:660F38DDrM", 1520 | aesimc_2 = "rmo:660F38DBrM", 1521 | aeskeygenassist_3 = "rmio:660F3ADFrMU", 1522 | pclmulqdq_3 = "rmio:660F3A44rMU", 1523 | 1524 | -- AVX FP ops 1525 | vaddsubpd_3 = "rrmoy:660FVD0rM", 1526 | vaddsubps_3 = 
"rrmoy:F20FVD0rM", 1527 | vandpd_3 = "rrmoy:660FV54rM", 1528 | vandps_3 = "rrmoy:0FV54rM", 1529 | vandnpd_3 = "rrmoy:660FV55rM", 1530 | vandnps_3 = "rrmoy:0FV55rM", 1531 | vblendpd_4 = "rrmioy:660F3AV0DrMU", 1532 | vblendps_4 = "rrmioy:660F3AV0CrMU", 1533 | vblendvpd_4 = "rrmroy:660F3AV4BrMs", 1534 | vblendvps_4 = "rrmroy:660F3AV4ArMs", 1535 | vbroadcastf128_2 = "rx/yo:660F38u1ArM", 1536 | vcmppd_4 = "rrmioy:660FVC2rMU", 1537 | vcmpps_4 = "rrmioy:0FVC2rMU", 1538 | vcmpsd_4 = "rrrio:F20FVC2rMU|rrxi/ooq:", 1539 | vcmpss_4 = "rrrio:F30FVC2rMU|rrxi/ood:", 1540 | vcomisd_2 = "rro:660Fu2FrM|rx/oq:", 1541 | vcomiss_2 = "rro:0Fu2FrM|rx/od:", 1542 | vcvtdq2pd_2 = "rro:F30FuE6rM|rx/oq:|rm/yo:", 1543 | vcvtdq2ps_2 = "rmoy:0Fu5BrM", 1544 | vcvtpd2dq_2 = "rmoy:F20FuE6rM", 1545 | vcvtpd2ps_2 = "rmoy:660Fu5ArM", 1546 | vcvtps2dq_2 = "rmoy:660Fu5BrM", 1547 | vcvtps2pd_2 = "rro:0Fu5ArM|rx/oq:|rm/yo:", 1548 | vcvtsd2si_2 = "rr/do:F20Fu2DrM|rx/dq:|rr/qo:|rxq:", 1549 | vcvtsd2ss_3 = "rrro:F20FV5ArM|rrx/ooq:", 1550 | vcvtsi2sd_3 = "rrm/ood:F20FV2ArM|rrm/ooq:F20FVX2ArM", 1551 | vcvtsi2ss_3 = "rrm/ood:F30FV2ArM|rrm/ooq:F30FVX2ArM", 1552 | vcvtss2sd_3 = "rrro:F30FV5ArM|rrx/ood:", 1553 | vcvtss2si_2 = "rr/do:F30Fu2DrM|rxd:|rr/qo:|rx/qd:", 1554 | vcvttpd2dq_2 = "rmo:660FuE6rM|rm/oy:660FuLE6rM", 1555 | vcvttps2dq_2 = "rmoy:F30Fu5BrM", 1556 | vcvttsd2si_2 = "rr/do:F20Fu2CrM|rx/dq:|rr/qo:|rxq:", 1557 | vcvttss2si_2 = "rr/do:F30Fu2CrM|rxd:|rr/qo:|rx/qd:", 1558 | vdppd_4 = "rrmio:660F3AV41rMU", 1559 | vdpps_4 = "rrmioy:660F3AV40rMU", 1560 | vextractf128_3 = "mri/oy:660F3AuL19RmU", 1561 | vextractps_3 = "mri/do:660F3Au17RmU", 1562 | vhaddpd_3 = "rrmoy:660FV7CrM", 1563 | vhaddps_3 = "rrmoy:F20FV7CrM", 1564 | vhsubpd_3 = "rrmoy:660FV7DrM", 1565 | vhsubps_3 = "rrmoy:F20FV7DrM", 1566 | vinsertf128_4 = "rrmi/yyo:660F3AV18rMU", 1567 | vinsertps_4 = "rrrio:660F3AV21rMU|rrxi/ood:", 1568 | vldmxcsr_1 = "xd:0FuAE2m", 1569 | vmaskmovps_3 = "rrxoy:660F38V2CrM|xrroy:660F38V2ERm", 1570 | vmaskmovpd_3 = 
"rrxoy:660F38V2DrM|xrroy:660F38V2FRm", 1571 | vmovapd_2 = "rmoy:660Fu28rM|mroy:660Fu29Rm", 1572 | vmovaps_2 = "rmoy:0Fu28rM|mroy:0Fu29Rm", 1573 | vmovd_2 = "rm/od:660Fu6ErM|rm/oq:660FuX6ErM|mr/do:660Fu7ERm|mr/qo:", 1574 | vmovq_2 = "rro:F30Fu7ErM|rx/oq:|xr/qo:660FuD6Rm", 1575 | vmovddup_2 = "rmy:F20Fu12rM|rro:|rx/oq:", 1576 | vmovhlps_3 = "rrro:0FV12rM", 1577 | vmovhpd_2 = "xr/qo:660Fu17Rm", 1578 | vmovhpd_3 = "rrx/ooq:660FV16rM", 1579 | vmovhps_2 = "xr/qo:0Fu17Rm", 1580 | vmovhps_3 = "rrx/ooq:0FV16rM", 1581 | vmovlhps_3 = "rrro:0FV16rM", 1582 | vmovlpd_2 = "xr/qo:660Fu13Rm", 1583 | vmovlpd_3 = "rrx/ooq:660FV12rM", 1584 | vmovlps_2 = "xr/qo:0Fu13Rm", 1585 | vmovlps_3 = "rrx/ooq:0FV12rM", 1586 | vmovmskpd_2 = "rr/do:660Fu50rM|rr/dy:660FuL50rM", 1587 | vmovmskps_2 = "rr/do:0Fu50rM|rr/dy:0FuL50rM", 1588 | vmovntpd_2 = "xroy:660Fu2BRm", 1589 | vmovntps_2 = "xroy:0Fu2BRm", 1590 | vmovsd_2 = "rx/oq:F20Fu10rM|xr/qo:F20Fu11Rm", 1591 | vmovsd_3 = "rrro:F20FV10rM", 1592 | vmovshdup_2 = "rmoy:F30Fu16rM", 1593 | vmovsldup_2 = "rmoy:F30Fu12rM", 1594 | vmovss_2 = "rx/od:F30Fu10rM|xr/do:F30Fu11Rm", 1595 | vmovss_3 = "rrro:F30FV10rM", 1596 | vmovupd_2 = "rmoy:660Fu10rM|mroy:660Fu11Rm", 1597 | vmovups_2 = "rmoy:0Fu10rM|mroy:0Fu11Rm", 1598 | vorpd_3 = "rrmoy:660FV56rM", 1599 | vorps_3 = "rrmoy:0FV56rM", 1600 | vpermilpd_3 = "rrmoy:660F38V0DrM|rmioy:660F3Au05rMU", 1601 | vpermilps_3 = "rrmoy:660F38V0CrM|rmioy:660F3Au04rMU", 1602 | vperm2f128_4 = "rrmiy:660F3AV06rMU", 1603 | vptestpd_2 = "rmoy:660F38u0FrM", 1604 | vptestps_2 = "rmoy:660F38u0ErM", 1605 | vrcpps_2 = "rmoy:0Fu53rM", 1606 | vrcpss_3 = "rrro:F30FV53rM|rrx/ood:", 1607 | vrsqrtps_2 = "rmoy:0Fu52rM", 1608 | vrsqrtss_3 = "rrro:F30FV52rM|rrx/ood:", 1609 | vroundpd_3 = "rmioy:660F3Au09rMU", 1610 | vroundps_3 = "rmioy:660F3Au08rMU", 1611 | vroundsd_4 = "rrrio:660F3AV0BrMU|rrxi/ooq:", 1612 | vroundss_4 = "rrrio:660F3AV0ArMU|rrxi/ood:", 1613 | vshufpd_4 = "rrmioy:660FVC6rMU", 1614 | vshufps_4 = "rrmioy:0FVC6rMU", 1615 | vsqrtps_2 = 
"rmoy:0Fu51rM", 1616 | vsqrtss_2 = "rro:F30Fu51rM|rx/od:", 1617 | vsqrtpd_2 = "rmoy:660Fu51rM", 1618 | vsqrtsd_2 = "rro:F20Fu51rM|rx/oq:", 1619 | vstmxcsr_1 = "xd:0FuAE3m", 1620 | vucomisd_2 = "rro:660Fu2ErM|rx/oq:", 1621 | vucomiss_2 = "rro:0Fu2ErM|rx/od:", 1622 | vunpckhpd_3 = "rrmoy:660FV15rM", 1623 | vunpckhps_3 = "rrmoy:0FV15rM", 1624 | vunpcklpd_3 = "rrmoy:660FV14rM", 1625 | vunpcklps_3 = "rrmoy:0FV14rM", 1626 | vxorpd_3 = "rrmoy:660FV57rM", 1627 | vxorps_3 = "rrmoy:0FV57rM", 1628 | vzeroall_0 = "0FuL77", 1629 | vzeroupper_0 = "0Fu77", 1630 | 1631 | -- AVX2 FP ops 1632 | vbroadcastss_2 = "rx/od:660F38u18rM|rx/yd:|rro:|rr/yo:", 1633 | vbroadcastsd_2 = "rx/yq:660F38u19rM|rr/yo:", 1634 | -- *vgather* (!vsib) 1635 | vpermpd_3 = "rmiy:660F3AuX01rMU", 1636 | vpermps_3 = "rrmy:660F38V16rM", 1637 | 1638 | -- AVX, AVX2 integer ops 1639 | -- In general, xmm requires AVX, ymm requires AVX2. 1640 | vaesdec_3 = "rrmo:660F38VDErM", 1641 | vaesdeclast_3 = "rrmo:660F38VDFrM", 1642 | vaesenc_3 = "rrmo:660F38VDCrM", 1643 | vaesenclast_3 = "rrmo:660F38VDDrM", 1644 | vaesimc_2 = "rmo:660F38uDBrM", 1645 | vaeskeygenassist_3 = "rmio:660F3AuDFrMU", 1646 | vlddqu_2 = "rxoy:F20FuF0rM", 1647 | vmaskmovdqu_2 = "rro:660FuF7rM", 1648 | vmovdqa_2 = "rmoy:660Fu6FrM|mroy:660Fu7FRm", 1649 | vmovdqu_2 = "rmoy:F30Fu6FrM|mroy:F30Fu7FRm", 1650 | vmovntdq_2 = "xroy:660FuE7Rm", 1651 | vmovntdqa_2 = "rxoy:660F38u2ArM", 1652 | vmpsadbw_4 = "rrmioy:660F3AV42rMU", 1653 | vpabsb_2 = "rmoy:660F38u1CrM", 1654 | vpabsd_2 = "rmoy:660F38u1ErM", 1655 | vpabsw_2 = "rmoy:660F38u1DrM", 1656 | vpackusdw_3 = "rrmoy:660F38V2BrM", 1657 | vpalignr_4 = "rrmioy:660F3AV0FrMU", 1658 | vpblendvb_4 = "rrmroy:660F3AV4CrMs", 1659 | vpblendw_4 = "rrmioy:660F3AV0ErMU", 1660 | vpclmulqdq_4 = "rrmio:660F3AV44rMU", 1661 | vpcmpeqq_3 = "rrmoy:660F38V29rM", 1662 | vpcmpestri_3 = "rmio:660F3Au61rMU", 1663 | vpcmpestrm_3 = "rmio:660F3Au60rMU", 1664 | vpcmpgtq_3 = "rrmoy:660F38V37rM", 1665 | vpcmpistri_3 = "rmio:660F3Au63rMU", 1666 | 
vpcmpistrm_3 = "rmio:660F3Au62rMU", 1667 | vpextrb_3 = "rri/do:660F3Au14nRmU|rri/qo:|xri/bo:", 1668 | vpextrw_3 = "rri/do:660FuC5rMU|xri/wo:660F3Au15nRmU", 1669 | vpextrd_3 = "mri/do:660F3Au16RmU", 1670 | vpextrq_3 = "mri/qo:660F3Au16RmU", 1671 | vphaddw_3 = "rrmoy:660F38V01rM", 1672 | vphaddd_3 = "rrmoy:660F38V02rM", 1673 | vphaddsw_3 = "rrmoy:660F38V03rM", 1674 | vphminposuw_2 = "rmo:660F38u41rM", 1675 | vphsubw_3 = "rrmoy:660F38V05rM", 1676 | vphsubd_3 = "rrmoy:660F38V06rM", 1677 | vphsubsw_3 = "rrmoy:660F38V07rM", 1678 | vpinsrb_4 = "rrri/ood:660F3AV20rMU|rrxi/oob:", 1679 | vpinsrw_4 = "rrri/ood:660FVC4rMU|rrxi/oow:", 1680 | vpinsrd_4 = "rrmi/ood:660F3AV22rMU", 1681 | vpinsrq_4 = "rrmi/ooq:660F3AVX22rMU", 1682 | vpmaddubsw_3 = "rrmoy:660F38V04rM", 1683 | vpmaxsb_3 = "rrmoy:660F38V3CrM", 1684 | vpmaxsd_3 = "rrmoy:660F38V3DrM", 1685 | vpmaxuw_3 = "rrmoy:660F38V3ErM", 1686 | vpmaxud_3 = "rrmoy:660F38V3FrM", 1687 | vpminsb_3 = "rrmoy:660F38V38rM", 1688 | vpminsd_3 = "rrmoy:660F38V39rM", 1689 | vpminuw_3 = "rrmoy:660F38V3ArM", 1690 | vpminud_3 = "rrmoy:660F38V3BrM", 1691 | vpmovmskb_2 = "rr/do:660FuD7rM|rr/dy:660FuLD7rM", 1692 | vpmovsxbw_2 = "rroy:660F38u20rM|rx/oq:|rx/yo:", 1693 | vpmovsxbd_2 = "rroy:660F38u21rM|rx/od:|rx/yq:", 1694 | vpmovsxbq_2 = "rroy:660F38u22rM|rx/ow:|rx/yd:", 1695 | vpmovsxwd_2 = "rroy:660F38u23rM|rx/oq:|rx/yo:", 1696 | vpmovsxwq_2 = "rroy:660F38u24rM|rx/od:|rx/yq:", 1697 | vpmovsxdq_2 = "rroy:660F38u25rM|rx/oq:|rx/yo:", 1698 | vpmovzxbw_2 = "rroy:660F38u30rM|rx/oq:|rx/yo:", 1699 | vpmovzxbd_2 = "rroy:660F38u31rM|rx/od:|rx/yq:", 1700 | vpmovzxbq_2 = "rroy:660F38u32rM|rx/ow:|rx/yd:", 1701 | vpmovzxwd_2 = "rroy:660F38u33rM|rx/oq:|rx/yo:", 1702 | vpmovzxwq_2 = "rroy:660F38u34rM|rx/od:|rx/yq:", 1703 | vpmovzxdq_2 = "rroy:660F38u35rM|rx/oq:|rx/yo:", 1704 | vpmuldq_3 = "rrmoy:660F38V28rM", 1705 | vpmulhrsw_3 = "rrmoy:660F38V0BrM", 1706 | vpmulld_3 = "rrmoy:660F38V40rM", 1707 | vpshufb_3 = "rrmoy:660F38V00rM", 1708 | vpshufd_3 = "rmioy:660Fu70rMU", 
1709 | vpshufhw_3 = "rmioy:F30Fu70rMU", 1710 | vpshuflw_3 = "rmioy:F20Fu70rMU", 1711 | vpsignb_3 = "rrmoy:660F38V08rM", 1712 | vpsignw_3 = "rrmoy:660F38V09rM", 1713 | vpsignd_3 = "rrmoy:660F38V0ArM", 1714 | vpslldq_3 = "rrioy:660Fv737mU", 1715 | vpsllw_3 = "rrmoy:660FVF1rM|rrioy:660Fv716mU", 1716 | vpslld_3 = "rrmoy:660FVF2rM|rrioy:660Fv726mU", 1717 | vpsllq_3 = "rrmoy:660FVF3rM|rrioy:660Fv736mU", 1718 | vpsraw_3 = "rrmoy:660FVE1rM|rrioy:660Fv714mU", 1719 | vpsrad_3 = "rrmoy:660FVE2rM|rrioy:660Fv724mU", 1720 | vpsrldq_3 = "rrioy:660Fv733mU", 1721 | vpsrlw_3 = "rrmoy:660FVD1rM|rrioy:660Fv712mU", 1722 | vpsrld_3 = "rrmoy:660FVD2rM|rrioy:660Fv722mU", 1723 | vpsrlq_3 = "rrmoy:660FVD3rM|rrioy:660Fv732mU", 1724 | vptest_2 = "rmoy:660F38u17rM", 1725 | 1726 | -- AVX2 integer ops 1727 | vbroadcasti128_2 = "rx/yo:660F38u5ArM", 1728 | vinserti128_4 = "rrmi/yyo:660F3AV38rMU", 1729 | vextracti128_3 = "mri/oy:660F3AuL39RmU", 1730 | vpblendd_4 = "rrmioy:660F3AV02rMU", 1731 | vpbroadcastb_2 = "rro:660F38u78rM|rx/ob:|rr/yo:|rx/yb:", 1732 | vpbroadcastw_2 = "rro:660F38u79rM|rx/ow:|rr/yo:|rx/yw:", 1733 | vpbroadcastd_2 = "rro:660F38u58rM|rx/od:|rr/yo:|rx/yd:", 1734 | vpbroadcastq_2 = "rro:660F38u59rM|rx/oq:|rr/yo:|rx/yq:", 1735 | vpermd_3 = "rrmy:660F38V36rM", 1736 | vpermq_3 = "rmiy:660F3AuX00rMU", 1737 | -- *vpgather* (!vsib) 1738 | vperm2i128_4 = "rrmiy:660F3AV46rMU", 1739 | vpmaskmovd_3 = "rrxoy:660F38V8CrM|xrroy:660F38V8ERm", 1740 | vpmaskmovq_3 = "rrxoy:660F38VX8CrM|xrroy:660F38VX8ERm", 1741 | vpsllvd_3 = "rrmoy:660F38V47rM", 1742 | vpsllvq_3 = "rrmoy:660F38VX47rM", 1743 | vpsravd_3 = "rrmoy:660F38V46rM", 1744 | vpsrlvd_3 = "rrmoy:660F38V45rM", 1745 | vpsrlvq_3 = "rrmoy:660F38VX45rM", 1746 | 1747 | -- Intel ADX 1748 | adcx_2 = "rmqd:660F38F6rM", 1749 | adox_2 = "rmqd:F30F38F6rM", 1750 | 1751 | -- BMI1 1752 | andn_3 = "rrmqd:0F38VF2rM", 1753 | bextr_3 = "rmrqd:0F38wF7rM", 1754 | blsi_2 = "rmqd:0F38vF33m", 1755 | blsmsk_2 = "rmqd:0F38vF32m", 1756 | blsr_2 = "rmqd:0F38vF31m", 1757 
| tzcnt_2 = "rmqdw:F30FBCrM", 1758 | 1759 | -- BMI2 1760 | bzhi_3 = "rmrqd:0F38wF5rM", 1761 | mulx_3 = "rrmqd:F20F38VF6rM", 1762 | pdep_3 = "rrmqd:F20F38VF5rM", 1763 | pext_3 = "rrmqd:F30F38VF5rM", 1764 | rorx_3 = "rmSqd:F20F3AuF0rMS", 1765 | sarx_3 = "rmrqd:F30F38wF7rM", 1766 | shrx_3 = "rmrqd:F20F38wF7rM", 1767 | shlx_3 = "rmrqd:660F38wF7rM", 1768 | 1769 | -- FMA3 1770 | vfmaddsub132pd_3 = "rrmoy:660F38VX96rM", 1771 | vfmaddsub132ps_3 = "rrmoy:660F38V96rM", 1772 | vfmaddsub213pd_3 = "rrmoy:660F38VXA6rM", 1773 | vfmaddsub213ps_3 = "rrmoy:660F38VA6rM", 1774 | vfmaddsub231pd_3 = "rrmoy:660F38VXB6rM", 1775 | vfmaddsub231ps_3 = "rrmoy:660F38VB6rM", 1776 | 1777 | vfmsubadd132pd_3 = "rrmoy:660F38VX97rM", 1778 | vfmsubadd132ps_3 = "rrmoy:660F38V97rM", 1779 | vfmsubadd213pd_3 = "rrmoy:660F38VXA7rM", 1780 | vfmsubadd213ps_3 = "rrmoy:660F38VA7rM", 1781 | vfmsubadd231pd_3 = "rrmoy:660F38VXB7rM", 1782 | vfmsubadd231ps_3 = "rrmoy:660F38VB7rM", 1783 | 1784 | vfmadd132pd_3 = "rrmoy:660F38VX98rM", 1785 | vfmadd132ps_3 = "rrmoy:660F38V98rM", 1786 | vfmadd132sd_3 = "rrro:660F38VX99rM|rrx/ooq:", 1787 | vfmadd132ss_3 = "rrro:660F38V99rM|rrx/ood:", 1788 | vfmadd213pd_3 = "rrmoy:660F38VXA8rM", 1789 | vfmadd213ps_3 = "rrmoy:660F38VA8rM", 1790 | vfmadd213sd_3 = "rrro:660F38VXA9rM|rrx/ooq:", 1791 | vfmadd213ss_3 = "rrro:660F38VA9rM|rrx/ood:", 1792 | vfmadd231pd_3 = "rrmoy:660F38VXB8rM", 1793 | vfmadd231ps_3 = "rrmoy:660F38VB8rM", 1794 | vfmadd231sd_3 = "rrro:660F38VXB9rM|rrx/ooq:", 1795 | vfmadd231ss_3 = "rrro:660F38VB9rM|rrx/ood:", 1796 | 1797 | vfmsub132pd_3 = "rrmoy:660F38VX9ArM", 1798 | vfmsub132ps_3 = "rrmoy:660F38V9ArM", 1799 | vfmsub132sd_3 = "rrro:660F38VX9BrM|rrx/ooq:", 1800 | vfmsub132ss_3 = "rrro:660F38V9BrM|rrx/ood:", 1801 | vfmsub213pd_3 = "rrmoy:660F38VXAArM", 1802 | vfmsub213ps_3 = "rrmoy:660F38VAArM", 1803 | vfmsub213sd_3 = "rrro:660F38VXABrM|rrx/ooq:", 1804 | vfmsub213ss_3 = "rrro:660F38VABrM|rrx/ood:", 1805 | vfmsub231pd_3 = "rrmoy:660F38VXBArM", 1806 | vfmsub231ps_3 = 
"rrmoy:660F38VBArM", 1807 | vfmsub231sd_3 = "rrro:660F38VXBBrM|rrx/ooq:", 1808 | vfmsub231ss_3 = "rrro:660F38VBBrM|rrx/ood:", 1809 | 1810 | vfnmadd132pd_3 = "rrmoy:660F38VX9CrM", 1811 | vfnmadd132ps_3 = "rrmoy:660F38V9CrM", 1812 | vfnmadd132sd_3 = "rrro:660F38VX9DrM|rrx/ooq:", 1813 | vfnmadd132ss_3 = "rrro:660F38V9DrM|rrx/ood:", 1814 | vfnmadd213pd_3 = "rrmoy:660F38VXACrM", 1815 | vfnmadd213ps_3 = "rrmoy:660F38VACrM", 1816 | vfnmadd213sd_3 = "rrro:660F38VXADrM|rrx/ooq:", 1817 | vfnmadd213ss_3 = "rrro:660F38VADrM|rrx/ood:", 1818 | vfnmadd231pd_3 = "rrmoy:660F38VXBCrM", 1819 | vfnmadd231ps_3 = "rrmoy:660F38VBCrM", 1820 | vfnmadd231sd_3 = "rrro:660F38VXBDrM|rrx/ooq:", 1821 | vfnmadd231ss_3 = "rrro:660F38VBDrM|rrx/ood:", 1822 | 1823 | vfnmsub132pd_3 = "rrmoy:660F38VX9ErM", 1824 | vfnmsub132ps_3 = "rrmoy:660F38V9ErM", 1825 | vfnmsub132sd_3 = "rrro:660F38VX9FrM|rrx/ooq:", 1826 | vfnmsub132ss_3 = "rrro:660F38V9FrM|rrx/ood:", 1827 | vfnmsub213pd_3 = "rrmoy:660F38VXAErM", 1828 | vfnmsub213ps_3 = "rrmoy:660F38VAErM", 1829 | vfnmsub213sd_3 = "rrro:660F38VXAFrM|rrx/ooq:", 1830 | vfnmsub213ss_3 = "rrro:660F38VAFrM|rrx/ood:", 1831 | vfnmsub231pd_3 = "rrmoy:660F38VXBErM", 1832 | vfnmsub231ps_3 = "rrmoy:660F38VBErM", 1833 | vfnmsub231sd_3 = "rrro:660F38VXBFrM|rrx/ooq:", 1834 | vfnmsub231ss_3 = "rrro:660F38VBFrM|rrx/ood:", 1835 | } 1836 | 1837 | ------------------------------------------------------------------------------ 1838 | 1839 | -- Arithmetic ops. 1840 | for name,n in pairs{ add = 0, ["or"] = 1, adc = 2, sbb = 3, 1841 | ["and"] = 4, sub = 5, xor = 6, cmp = 7 } do 1842 | local n8 = shl(n, 3) 1843 | map_op[name.."_2"] = format( 1844 | "mr:%02XRm|rm:%02XrM|mI1qdw:81%XmI|mS1qdw:83%XmS|Ri1qdwb:%02Xri|mi1qdwb:81%Xmi", 1845 | 1+n8, 3+n8, n, n, 5+n8, n) 1846 | end 1847 | 1848 | -- Shift ops. 
-- Shift/rotate family. Every mnemonic gets the same three templates
-- (count of 1, count in cl, immediate byte count); only the trailing hex
-- digit, which the pattern processor splits off as the ModRM spare field,
-- differs. `sal' shares digit 4 with `shl'.
for mnemonic, digit in pairs{
  rol = 0, ror = 1, rcl = 2, rcr = 3,
  shl = 4, shr = 5, sar = 7, sal = 4,
} do
  local template = format("m1:D1%Xm|mC1qdwb:D3%Xm|mi:C1%XmU",
                          digit, digit, digit)
  map_op[mnemonic.."_2"] = template
end

-- Condition-code driven ops: one jump, one setcc and one cmovcc per entry
-- of map_cc, with the condition number as the low opcode nibble.
for cond, code in pairs(map_cc) do
  local jump_tpl = format("J.:n0F8%XJ", code)     -- short form would be 7%X
  local set_tpl = format("mb:n0F9%X2m", code)
  local cmov_tpl = format("rmqdw:0F4%XrM", code)  -- P6+
  map_op["j"..cond.."_1"] = jump_tpl
  map_op["set"..cond.."_1"] = set_tpl
  map_op["cmov"..cond.."_2"] = cmov_tpl
end

-- x87 arithmetic. `code' selects the operation. `direct' is the opcode byte
-- of the register form; `swapped' is the byte used by the `fFf' (stx, st0)
-- and popping (`p') templates: equal to `direct' for codes below 4,
-- otherwise moved up by 8 for even codes and down by 8 for odd codes.
for mnemonic, code in pairs{
  add = 0, mul = 1, com = 2, comp = 3,
  sub = 4, subr = 5, div = 6, divr = 7,
} do
  local direct = 0xc0 + shl(code, 3)
  local swapped = direct
  if code >= 4 then
    if code % 2 == 0 then
      swapped = direct + 8
    else
      swapped = direct - 8
    end
  end

  local fop = "f"..mnemonic
  map_op[fop.."_1"] = format("ff:D8%02Xr|xd:D8%Xm|xq:nDC%Xm",
                             direct, code, code)

  local is_compare = (code == 2 or code == 3)
  if is_compare then
    -- com/comp: no reversed-operand template and no popping variants.
    map_op[fop.."_2"] = format("Fff:D8%02XR|Fx2d:D8%XM|Fx2q:nDC%XM",
                               direct, code, code)
  else
    map_op[fop.."_2"] = format("Fff:D8%02XR|fFf:DC%02Xr|Fx2d:D8%XM|Fx2q:nDC%XM",
                               direct, swapped, code, code)
    map_op[fop.."p_1"] = format("ff:DE%02Xr", swapped)
    map_op[fop.."p_2"] = format("fFf:DE%02Xr", swapped)
  end

  -- Integer-operand variant (fiadd, fimul, ...).
  map_op["fi"..mnemonic.."_1"] = format("xd:DA%Xm|xw:nDE%Xm", code, code)
end

-- x87 conditional moves (P6+). Bits 0-1 of the condition number are shifted
-- to bits 3-4 and bit 2 to bit 8, then added to the base value 0xDAC0.
for cond, code in pairs{ b=0, e=1, be=2, u=3, nb=4, ne=5, nbe=6, nu=7 } do
  local low = shl(band(code, 3), 3)
  local high = shl(band(code, 4), 6)
  local opcode = 0xdac0 + low + high
  map_op["fcmov"..cond.."_1"] = format("ff:%04Xr", opcode)
  map_op["fcmov"..cond.."_2"] = format("Fff:%04XR", opcode)
end

-- SSE / AVX FP arithmetic ops.
-- Generate the packed/scalar single/double variants for each FP operation.
-- `n' is the low nibble of the 0F 5x opcode. The three-operand AVX forms are
-- generated for everything except sqrt (n == 1).
for name,n in pairs{ sqrt = 1, add = 8, mul = 9,
                     sub = 12, min = 13, div = 14, max = 15 } do
  map_op[name.."ps_2"] = format("rmo:0F5%XrM", n)
  map_op[name.."ss_2"] = format("rro:F30F5%XrM|rx/od:", n)
  map_op[name.."pd_2"] = format("rmo:660F5%XrM", n)
  map_op[name.."sd_2"] = format("rro:F20F5%XrM|rx/oq:", n)
  if n ~= 1 then
    map_op["v"..name.."ps_3"] = format("rrmoy:0FV5%XrM", n)
    map_op["v"..name.."ss_3"] = format("rrro:F30FV5%XrM|rrx/ood:", n)
    map_op["v"..name.."pd_3"] = format("rrmoy:660FV5%XrM", n)
    map_op["v"..name.."sd_3"] = format("rrro:F20FV5%XrM|rrx/ooq:", n)
  end
end

-- SSE2 / AVX / AVX2 integer arithmetic ops (66 0F leaf).
-- Each entry yields a two-operand SSE2 template and a three-operand
-- VEX-encoded template sharing the same opcode byte.
for name,n in pairs{
  paddb = 0xFC, paddw = 0xFD, paddd = 0xFE, paddq = 0xD4,
  paddsb = 0xEC, paddsw = 0xED, packssdw = 0x6B,
  packsswb = 0x63, packuswb = 0x67, paddusb = 0xDC,
  paddusw = 0xDD, pand = 0xDB, pandn = 0xDF, pavgb = 0xE0,
  pavgw = 0xE3, pcmpeqb = 0x74, pcmpeqd = 0x76,
  pcmpeqw = 0x75, pcmpgtb = 0x64, pcmpgtd = 0x66,
  pcmpgtw = 0x65, pmaddwd = 0xF5, pmaxsw = 0xEE,
  pmaxub = 0xDE, pminsw = 0xEA, pminub = 0xDA,
  pmulhuw = 0xE4, pmulhw = 0xE5, pmullw = 0xD5,
  pmuludq = 0xF4, por = 0xEB, psadbw = 0xF6, psubb = 0xF8,
  psubw = 0xF9, psubd = 0xFA, psubq = 0xFB, psubsb = 0xE8,
  psubsw = 0xE9, psubusb = 0xD8, psubusw = 0xD9,
  punpckhbw = 0x68, punpckhwd = 0x69, punpckhdq = 0x6A,
  punpckhqdq = 0x6D, punpcklbw = 0x60, punpcklwd = 0x61,
  punpckldq = 0x62, punpcklqdq = 0x6C, pxor = 0xEF
} do
  map_op[name.."_2"] = format("rmo:660F%02XrM", n)
  map_op["v"..name.."_3"] = format("rrmoy:660FV%02XrM", n)
end

------------------------------------------------------------------------------

-- Pattern characters that switch an opcode to VEX encoding. The value is the
-- index of the operand that the pattern processor removes from the operand
-- list and stores in the `v' field of the VEX descriptor; `false' means no
-- operand is taken.
--   u: no operand    v: 1st operand    V: 2nd operand    w: 3rd operand
-- The `w' entry is needed by the BMI templates in the op table (bextr, bzhi,
-- sarx, shrx, shlx). Without it `w' is not recognized as a VEX marker and
-- those ops are rejected with "bad char `w' in pattern".
local map_vexarg = { u = false, v = 1, V = 2, w = 3 }

-- Process pattern string.
-- Walk an encoding pattern one character at a time and emit the opcode,
-- prefixes, ModRM/SIB bytes and immediates through the w* writer functions.
--   pat      pattern part of a template (the text after the ':').
--   args     list of parsed operands; a VEX marker may remove one entry.
--   sz       operand size letter chosen by the caller (may be nil).
--   op       opcode name, only used in error messages.
--   needrex  when truthy, 16 is added to the rex value passed to wputop.
-- Nothing is returned; all output goes through the writer functions.
local function dopattern(pat, args, sz, op, needrex)
  local digit, addin, vex
  local opcode = 0      -- Opcode bytes accumulated so far (nil once emitted).
  local szov = sz       -- Size passed to wputop; cleared by 'n'.
  local narg = 1        -- Index of the next operand to use as an immediate.
  local rex = 0         -- Accumulated flag bits handed to wputop.

  -- Limit number of section buffer positions used by a single dasm_put().
  -- A single opcode needs a maximum of 6 positions.
  if secpos+6 > maxsecpos then wflush() end

  -- Process each character. The appended "|" acts as a terminator that
  -- forces the final opcode flush in the last branch below.
  for c in gmatch(pat.."|", ".") do
    if match(c, "%x") then -- Hex digit.
      -- '0'-'9' map to 0-9 directly; 'A'-'F' arrive as 17-22 and 'a'-'f'
      -- as 49-54, so both are shifted down to 10-15.
      digit = byte(c) - 48
      if digit > 48 then digit = digit - 39
      elseif digit > 16 then digit = digit - 7 end
      opcode = opcode*16 + digit
      addin = nil
    elseif c == "n" then -- Disable operand size mods for opcode.
      szov = nil
    elseif c == "X" then -- Force REX.W.
      rex = 8
    elseif c == "L" then -- Force VEX.L.
      -- Requires a VEX marker (see map_vexarg) earlier in the pattern;
      -- otherwise vex is still nil here.
      vex.l = true
    elseif c == "r" then -- Merge 1st operand regno. into opcode.
      addin = args[1]; opcode = opcode + (addin.reg % 8)
      if narg < 2 then narg = 2 end
    elseif c == "R" then -- Merge 2nd operand regno. into opcode.
      addin = args[2]; opcode = opcode + (addin.reg % 8)
      narg = 3
    elseif c == "m" or c == "M" then -- Encode ModRM/SIB.
      -- s becomes the spare value: either the register merged by a
      -- preceding r/R, or the last hex digit of the opcode.
      local s
      if addin then
        s = addin.reg
        opcode = opcode - band(s, 7) -- Undo regno opcode merge.
      else
        s = band(opcode, 15) -- Undo last digit.
        opcode = shr(opcode, 4)
      end
      -- 'm' encodes the 1st operand, 'M' the 2nd.
      local nn = c == "m" and 1 or 2
      local t = args[nn]
      if narg <= nn then narg = nn + 1 end
      -- Collect flag bits: 8 for size "q" (unless rex was already set),
      -- 1/2 for operand reg/xreg above 7, 4 for a spare above 7, 16 for
      -- needrex.
      if szov == "q" and rex == 0 then rex = rex + 8 end
      if t.reg and t.reg > 7 then rex = rex + 1 end
      if t.xreg and t.xreg > 7 then rex = rex + 2 end
      if s > 7 then rex = rex + 4 end
      if needrex then rex = rex + 16 end
      local psz, sk = wputop(szov, opcode, rex, vex, s < 0, t.vreg or t.vxreg)
      opcode = nil
      local imark = sub(pat, -1) -- Force a mark (ugly).
      -- Put ModRM/SIB with regno/last digit as spare.
      wputmrmsib(t, imark, s, addin and addin.vreg, psz, sk)
      addin = nil
    elseif map_vexarg[c] ~= nil then -- Encode using VEX prefix
      -- The bytes accumulated so far must end in 0F, 0F38 or 0F3A; they are
      -- stripped from opcode and recorded as m = 1, 2 or 3.
      local b = band(opcode, 255); opcode = shr(opcode, 8)
      local m = 1
      if b == 0x38 then m = 2
      elseif b == 0x3a then m = 3 end
      if m ~= 1 then b = band(opcode, 255); opcode = shr(opcode, 8) end
      if b ~= 0x0f then
        werror("expected `0F', `0F38', or `0F3A' to precede `"..c..
          "' in pattern `"..pat.."' for `"..op.."'")
      end
      -- A numeric map entry names the operand to take out of args.
      local v = map_vexarg[c]
      if v then v = remove(args, v) end
      -- A preceding 66/F3/F2 byte is stripped as well and recorded as
      -- p = 1, 2 or 3.
      b = band(opcode, 255)
      local p = 0
      if b == 0x66 then p = 1
      elseif b == 0xf3 then p = 2
      elseif b == 0xf2 then p = 3 end
      if p ~= 0 then opcode = shr(opcode, 8) end
      -- Anything still left in front is emitted as a plain opcode first.
      if opcode ~= 0 then wputop(nil, opcode, 0); opcode = 0 end
      vex = { m = m, p = p, v = v }
    else
      if opcode then -- Flush opcode.
        if szov == "q" and rex == 0 then rex = rex + 8 end
        if needrex then rex = rex + 16 end
        if addin and addin.reg == -1 then
          -- reg == -1 was merged as (-1 % 8) == 7 above; take it back out
          -- and let wvreg handle the register instead.
          local psz, sk = wputop(szov, opcode - 7, rex, vex, true)
          wvreg("opcode", addin.vreg, psz, sk)
        else
          if addin and addin.reg > 7 then rex = rex + 1 end
          wputop(szov, opcode, rex, vex)
        end
        opcode = nil
      end
      if c == "|" then break end
      if c == "o" then -- Offset (pure 32 bit displacement).
        wputdarg(args[1].disp); if narg < 2 then narg = 2 end
      elseif c == "O" then
        -- Same as 'o', but uses the displacement of the 2nd operand.
        wputdarg(args[2].disp); narg = 3
      else
        -- Anything else is an immediate operand.
        local a = args[narg]
        narg = narg + 1
        local mode, imm = a.mode, a.imm
        -- Operands of mode "iJ" are only accepted for i, I and J.
        if mode == "iJ" and not match("iIJ", c) then
          werror("bad operand size for label")
        end
        if c == "S" then
          wputsbarg(imm)
        elseif c == "U" then
          wputbarg(imm)
        elseif c == "W" then
          wputwarg(imm)
        elseif c == "i" or c == "I" then
          if mode == "iJ" then
            wputlabel("IMM_", imm, 1)
          elseif mode == "iI" and c == "I" then
            waction(sz == "w" and "IMM_WB" or "IMM_DB", imm)
          else
            wputszarg(sz, imm)
          end
        elseif c == "J" then
          if mode == "iPJ" then
            waction("REL_A", imm) -- !x64 (secpos)
          else
            wputlabel("REL_", imm, 2)
          end
        elseif c == "s" then
          -- Register operand emitted as a byte with the register number
          -- shifted left by 4; a negative number defers to wvreg.
          local reg = a.reg
          if reg < 0 then
            wputb(0)
            wvreg("imm.hi", a.vreg)
          else
            wputb(shl(reg, 4))
          end
        else
          werror("bad char `"..c.."' in pattern `"..pat.."' for `"..op.."'")
        end
      end
    end
  end
end

------------------------------------------------------------------------------

-- Mapping of operand modes to short names. Suppress output with '#'.
-- Used by templatehelp() to turn the mode letters of a template into a
-- human-readable operand list.
local map_modename = {
  r = "reg", R = "eax", C = "cl", x = "mem", m = "mrm", i = "imm",
  f = "stx", F = "st0", J = "lbl", ["1"] = "1",
  I = "#", S = "#", O = "#",
}

-- Return a table/string showing all possible operand modes.
--   template  full template string with '|'-separated alternatives.
--   nparams   number of operands of the opcode.
-- Returns "" when nparams is 0, otherwise a list with one "a, b, ..." string
-- per alternative. Alternatives containing a '#' marker are left out.
local function templatehelp(template, nparams)
  if nparams == 0 then return "" end
  local t = {}
  for tm in gmatch(template, "[^%|]+") do
    -- The first nparams characters of an alternative are its mode letters.
    local s = map_modename[sub(tm, 1, 1)]
    s = s..gsub(sub(tm, 2, nparams), ".", function(c)
      return ", "..map_modename[c]
    end)
    if not match(s, "#") then t[#t+1] = s end
  end
  return t
end

-- Match operand modes against mode match part of template.
-- Returns true when the mode string of every operand matches the mode letter
-- at the same position of the template alternative `tm'; otherwise returns
-- nothing.
local function matchtm(tm, args)
  for pos, operand in ipairs(args) do
    local wanted = sub(tm, pos, pos)
    if not match(operand.mode, wanted) then return end
  end
  return true
end

-- Generic handler for every opcode described by a template string.
-- Without `params' it returns the help text for the template. Otherwise it
-- parses the operands, picks the first alternative whose modes and sizes fit
-- and hands its pattern to dopattern(). If nothing fits it reports an error.
map_op[".template__"] = function(params, template, nparams)
  if not params then return templatehelp(template, nparams) end
  local args = {}

  -- An opcode without operands has no match part: the template is the
  -- pattern and the size is fixed to "d".
  if #params == 0 then
    dopattern(template, args, "d", params.op, nil)
    return
  end

  -- Parse all operands. Track the common operand size, whether sizes
  -- disagree, and the (consistent) needrex flag of the operands.
  local sz, szmix, needrex
  for idx, text in ipairs(params) do
    local operand = parseoperand(text)
    args[idx] = operand
    local opsz = operand.opsize
    if opsz then
      if sz and sz ~= opsz then
        szmix = true
      else
        sz = opsz
      end
    end
    local rexreq = operand.needrex
    if rexreq ~= nil then
      if needrex == nil then
        needrex = rexreq
      elseif needrex ~= rexreq then
        werror("bad mix of byte-addressable registers")
      end
    end
  end

  -- Walk the '|'-separated match:pattern alternatives in order.
  local gotmatch, lastpat
  for tm in gmatch(template, "[^%|]+") do
    -- The size match starts right after the mode letters; an empty pattern
    -- means "reuse the pattern of the previous alternative".
    local szm, pat = match(tm, "^(.-):(.*)$", #args+1)
    if pat == "" then
      pat = lastpat
    else
      lastpat = pat
    end
    if matchtm(tm, args) then
      local prefix = sub(szm, 1, 1)
      if prefix == "/" then
        -- "/xyz": every listed size must equal the size of the operand at
        -- the same position (checked from the last listed size backwards).
        local exact = true
        for pos = #szm, 2, -1 do
          if args[pos-1].opsize ~= sub(szm, pos, pos) then
            exact = false
            break
          end
        end
        if exact then
          dopattern(pat, args, sz, params.op, needrex)
          return
        end
      else
        -- Common-size match. "1"/"2" take the size from that operand and
        -- clear the mixed-size flag; "." accepts any size.
        local szp = sz
        if szm == "" then
          -- No size list given: fall back to the default sizes.
          if x64 then szm = "qdwb" else szm = "dwb" end
        end
        if prefix == "1" then
          szp = args[1].opsize
          szmix = nil
        elseif prefix == "2" then
          szp = args[2].opsize
          szmix = nil
        end
        if not szmix and (prefix == "." or match(szm, szp or "#")) then
          dopattern(pat, args, szp, params.op, needrex)
          return
        end
      end
      gotmatch = true
    end
  end

  -- No alternative was usable: choose the most specific complaint.
  local msg
  if not gotmatch then
    msg = "bad operand mode"
  elseif szmix then
    msg = "mixed operand size"
  elseif sz then
    msg = "bad operand size"
  else
    msg = "missing operand size"
  end

  werror(msg.." in `"..opmodestr(params.op, args).."'")
end

------------------------------------------------------------------------------

-- mov64: x64-only opcode taking a 64 bit immediate or absolute displacement.
if x64 then
  function map_op.mov64_2(params)
    if not params then return { "reg, imm", "reg, [disp]", "[disp], reg" } end
    if secpos+2 > maxsecpos then wflush() end

    -- Captures the expression between square brackets, minus padding.
    local memref = "^%[%s*(.-)%s*%]$"
    local opcode, sz, rex, vreg

    local op64 = match(params[1], memref)
    if op64 then
      -- Form "[disp], reg": opcode A3.
      local src = parseoperand(params[2])
      if src.mode ~= "rmR" then werror("bad operand mode") end
      sz = src.opsize
      if sz == "q" then rex = 8 else rex = 0 end
      opcode = 0xa3
    else
      op64 = match(params[2], memref)
      local dst = parseoperand(params[1])
      if op64 then
        -- Form "reg, [disp]": opcode A1.
        if dst.mode ~= "rmR" then werror("bad operand mode") end
        sz = dst.opsize
        if sz == "q" then rex = 8 else rex = 0 end
        opcode = 0xa1
      else
        -- Form "reg, imm": opcode B8 plus the low three register bits.
        -- Only qword registers are accepted.
        if sub(dst.mode, 1, 1) ~= "r" or dst.opsize ~= "q" then
          werror("bad operand mode")
        end
        op64 = params[2]
        if dst.reg == -1 then
          -- Register number -1: keep the base opcode and pass vreg on.
          vreg = dst.vreg
          opcode = 0xb8
        else
          opcode = 0xb8 + band(dst.reg, 7)
        end
        if dst.reg > 7 then rex = 9 else rex = 8 end
      end
    end

    local psz, sk = wputop(sz, opcode, rex, nil, vreg)
    wvreg("opcode", vreg, psz, sk)

    -- The 64 bit value is emitted as two IMM_D actions, low half first.
    local lo_expr, hi_expr
    if luamode then
      lo_expr = format("ffi.cast(\"uintptr_t\", %s) %% 2^32", op64)
      hi_expr = format("ffi.cast(\"uintptr_t\", %s) / 2^32", op64)
    else
      lo_expr = format("(unsigned int)(%s)", op64)
      hi_expr = format("(unsigned int)((%s)>>32)", op64)
    end
    waction("IMM_D", lo_expr)
    waction("IMM_D", hi_expr)
  end
end

------------------------------------------------------------------------------

-- Data storage pseudo-opcodes (.byte, .sbyte, .word, .dword, .aword).
-- The element size is the second character of the opcode name; "a" is
-- replaced by addrsize. Each operand must be an immediate whose size,
-- if it has one, equals the element size.
local function op_data(params)
  if not params then return "imm..." end
  local sz = sub(params.op, 2, 2)
  if sz == "a" then sz = addrsize end
  for _, text in ipairs(params) do
    local operand = parseoperand(text)
    local is_imm = sub(operand.mode, 1, 1) == "i"
    local size_ok = not operand.opsize or operand.opsize == sz
    if not (is_imm and size_ok) then
      werror("bad mode or size in `"..text.."'")
    end
    if operand.mode == "iJ" then
      wputlabel("IMM_", operand.imm, 1)
    else
      wputszarg(sz, operand.imm)
    end
    if secpos+2 > maxsecpos then wflush() end
  end
end

for _, directive in ipairs{
  ".byte_*", ".sbyte_*", ".word_*", ".dword_*", ".aword_*",
} do
  map_op[directive] = op_data
end

------------------------------------------------------------------------------

-- .actionlist: marks where the action list is written to the output.
-- The name is passed through unchecked.
map_op[".actionlist_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1]
  wline(function(out) writeactions(out, cvar) end)
end

-- .globals: marks where the global enum is written to the output.
-- The prefix is passed through unchecked.
map_op[".globals_1"] = function(params)
  if not params then return "prefix" end
  local enum_prefix = params[1]
  wline(function(out) writeglobals(out, enum_prefix) end)
end

-- .globalnames: marks where the global names are written to the output.
-- The name is passed through unchecked.
map_op[".globalnames_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1]
  wline(function(out) writeglobalnames(out, cvar) end)
end

-- .externnames: marks where the extern names are written to the output.
-- The name is passed through unchecked.
map_op[".externnames_1"] = function(params)
  if not params then return "cvar" end
  local cvar = params[1]
  wline(function(out) writeexternnames(out, cvar) end)
end

------------------------------------------------------------------------------

-- .label: label definition (the trailing colon form is converted to this).
-- A numeric label value of mode "iJ", or in the range 1..9, produces
-- LABEL_LG; any other "iJ" operand produces LABEL_PC. An optional second
-- operand assigns an address to the label.
map_op[".label_2"] = function(params)
  if not params then return "[1-9] | ->global | =>pcexpr [, addr]" end
  if secpos+2 > maxsecpos then wflush() end

  local label = parseoperand(params[1])
  local mode, imm = label.mode, label.imm
  local is_jump_label = (mode == "iJ")
  if type(imm) == "number" and (is_jump_label or (imm >= 1 and imm <= 9)) then
    -- Local label (1: ... 9:) or global label (->global:).
    waction("LABEL_LG", nil, 1)
    wputxb(imm)
  elseif is_jump_label then
    -- PC label (=>pcexpr:).
    waction("LABEL_PC", imm)
  else
    werror("bad label definition")
  end

  -- SETLABEL has to come directly after LABEL_LG/LABEL_PC.
  local addr = params[2]
  if not addr then return end
  local target = parseoperand(addr)
  if target.mode == "iPJ" then
    waction("SETLABEL", target.imm)
  else
    werror("bad label assignment")
  end
end
map_op[".label_1"] = map_op[".label_2"]

------------------------------------------------------------------------------

-- Alignment pseudo-opcode.
-- .align: the operand is a number or an operand size name and must resolve
-- to a power of two between 2 and 256.
map_op[".align_1"] = function(params)
  if not params then return "numpow2" end
  if secpos+1 > maxsecpos then wflush() end
  local spec = params[1]
  local align = tonumber(spec) or map_opsizenum[map_opsize[spec]]
  if align then
    -- Halve up to eight times; hitting exactly 1 proves a power of two
    -- in the accepted range.
    local quotient = align
    local steps = 0
    while steps < 8 do
      steps = steps + 1
      quotient = quotient / 2
      if quotient == 1 then
        waction("ALIGN", nil, 1)
        wputxb(align-1)   -- The action byte holds 2**n-1.
        return
      end
    end
  end
  werror("bad alignment")
end

-- .space: reserve `num' bytes, filled with an optional byte value (0..255,
-- default 0).
map_op[".space_2"] = function(params)
  if not params then return "num [, filler]" end
  if secpos+1 > maxsecpos then wflush() end
  waction("SPACE", params[1])
  local raw = params[2]
  local filler = raw and tonumber(raw)
  if raw and (not filler or filler < 0 or filler > 255) then
    werror("bad filler")
  end
  wputxb(filler or 0)
end
map_op[".space_1"] = map_op[".space_2"]

------------------------------------------------------------------------------

-- .type: define a named type that maps to a C type, optionally bound to a
-- base register. Registers the type in map_type, stores a `#name' size
-- define in the current defines table and emits the offset helper(s) for
-- the target language (Lua or C).
map_op[".type_3"] = function(params, nparams)
  if not params then
    if nparams == 2 then return "name, ctype" end
    return "name, ctype, reg"
  end

  local name, ctype, reg = params[1], params[2], params[3]
  if not match(name, "^[%a_][%w_]*$") then
    werror("bad type name `"..name.."'")
  end
  if map_type[name] then
    werror("duplicate type `"..name.."'")
  end
  if reg and not map_reg_valid_base[reg] then
    werror("bad base register `"..(map_reg_rev[reg] or reg).."'")
  end

  -- Publish the size of the type as the `#name' define.
  local sizeexpr
  if luamode then
    sizeexpr = "ffi.sizeof(\""..ctype.."\")"
  else
    sizeexpr = "sizeof("..ctype..")"
  end
  g_map_def["#"..name] = sizeexpr

  -- Every type gets the next free number; it names the emitted helpers.
  local num = ctypenum + 1
  local ctypefmt
  if luamode then
    -- Lua mode: a function that translates the text following the type
    -- name ("[index]", "->field", ".field" or "[index]" plus a field)
    -- into calls to the Da<num>/Dt<num> helpers.
    ctypefmt = function(tailr)
      local index, field = match(tailr, "^(%b[])(.*)")
      if index then index = sub(index, 2, -2) end   -- Strip the brackets.
      if not field then field = tailr end
      field = match(field, "^%->(.*)") or match(field, "^%.(.*)")
      if not (index or field) then
        werror("invalid syntax `"..tailr.."`")
      end
      local Da = index and format("Da%X(%s)", num, index)
      local Dt = field and format("Dt%X(\"%s\")", num, field)
      if Da and Dt then
        return Da.."+"..Dt
      end
      return Da or Dt
    end
  else
    -- C mode: a format string that wraps the field expression.
    ctypefmt = format("Dt%X(%%s)", num)
  end

  map_type[name] = { ctype = ctype, ctypefmt = ctypefmt, reg = reg }

  -- Emit the helper definitions into the output.
  if luamode then
    local offset_helper = format("local Dt%X; do local ct=ffi.typeof(\"%s\"); function Dt%X(f) return ffi.offsetof(ct,f) or error(string.format(\"'struct %s' has no member named '%%s'\", f)) end; end",
      num, ctype, num, ctype)
    local index_helper = format("local Da%X; do local sz=ffi.sizeof(\"%s\"); function Da%X(i) return i*sz end; end",
      num, ctype, num)
    wline(offset_helper)
    wline(index_helper)
  else
    wline(format("#define Dt%X(_V) (int)(ptrdiff_t)&(((%s *)0)_V)", num, ctype))
  end
  ctypenum = num
end
map_op[".type_2"] = map_op[".type_3"]

-- Write all defined types to `out', sorted by name, one line per type with
-- its C type and (if any) the name of its base register.
local function dumptypes(out, lvl)
  local names = {}
  for typename in pairs(map_type) do
    names[#names+1] = typename
  end
  sort(names)

  out:write("Type definitions:\n")
  for i = 1, #names do
    local typename = names[i]
    local tp = map_type[typename]
    local regname = ""
    if tp.reg then
      regname = map_reg_rev[tp.reg] or ""
    end
    out:write(format(" %-20s %-20s %s\n", typename, tp.ctype, regname))
  end
  out:write("\n")
end

------------------------------------------------------------------------------

-- Set the current section.
-- Emits a SECTION action followed by the section number, then flushes.
_M.section = function(num)
  waction("SECTION")
  wputxb(num)
  -- SECTION terminates the action sequence, hence the forced flush.
  wflush(true)
end

------------------------------------------------------------------------------

-- Writes the architecture description to `out': a version banner followed
-- by the register and action dumps.
_M.dumparch = function(out)
  local banner = format("DynASM %s version %s, released %s\n\n",
    _info.arch, _info.version, _info.release)
  out:write(banner)
  dumpregs(out)
  dumpactions(out)
end

-- Writes all user defined elements (types, globals, externs) to `out'.
_M.dumpdef = function(out, lvl)
  dumptypes(out, lvl)
  dumpglobals(out, lvl)
  dumpexterns(out, lvl)
end

------------------------------------------------------------------------------

-- Receives the line/error/fatal/warning callbacks from the DynASM core and
-- hands back the flush function of this module.
_M.passcb = function(wl, we, wf, ww)
  wline = wl
  werror = we
  wfatal = wf
  wwarn = ww
  return wflush
end

-- Stores the architecture name and options, derives the output language
-- mode from them and resets the per-run state tables.
_M.setup = function(arch, opt)
  g_arch = arch
  g_opt = opt
  luamode = (g_opt.lang == "lua")
  init_actionlist()
  init_map_global()
  init_map_extern()
  init_map_type()
end

-- Chains the arch-specific maps with the core maps: lookups missing in
-- map_op fall back to map_coreop, lookups missing in map_def fall back to
-- map_archdef. Returns both maps.
_M.mergemaps = function(map_coreop, map_def)
  setmetatable(map_op, { __index = map_coreop })
  setmetatable(map_def, { __index = map_archdef })
  -- Keep map_def reachable so `#type` defines can be stored in it later.
  g_map_def = map_def
  return map_op, map_def
end

return _M

------------------------------------------------------------------------------

--------------------------------------------------------------------------------