├── .build.yml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── decode-test.c ├── decode.c ├── encode-test.c ├── encode-test.inc ├── encode.c ├── encode2-test.c ├── encode2-test.cc ├── encode2.c ├── fadec-enc.h ├── fadec-enc2.h ├── fadec.h ├── format.c ├── instrs.txt ├── meson.build ├── meson_options.txt └── parseinstrs.py /.build.yml: -------------------------------------------------------------------------------- 1 | image: alpine/edge 2 | sources: 3 | - https://git.sr.ht/~aengelke/fadec 4 | packages: 5 | - meson 6 | tasks: 7 | - build: | 8 | mkdir fadec-build1 9 | meson fadec-build1 fadec 10 | ninja -C fadec-build1 11 | ninja -C fadec-build1 test 12 | # Complete test with encode2 API. 13 | mkdir fadec-build2 14 | meson fadec-build2 fadec -Dwith_encode2=true 15 | ninja -C fadec-build2 16 | ninja -C fadec-build2 test 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-linux: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Install dependencies 11 | run: sudo apt install -y ninja-build meson 12 | - name: Configure 13 | run: mkdir build; CC=clang CXX=clang++ meson -Dbuildtype=debugoptimized -Dwith_encode2=true build 14 | - name: Build 15 | run: ninja -v -C build 16 | - name: Test 17 | run: meson test -v -C build 18 | build-linux-cmake: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Install dependencies 23 | run: sudo apt install -y ninja-build cmake 24 | - name: Configure 25 | run: CC=clang CXX=clang++ cmake -B build -G Ninja -DFADEC_ENCODE2=ON 26 | - name: Build 27 | run: cmake --build build -v 28 | - name: Test 29 | run: ctest --test-dir build -V 30 | build-windows: 31 | runs-on: windows-latest 32 | steps: 33 | - uses: actions/checkout@v4 34 | - 
name: Install dependencies 35 | run: pip install ninja meson 36 | - name: Configure 37 | run: mkdir build; meson setup --vsenv -Dbuildtype=debugoptimized -Dwith_encode2=true build 38 | - name: Build 39 | run: meson compile -v -C build 40 | - name: Test 41 | run: meson test -v -C build 42 | build-windows-cmake: 43 | runs-on: windows-latest 44 | steps: 45 | - uses: actions/checkout@v4 46 | - name: Configure 47 | run: cmake -B build -DFADEC_ENCODE2=ON 48 | - name: Build 49 | run: cmake --build build -v 50 | - name: Test 51 | run: ctest --test-dir build -V -C Debug 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.23) 2 | 3 | project(fadec LANGUAGES C) 4 | enable_testing() 5 | 6 | # TODO: make this actually optional 7 | enable_language(CXX OPTIONAL) 8 | 9 | # Options 10 | set(FADEC_ARCHMODE "both" CACHE STRING "Support only 32-bit x86, 64-bit x86 or both") 11 | set_property(CACHE FADEC_ARCHMODE PROPERTY STRINGS both only32 only64) 12 | 13 | option(FADEC_UNDOC "Include undocumented instructions" FALSE) 14 | option(FADEC_DECODE "Include support for decoding" TRUE) 15 | option(FADEC_ENCODE "Include support for encoding" TRUE) 16 | option(FADEC_ENCODE2 "Include support for new encoding API" FALSE) 17 | 18 | set(CMAKE_C_STANDARD 11) 19 | 20 | if (MSVC) 21 | add_compile_options(/W4 -D_CRT_SECURE_NO_WARNINGS /wd4018 /wd4146 /wd4244 /wd4245 /wd4267 /wd4310) 22 | add_compile_options($<$:-Zc:preprocessor>) 23 | else() 24 | add_compile_options(-Wall -Wextra -Wpedantic -Wno-overlength-strings) 25 | endif() 26 | 27 | find_package(Python3 3.6 REQUIRED) 28 | 29 | add_library(fadec) 30 | add_library(fadec::fadec ALIAS fadec) 31 | 
set_target_properties(fadec PROPERTIES 32 | LINKER_LANGUAGE C 33 | ) 34 | 35 | set(GEN_ARGS "") 36 | if (NOT FADEC_ARCHMODE STREQUAL "only64") 37 | list(APPEND GEN_ARGS "--32") 38 | endif () 39 | if (NOT FADEC_ARCHMODE STREQUAL "only32") 40 | list(APPEND GEN_ARGS "--64") 41 | endif () 42 | if (FADEC_UNDOC) 43 | list(APPEND GEN_ARGS "--with-undoc") 44 | endif () 45 | 46 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include") 47 | 48 | function(fadec_component) 49 | cmake_parse_arguments(ARG "" "NAME" "HEADERS;SOURCES" ${ARGN}) 50 | 51 | set(PRIV_INC ${CMAKE_CURRENT_BINARY_DIR}/include/fadec-${ARG_NAME}-private.inc) 52 | set(PUB_INC ${CMAKE_CURRENT_BINARY_DIR}/include/fadec-${ARG_NAME}-public.inc) 53 | 54 | add_custom_command( 55 | OUTPUT ${PRIV_INC} ${PUB_INC} 56 | COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/parseinstrs.py ${ARG_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/instrs.txt 57 | ${PUB_INC} ${PRIV_INC} ${GEN_ARGS} 58 | DEPENDS instrs.txt parseinstrs.py 59 | COMMENT "Building table for ${ARG_NAME}" 60 | ) 61 | 62 | list(APPEND FADEC_HEADERS ${PUB_INC}) 63 | target_sources(fadec PRIVATE 64 | ${ARG_SOURCES} 65 | 66 | PUBLIC 67 | FILE_SET HEADERS 68 | BASE_DIRS . 
69 | FILES 70 | ${ARG_HEADERS} 71 | 72 | PUBLIC 73 | FILE_SET generated_public TYPE HEADERS 74 | BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include 75 | FILES 76 | ${PUB_INC} 77 | 78 | PRIVATE 79 | FILE_SET generated_private TYPE HEADERS 80 | BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include 81 | FILES 82 | ${PRIV_INC} 83 | ) 84 | 85 | add_executable(fadec-${ARG_NAME}-test ${ARG_NAME}-test.c) 86 | target_link_libraries(fadec-${ARG_NAME}-test PRIVATE fadec) 87 | add_test(NAME ${ARG_NAME} COMMAND fadec-${ARG_NAME}-test) 88 | 89 | if (CMAKE_CXX_COMPILER AND ${ARG_NAME} STREQUAL "encode2") 90 | add_executable(fadec-${ARG_NAME}-test-cpp ${ARG_NAME}-test.cc) 91 | target_link_libraries(fadec-${ARG_NAME}-test-cpp PRIVATE fadec) 92 | add_test(NAME ${ARG_NAME}-cpp COMMAND fadec-${ARG_NAME}-test-cpp) 93 | endif() 94 | endfunction() 95 | 96 | if (FADEC_DECODE) 97 | fadec_component(NAME decode SOURCES decode.c format.c HEADERS fadec.h) 98 | endif () 99 | if (FADEC_ENCODE) 100 | fadec_component(NAME encode SOURCES encode.c HEADERS fadec-enc.h) 101 | endif () 102 | if (FADEC_ENCODE2) 103 | fadec_component(NAME encode2 SOURCES encode2.c HEADERS fadec-enc2.h) 104 | endif () 105 | 106 | install(TARGETS fadec EXPORT fadec 107 | LIBRARY 108 | ARCHIVE 109 | FILE_SET HEADERS FILE_SET generated_public) 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Alexis Engelke 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fadec — Fast Decoder for x86-32 and x86-64 and Encoder for x86-64 2 | 3 | Fadec is a fast and lightweight decoder for x86-32 and x86-64. To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 37 kiB (for 32/64-bit combined). 4 | 5 | Fadec-Enc (or Faenc) is a small, lightweight and easy-to-use encoder, currently for x86-64 only. 
6 | 7 | ## Key features 8 | 9 | > **Q: Why not just use any other decoding/encoding library available out there?** 10 | > 11 | > A: I needed to embed a small and fast decoder in a project for a freestanding environment (i.e., no libc). Further, only very few plain encoding libraries are available for x86-64; and most of them are large or make heavy use of external dependencies. 12 | 13 | - **Small size:** the entire library with the x86-64/32 decoder and the x86-64 encoder are only 95 kiB; for specific use cases, the size can be reduced even further (e.g., by dropping AVX-512). The main decode/encode routines are only a few hundreds lines of code. 14 | - **Performance:** Fadec is significantly faster than libopcodes, Capstone, or Zydis due to the absence of high-level abstractions and the small lookup table. 15 | - **Zero dependencies:** the entire library has no dependencies, even on the standard library, making it suitable for freestanding environments without a full libc or `malloc`-style memory allocation. 16 | - **Correctness:** even corner cases should be handled correctly (if not, that's a bug), e.g., the order of prefixes, immediate sizes of jump instructions, the presence of the `lock` prefix, or properly handling VEX.W in 32-bit mode. 17 | 18 | All components of this library target the Intel 64 implementations of x86. While AMD64 is _mostly similar_, there are some minor differences (e.g. operand sizes for jump instructions, more instructions, `cr8` can be accessed with `lock` prefix, `f34190` is `xchg`, not `pause`) which are currently not handled. 19 | 20 | ## Decoder Usage 21 | 22 | ### Example 23 | ```c 24 | uint8_t buffer[] = {0x49, 0x90}; 25 | FdInstr instr; 26 | // Decode from buffer into instr in 64-bit mode. 27 | int ret = fd_decode(buffer, sizeof(buffer), 64, 0, &instr); 28 | // ret<0 indicates an error, ret>0 the number of decoded bytes 29 | // Relevant properties of instructions can now be queried using the FD_* macros. 
30 | // Or, we can format the instruction to a string buffer: 31 | char fmtbuf[64]; 32 | fd_format(&instr, fmtbuf, sizeof(fmtbuf)); 33 | // fmtbuf now reads: "xchg r8, rax" 34 | ``` 35 | 36 | ### API 37 | 38 | The API consists of two functions to decode and format instructions, as well as several accessor macros. A full documentation can be found in [fadec.h](fadec.h). Direct access of any structure fields is not recommended. 39 | 40 | - `int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address, FdInstr* out_instr)` 41 | - Decode a single instruction. For internal performance reasons, note that: 42 | - The decoded operand sizes are not always exact. However, the exact size can be reconstructed in all cases. 43 | - An implicit `fwait` in FPU instructions is decoded as a separate instruction (matching the opcode layout in machine code). For example, `finit` is decoded as `FD_FWAIT` + `FD_FINIT` 44 | - Return value: number of bytes used, or a negative value in case of an error. 45 | - `buf`/`len`: buffer containing instruction bytes. At most 15 bytes will be read. If the instruction is longer than `len`, an error value is returned. 46 | - `mode`: architecture mode, either `32` or `64`. 47 | - `address`: set to `0`. (Obsolete use: virtual address of the decoded instruction.) 48 | - `out_instr`: Pointer to the instruction buffer, might get written partially in case of an error. 49 | - `void fd_format(const FdInstr* instr, char* buf, size_t len)` 50 | - Format a single instruction to a human-readable format. 51 | - `instr`: decoded instruction. 52 | - `buf`/`len`: buffer for formatted instruction string 53 | - Various accessor macros: see [fadec.h](fadec.h). 54 | 55 | ## Encoder Usage 56 | 57 | The encoder has two API variants: "v1" has a single entry point (`fe_enc64`) and the instruction is specified as integer parameter. "v2" has one entry point per instruction. 
v2 is currently about 3x faster than v1, but also has much larger code size (v1: <10 kiB; v2: ~3 MiB) and takes much longer to compile. It is therefore off by default and can be enabled by passing `-Dwith_encode2=true` to Meson. Both variants are supported. 58 | 59 | ### Example (API v1) 60 | 61 | ```c 62 | int failed = 0; 63 | uint8_t buf[64]; 64 | uint8_t* cur = buf; 65 | 66 | // xor eax, eax 67 | failed |= fe_enc64(&cur, FE_XOR32rr, FE_AX, FE_AX); 68 | // movzx ecx, byte ptr [rdi + 1*rax + 0] 69 | failed |= fe_enc64(&cur, FE_MOVZXr32m8, FE_CX, FE_MEM(FE_DI, 1, FE_AX, 0)); 70 | // test ecx, ecx 71 | failed |= fe_enc64(&cur, FE_TEST32rr, FE_CX, FE_CX); 72 | // jnz $ 73 | // This will be replaced later; FE_JMPL enforces use of longest offset 74 | uint8_t* fwd_jmp = cur; 75 | failed |= fe_enc64(&cur, FE_JNZ|FE_JMPL, (intptr_t) cur); 76 | uint8_t* loop_tgt = cur; 77 | // add rax, rcx 78 | failed |= fe_enc64(&cur, FE_ADD64rr, FE_AX, FE_CX); 79 | // sub ecx, 1 80 | failed |= fe_enc64(&cur, FE_SUB32ri, FE_CX, 1); 81 | // jnz loop_tgt 82 | failed |= fe_enc64(&cur, FE_JNZ, (intptr_t) loop_tgt); 83 | // Update previous jump to jump here. Note that we _must_ specify FE_JMPL too. 84 | failed |= fe_enc64(&fwd_jmp, FE_JNZ|FE_JMPL, (intptr_t) cur); 85 | // ret 86 | failed |= fe_enc64(&cur, FE_RET); 87 | // cur now points to the end of the buffer, failed indicates any failures. 
88 | ``` 89 | 90 | ### Example (API v2) 91 | 92 | ```c 93 | uint8_t buf[64]; 94 | uint8_t* cur = buf; 95 | 96 | // xor eax, eax 97 | cur += fe64_XOR32rr(cur, 0, FE_AX, FE_AX); 98 | // movzx ecx, byte ptr [rdi + 1*rax + 0] 99 | cur += fe64_MOVZXr32m8(cur, 0, FE_CX, FE_MEM(FE_DI, 1, FE_AX, 0)); 100 | // test ecx, ecx 101 | cur += fe64_TEST32rr(cur, 0, FE_CX, FE_CX); 102 | // jnz $ 103 | // This will be replaced later; FE_JMPL enforces use of longest offset 104 | uint8_t* fwd_jmp = cur; 105 | cur += fe64_JNZ(cur, FE_JMPL, cur); 106 | uint8_t* loop_tgt = cur; 107 | // add rax, rcx 108 | cur += fe64_ADD64rr(cur, 0, FE_AX, FE_CX); 109 | // sub ecx, 1 110 | cur += fe64_SUB32ri(cur, 0, FE_CX, 1); 111 | // jnz loop_tgt 112 | cur += fe64_JNZ(cur, 0, loop_tgt); 113 | // Update previous jump to jump here. Note that we _must_ specify FE_JMPL too. 114 | fe64_JNZ(fwd_jmp, FE_JMPL, cur); 115 | // ret 116 | cur += fe64_RET(cur, 0); 117 | // cur now points to the end of the buffer 118 | // errors are ignored, this example should not cause any :-) 119 | ``` 120 | 121 | ### API v1 122 | 123 | The API consists of one function to handle encode requests, as well as some macros. More information can be found in [fadec-enc.h](fadec-enc.h). Usage of internals like enum values is not recommended. 124 | 125 | - `int fe_enc64(uint8_t** buf, uint64_t mnem, int64_t operands...)` 126 | - Encodes an instruction for x86-64 into `*buf`. EVEX-encoded instructions will transparently encode with the shorter VEX prefix where permitted. 127 | - Return value: `0` on success, a negative value in error cases. 128 | - `buf`: Pointer to the pointer to the instruction buffer. The pointer (`*buf`) will be advanced by the number of bytes written. The instruction buffer must have at least 15 bytes left. 129 | - `mnem`: Instruction mnemonic to encode combined with extra flags: 130 | - `FE_SEG(segreg)`: override segment to specified segment register. 131 | - `FE_ADDR32`: override address size to 32-bit. 
132 | - `FE_JMPL`: use longest possible offset encoding, useful when jump target is not known. 133 | - `FE_MASK(maskreg)`: specify non-zero mask register (1--7) for instructions that support masking (suffixed with `_mask` or `_maskz`) or require a mask (AVX-512 gather/scatter). 134 | - `FE_RC_RN/RD/RU/RZ`: set rounding mode for instructions with static rounding control (suffixed `_er`). 135 | - `operands...`: Up to 4 instruction operands. The operand kinds must match the requirements of the mnemonic. 136 | - For register operands (`r`=non-mask register, `k`=mask register), use the register: `FE_AX`, `FE_AH`, `FE_XMM12`. 137 | - For immediate operands (`i`=regular, `a`=absolute address), use the constant: `12`, `-0xbeef`. 138 | - For memory operands (`m`=regular or `b`=broadcast), use: `FE_MEM(basereg,scale,indexreg,offset)`. Use `0` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically. 139 | - For offset operands (`o`), specify the target address. 140 | 141 | ### API v2 142 | 143 | The API consists of one function per instruction, as well as some macros. The API provides type safety for different register types as well as for memory operands (regular vs. VSIB). Besides a few details listed here, the usage is very similar to API v1. More information can be found in [fadec-enc2.h](fadec-enc2.h). Usage of internals like enum values is not recommended. 144 | 145 | - `int fe64_(uint8_t* buf, int flags, )` 146 | - Encodes the specified instruction for x86-64 into `buf`. EVEX-encoded instructions will transparently encode with the shorter VEX prefix where permitted. 147 | - Return value: `0` on failure, otherwise the instruction length. 148 | - `buf`: Pointer to the instruction buffer. The instruction buffer must have at least 15 bytes left. Bytes beyond the returned instruction length can be overwritten. 
149 | - `flags`: combination of extra flags, default to `0`: 150 | - `FE_SEG(segreg)`: override segment to specified segment register. 151 | - `FE_ADDR32`: override address size to 32-bit. 152 | - `FE_JMPL`: use longest possible offset encoding, useful when jump target is not known. 153 | - `FE_RC_RN/RD/RU/RZ`: set rounding mode for instructions with static rounding control (suffixed `_er`). 154 | - `FeRegMASK opmask` (instructions with opmask only): specify non-zero mask register (1--7) for instructions suffixed with `_mask`/`_maskz` and AVX-512 gather/scatter. 155 | - `operands...`: up to four instruction operands. 156 | - Registers have types `FeRegGP`/`FeRegXMM`/`FeRegMASK`/etc.; byte registers accepting high-byte operands also accept `FeRegGPH`. 157 | - Immediate operands have an appropriately sized integer type. 158 | - Memory operands use a `FeMem` (VSIB: `FeMemV`) structure, use the macro `FE_MEM(basereg,scale,indexreg,offset)` (VSIB: `FE_MEMV(...)`). Use `FE_NOREG` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically. 159 | - For offset operands (`o`), specify the target address relative to `buf`. 160 | - `int fe64_NOP(uint8_t* buf, unsigned size)` 161 | - Encode a series of `nop`s of `size` bytes, but at least emit one byte. This will use larger the `nop` encodings to reduce the number of instructions and is intended for filling padding. 162 | 163 | ## Known issues 164 | - Decoder/Encoder: register uniqueness constraints are not enforced. This affects: 165 | - VSIB-encoded instructions: no vector register may be used more than once 166 | - AMX instructions: no tile register may be used more than once 167 | - AVX-512 complex FP16 multiplication: destination must be not be equal to a source register 168 | - Prefixes for indirect jumps and calls are not properly decoded, e.g. `notrack`, `bnd`. 169 | - Low test coverage. (Help needed.) 170 | - No Python API. 
171 | 172 | Some ISA extensions are not supported, often because they are deprecated or unsupported by recent hardware. These are unlikely to be implemented in the near future: 173 | 174 | - (Intel) MPX: Intel lists MPX as deprecated. 175 | - (Intel) HLE prefixes `xacquire`/`xrelease`: Intel lists HLE as deprecated. The formatter for decoded instructions is able to reconstruct these in most cases, though. 176 | - (Intel) Xeon Phi (KNC/KNL/KNM) extensions, including the MVEX prefix: the hardware is discontinued/no longer available. 177 | - (AMD) XOP: unsupported by newer hardware. 178 | - (AMD) FMA4: unsupported by newer hardware. 179 | 180 | If you find any other issues, please report a bug. Or, even better, send a patch fixing the issue. 181 | -------------------------------------------------------------------------------- /decode.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect((x), 1) 11 | #define UNLIKELY(x) __builtin_expect((x), 0) 12 | #define ASSUME(x) do { if (!(x)) __builtin_unreachable(); } while (0) 13 | #else 14 | #define LIKELY(x) (x) 15 | #define UNLIKELY(x) (x) 16 | #define ASSUME(x) ((void) 0) 17 | #endif 18 | 19 | // Defines FD_TABLE_OFFSET_32 and FD_TABLE_OFFSET_64, if available 20 | #define FD_DECODE_TABLE_DEFINES 21 | #include 22 | #undef FD_DECODE_TABLE_DEFINES 23 | 24 | enum DecodeMode { 25 | DECODE_64 = 0, 26 | DECODE_32 = 1, 27 | }; 28 | 29 | typedef enum DecodeMode DecodeMode; 30 | 31 | #define ENTRY_NONE 0 32 | #define ENTRY_INSTR 1 33 | #define ENTRY_TABLE256 2 34 | #define ENTRY_TABLE16 3 35 | #define ENTRY_TABLE8E 4 36 | #define ENTRY_TABLE_PREFIX 5 37 | #define ENTRY_TABLE_VEX 6 38 | #define ENTRY_TABLE_ROOT 8 39 | #define ENTRY_MASK 7 40 | 41 | static uint16_t 42 | table_lookup(unsigned cur_idx, unsigned entry_idx) { 43 | static _Alignas(16) const uint16_t _decode_table[] = 
{ 44 | #define FD_DECODE_TABLE_DATA 45 | #include 46 | #undef FD_DECODE_TABLE_DATA 47 | }; 48 | return _decode_table[cur_idx + entry_idx]; 49 | } 50 | 51 | static unsigned 52 | table_walk(unsigned table_entry, unsigned entry_idx) { 53 | return table_lookup(table_entry & ~0x3, entry_idx); 54 | } 55 | 56 | #define LOAD_LE_1(buf) ((uint64_t) *(const uint8_t*) (buf)) 57 | #define LOAD_LE_2(buf) (LOAD_LE_1(buf) | LOAD_LE_1((const uint8_t*) (buf) + 1)<<8) 58 | #define LOAD_LE_3(buf) (LOAD_LE_2(buf) | LOAD_LE_1((const uint8_t*) (buf) + 2)<<16) 59 | #define LOAD_LE_4(buf) (LOAD_LE_2(buf) | LOAD_LE_2((const uint8_t*) (buf) + 2)<<16) 60 | #define LOAD_LE_8(buf) (LOAD_LE_4(buf) | LOAD_LE_4((const uint8_t*) (buf) + 4)<<32) 61 | 62 | enum 63 | { 64 | PREFIX_REXB = 0x01, 65 | PREFIX_REXX = 0x02, 66 | PREFIX_REXR = 0x04, 67 | PREFIX_REXW = 0x08, 68 | PREFIX_REX = 0x40, 69 | PREFIX_REXRR = 0x10, 70 | PREFIX_VEX = 0x20, 71 | }; 72 | 73 | struct InstrDesc 74 | { 75 | uint16_t type; 76 | uint16_t operand_indices; 77 | uint16_t operand_sizes; 78 | uint16_t reg_types; 79 | }; 80 | 81 | #define DESC_HAS_MODRM(desc) (((desc)->operand_indices & (3 << 0)) != 0) 82 | #define DESC_MODRM_IDX(desc) ((((desc)->operand_indices >> 0) & 3) ^ 3) 83 | #define DESC_HAS_MODREG(desc) (((desc)->operand_indices & (3 << 2)) != 0) 84 | #define DESC_MODREG_IDX(desc) ((((desc)->operand_indices >> 2) & 3) ^ 3) 85 | #define DESC_HAS_VEXREG(desc) (((desc)->operand_indices & (3 << 4)) != 0) 86 | #define DESC_VEXREG_IDX(desc) ((((desc)->operand_indices >> 4) & 3) ^ 3) 87 | #define DESC_IMM_CONTROL(desc) (((desc)->operand_indices >> 12) & 0x7) 88 | #define DESC_IMM_IDX(desc) ((((desc)->operand_indices >> 6) & 3) ^ 3) 89 | #define DESC_EVEX_BCST(desc) (((desc)->operand_indices >> 8) & 1) 90 | #define DESC_EVEX_MASK(desc) (((desc)->operand_indices >> 9) & 1) 91 | #define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1) 92 | #define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1) 93 | #define 
DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1) 94 | #define DESC_OPSIZE(desc) (((desc)->reg_types >> 11) & 7) 95 | #define DESC_MODRM_SIZE(desc) (((desc)->operand_sizes >> 0) & 3) 96 | #define DESC_MODREG_SIZE(desc) (((desc)->operand_sizes >> 2) & 3) 97 | #define DESC_VEXREG_SIZE(desc) (((desc)->operand_sizes >> 4) & 3) 98 | #define DESC_IMM_SIZE(desc) (((desc)->operand_sizes >> 6) & 3) 99 | #define DESC_LEGACY(desc) (((desc)->operand_sizes >> 8) & 1) 100 | #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) 101 | #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) 102 | #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) 103 | #define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1) 104 | #define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1) 105 | #define DESC_EVEX_SAE(desc) (((desc)->reg_types >> 8) & 1) 106 | #define DESC_EVEX_ER(desc) (((desc)->reg_types >> 9) & 1) 107 | #define DESC_EVEX_BCST16(desc) (((desc)->reg_types >> 10) & 1) 108 | #define DESC_REGTY_MODRM(desc) (((desc)->reg_types >> 0) & 7) 109 | #define DESC_REGTY_MODREG(desc) (((desc)->reg_types >> 3) & 7) 110 | #define DESC_REGTY_VEXREG(desc) (((desc)->reg_types >> 6) & 3) 111 | 112 | int 113 | fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, 114 | FdInstr* instr) 115 | { 116 | int len = len_sz > 15 ? 15 : len_sz; 117 | 118 | // Ensure that we can actually handle the decode request 119 | DecodeMode mode; 120 | unsigned table_root_idx; 121 | switch (mode_int) 122 | { 123 | #if defined(FD_TABLE_OFFSET_32) 124 | case 32: table_root_idx = FD_TABLE_OFFSET_32; mode = DECODE_32; break; 125 | #endif 126 | #if defined(FD_TABLE_OFFSET_64) 127 | case 64: table_root_idx = FD_TABLE_OFFSET_64; mode = DECODE_64; break; 128 | #endif 129 | default: return FD_ERR_INTERNAL; 130 | } 131 | 132 | int off = 0; 133 | uint8_t vex_operand = 0; 134 | 135 | uint8_t addr_size = mode == DECODE_64 ? 
3 : 2; 136 | unsigned prefix_rex = 0; 137 | uint8_t prefix_rep = 0; 138 | unsigned vexl = 0; 139 | unsigned prefix_evex = 0; 140 | instr->segment = FD_REG_NONE; 141 | 142 | // Values must match prefixes in parseinstrs.py. 143 | enum { 144 | PF_SEG1 = 0xfff8 - 0xfff8, 145 | PF_SEG2 = 0xfff9 - 0xfff8, 146 | PF_66 = 0xfffa - 0xfff8, 147 | PF_67 = 0xfffb - 0xfff8, 148 | PF_LOCK = 0xfffc - 0xfff8, 149 | PF_REP = 0xfffd - 0xfff8, 150 | PF_REX = 0xfffe - 0xfff8, 151 | }; 152 | 153 | uint8_t prefixes[8] = {0}; 154 | unsigned table_entry = 0; 155 | while (true) { 156 | if (UNLIKELY(off >= len)) 157 | return FD_ERR_PARTIAL; 158 | uint8_t prefix = buffer[off]; 159 | table_entry = table_lookup(table_root_idx, prefix); 160 | if (LIKELY(table_entry - 0xfff8 >= 8)) 161 | break; 162 | prefixes[PF_REX] = 0; 163 | prefixes[table_entry - 0xfff8] = prefix; 164 | off++; 165 | } 166 | if (off) { 167 | if (UNLIKELY(prefixes[PF_SEG2])) { 168 | if (prefixes[PF_SEG2] & 0x02) 169 | instr->segment = prefixes[PF_SEG2] >> 3 & 3; 170 | else 171 | instr->segment = prefixes[PF_SEG2] & 7; 172 | } 173 | if (UNLIKELY(prefixes[PF_67])) 174 | addr_size--; 175 | prefix_rex = prefixes[PF_REX]; 176 | prefix_rep = prefixes[PF_REP]; 177 | } 178 | 179 | // table_entry kinds: INSTR(0), T16(1), ESCAPE_A(2), ESCAPE_B(3) 180 | if (LIKELY(!(table_entry & 2))) { 181 | off++; 182 | 183 | // Then, walk through ModR/M-encoded opcode extensions. 
184 | if (table_entry & 1) { 185 | if (UNLIKELY(off >= len)) 186 | return FD_ERR_PARTIAL; 187 | unsigned isreg = buffer[off] >= 0xc0; 188 | table_entry = table_walk(table_entry, ((buffer[off] >> 2) & 0xe) | isreg); 189 | // table_entry kinds: INSTR(0), T8E(1) 190 | if (table_entry & 1) 191 | table_entry = table_walk(table_entry, buffer[off] & 7); 192 | } 193 | 194 | // table_entry kinds: INSTR(0) 195 | goto direct; 196 | } 197 | 198 | if (UNLIKELY(off >= len)) 199 | return FD_ERR_PARTIAL; 200 | 201 | unsigned opcode_escape = 0; 202 | uint8_t mandatory_prefix = 0; // without escape/VEX/EVEX, this is ignored. 203 | if (buffer[off] == 0x0f) 204 | { 205 | if (UNLIKELY(off + 1 >= len)) 206 | return FD_ERR_PARTIAL; 207 | if (buffer[off + 1] == 0x38) 208 | opcode_escape = 2; 209 | else if (buffer[off + 1] == 0x3a) 210 | opcode_escape = 3; 211 | else 212 | opcode_escape = 1; 213 | off += opcode_escape >= 2 ? 2 : 1; 214 | 215 | // If there is no REP/REPNZ prefix offer 66h as mandatory prefix. If 216 | // there is a REP prefix, then the 66h prefix is ignored here. 217 | mandatory_prefix = prefix_rep ? prefix_rep ^ 0xf1 : !!prefixes[PF_66]; 218 | } 219 | else if (UNLIKELY((unsigned) buffer[off] - 0xc4 < 2 || buffer[off] == 0x62)) 220 | { 221 | unsigned vex_prefix = buffer[off]; 222 | // VEX (C4/C5) or EVEX (62) 223 | if (UNLIKELY(off + 1 >= len)) 224 | return FD_ERR_PARTIAL; 225 | if (UNLIKELY(mode == DECODE_32 && buffer[off + 1] < 0xc0)) { 226 | off++; 227 | table_entry = table_walk(table_entry, 0); 228 | // table_entry kinds: INSTR(0) 229 | goto direct; 230 | } 231 | 232 | // VEX/EVEX + 66/F3/F2/REX will #UD. 233 | // Note: REX is also here only respected if it immediately precedes the 234 | // opcode, in this case the VEX/EVEX "prefix". 235 | if (prefixes[PF_66] || prefixes[PF_REP] || prefix_rex) 236 | return FD_ERR_UD; 237 | 238 | uint8_t byte = buffer[off + 1]; 239 | if (vex_prefix == 0xc5) // 2-byte VEX 240 | { 241 | opcode_escape = 1; 242 | prefix_rex = byte & 0x80 ? 
0 : PREFIX_REXR; 243 | } 244 | else // 3-byte VEX or EVEX 245 | { 246 | // SDM Vol 2A 2-15 (Dec. 2016): Ignored in 32-bit mode 247 | if (mode == DECODE_64) 248 | prefix_rex = byte >> 5 ^ 0x7; 249 | if (vex_prefix == 0x62) // EVEX 250 | { 251 | if (byte & 0x08) // Bit 3 of opcode_escape must be clear. 252 | return FD_ERR_UD; 253 | _Static_assert(PREFIX_REXRR == 0x10, "wrong REXRR value"); 254 | if (mode == DECODE_64) 255 | prefix_rex |= (byte & PREFIX_REXRR) ^ PREFIX_REXRR; 256 | } 257 | else // 3-byte VEX 258 | { 259 | if (byte & 0x18) // Bits 4:3 of opcode_escape must be clear. 260 | return FD_ERR_UD; 261 | } 262 | 263 | opcode_escape = (byte & 0x07); 264 | if (UNLIKELY(opcode_escape == 0)) { 265 | int prefix_len = vex_prefix == 0x62 ? 4 : 3; 266 | // Pretend to decode the prefix plus one opcode byte. 267 | return off + prefix_len > len ? FD_ERR_PARTIAL : FD_ERR_UD; 268 | } 269 | 270 | // Load third byte of VEX prefix 271 | if (UNLIKELY(off + 2 >= len)) 272 | return FD_ERR_PARTIAL; 273 | byte = buffer[off + 2]; 274 | prefix_rex |= byte & 0x80 ? PREFIX_REXW : 0; 275 | } 276 | 277 | mandatory_prefix = byte & 3; 278 | vex_operand = ((byte & 0x78) >> 3) ^ 0xf; 279 | prefix_rex |= PREFIX_VEX; 280 | 281 | if (vex_prefix == 0x62) // EVEX 282 | { 283 | if (!(byte & 0x04)) // Bit 10 must be 1. 284 | return FD_ERR_UD; 285 | if (UNLIKELY(off + 3 >= len)) 286 | return FD_ERR_PARTIAL; 287 | byte = buffer[off + 3]; 288 | // prefix_evex is z:L'L/RC:b:V':aaa 289 | vexl = (byte >> 5) & 3; 290 | prefix_evex = byte | 0x100; // Ensure that prefix_evex is non-zero. 291 | if (mode == DECODE_64) // V' causes UD in 32-bit mode 292 | vex_operand |= byte & 0x08 ? 0 : 0x10; // V' 293 | else if (!(byte & 0x08)) 294 | return FD_ERR_UD; 295 | off += 4; 296 | } 297 | else // VEX 298 | { 299 | vexl = byte & 0x04 ? 
1 : 0; 300 | off += 0xc7 - vex_prefix; // 3 for c4, 2 for c5 301 | } 302 | } 303 | 304 | table_entry = table_walk(table_entry, opcode_escape); 305 | // table_entry kinds: INSTR(0) [only for invalid], T256(2) 306 | if (UNLIKELY(!table_entry)) 307 | return FD_ERR_UD; 308 | if (UNLIKELY(off >= len)) 309 | return FD_ERR_PARTIAL; 310 | table_entry = table_walk(table_entry, buffer[off++]); 311 | // table_entry kinds: INSTR(0), T16(1), TVEX(2), TPREFIX(3) 312 | 313 | // Handle mandatory prefixes (which behave like an opcode ext.). 314 | if ((table_entry & 3) == 3) 315 | table_entry = table_walk(table_entry, mandatory_prefix); 316 | // table_entry kinds: INSTR(0), T16(1), TVEX(2) 317 | 318 | // Then, walk through ModR/M-encoded opcode extensions. 319 | if (table_entry & 1) { 320 | if (UNLIKELY(off >= len)) 321 | return FD_ERR_PARTIAL; 322 | unsigned isreg = buffer[off] >= 0xc0; 323 | table_entry = table_walk(table_entry, ((buffer[off] >> 2) & 0xe) | isreg); 324 | // table_entry kinds: INSTR(0), T8E(1), TVEX(2) 325 | if (table_entry & 1) 326 | table_entry = table_walk(table_entry, buffer[off] & 7); 327 | } 328 | // table_entry kinds: INSTR(0), TVEX(2) 329 | 330 | // For VEX prefix, we have to distinguish between VEX.W and VEX.L which may 331 | // be part of the opcode. 332 | if (UNLIKELY(table_entry & 2)) 333 | { 334 | uint8_t index = 0; 335 | index |= prefix_rex & PREFIX_REXW ? (1 << 0) : 0; 336 | // When EVEX.L'L is the rounding mode, the instruction must not have 337 | // L'L constraints. 
338 | index |= vexl << 1; 339 | table_entry = table_walk(table_entry, index); 340 | } 341 | // table_entry kinds: INSTR(0) 342 | 343 | direct: 344 | // table_entry kinds: INSTR(0) 345 | if (UNLIKELY(!table_entry)) 346 | return FD_ERR_UD; 347 | 348 | static _Alignas(16) const struct InstrDesc descs[] = { 349 | #define FD_DECODE_TABLE_DESCS 350 | #include 351 | #undef FD_DECODE_TABLE_DESCS 352 | }; 353 | const struct InstrDesc* desc = &descs[table_entry >> 2]; 354 | 355 | instr->type = desc->type; 356 | instr->addrsz = addr_size; 357 | instr->flags = ((prefix_rep + 1) & 6) + (mode == DECODE_64 ? FD_FLAG_64 : 0); 358 | instr->address = address; 359 | 360 | for (unsigned i = 0; i < sizeof(instr->operands) / sizeof(FdOp); i++) 361 | instr->operands[i] = (FdOp) {0}; 362 | 363 | if (DESC_MODRM(desc) && UNLIKELY(off++ >= len)) 364 | return FD_ERR_PARTIAL; 365 | unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0); 366 | 367 | if (UNLIKELY(prefix_evex)) { 368 | // VSIB inst (gather/scatter) without mask register or w/EVEX.z is UD 369 | if (DESC_VSIB(desc) && (!(prefix_evex & 0x07) || (prefix_evex & 0x80))) 370 | return FD_ERR_UD; 371 | // Inst doesn't support masking, so EVEX.z or EVEX.aaa is UD 372 | if (!DESC_EVEX_MASK(desc) && (prefix_evex & 0x87)) 373 | return FD_ERR_UD; 374 | // EVEX.z without EVEX.aaa is UD. The Intel SDM is rather unprecise 375 | // about this, but real hardware doesn't accept this. 
376 | if ((prefix_evex & 0x87) == 0x80) 377 | return FD_ERR_UD; 378 | 379 | // Cases for SAE/RC (reg operands only): 380 | // - ER supported -> all ok 381 | // - SAE supported -> assume L'L is RC, but ignored (undocumented) 382 | // - Neither supported -> b == 0 383 | if ((prefix_evex & 0x10) && (op_byte & 0xc0) == 0xc0) { // EVEX.b+reg 384 | if (!DESC_EVEX_SAE(desc)) 385 | return FD_ERR_UD; 386 | vexl = 2; 387 | if (DESC_EVEX_ER(desc)) 388 | instr->evex = prefix_evex; 389 | else 390 | instr->evex = (prefix_evex & 0x87) | 0x60; // set RC, clear B 391 | } else { 392 | if (UNLIKELY(vexl == 3)) // EVEX.L'L == 11b is UD 393 | return FD_ERR_UD; 394 | instr->evex = prefix_evex & 0x87; // clear RC, clear B 395 | } 396 | 397 | if (DESC_VSIB(desc)) 398 | vex_operand &= 0xf; // EVEX.V' is used as index extension instead. 399 | } else { 400 | instr->evex = 0; 401 | } 402 | 403 | unsigned op_size; 404 | unsigned op_size_alt = 0; 405 | if (!(DESC_OPSIZE(desc) & 4)) { 406 | if (mode == DECODE_64) 407 | op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : 408 | UNLIKELY(prefixes[PF_66] && !DESC_IGN66(desc)) ? 2 : 409 | DESC_OPSIZE(desc) ? 4 : 410 | 3; 411 | else 412 | op_size = UNLIKELY(prefixes[PF_66] && !DESC_IGN66(desc)) ? 2 : 3; 413 | } else { 414 | op_size = 5 + vexl; 415 | op_size_alt = op_size - (DESC_OPSIZE(desc) & 3); 416 | } 417 | 418 | uint8_t operand_sizes[4] = { 419 | DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, op_size_alt 420 | }; 421 | 422 | if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) { 423 | unsigned modreg = (op_byte >> 3) & 0x7; 424 | unsigned modrm = op_byte & 0x7; 425 | 426 | FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)]; 427 | op_modreg->type = FD_OT_REG; 428 | op_modreg->size = op_size; 429 | op_modreg->reg = modreg | (prefix_rex & PREFIX_REXR ? 8 : 0); 430 | op_modreg->misc = instr->type == FDI_MOV_CR ? 
FD_RT_CR : FD_RT_DR; 431 | if (instr->type == FDI_MOV_CR && (~0x011d >> op_modreg->reg) & 1) 432 | return FD_ERR_UD; 433 | else if (instr->type == FDI_MOV_DR && prefix_rex & PREFIX_REXR) 434 | return FD_ERR_UD; 435 | 436 | FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)]; 437 | op_modrm->type = FD_OT_REG; 438 | op_modrm->size = op_size; 439 | op_modrm->reg = modrm | (prefix_rex & PREFIX_REXB ? 8 : 0); 440 | op_modrm->misc = FD_RT_GPL; 441 | goto skip_modrm; 442 | } 443 | 444 | if (DESC_HAS_MODREG(desc)) 445 | { 446 | FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)]; 447 | unsigned reg_idx = (op_byte & 0x38) >> 3; 448 | unsigned reg_ty = DESC_REGTY_MODREG(desc); 449 | op_modreg->misc = reg_ty; 450 | if (LIKELY(reg_ty < 2)) 451 | reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0; 452 | else if (reg_ty == 7 && (prefix_rex & PREFIX_REXR || prefix_evex & 0x80)) 453 | return FD_ERR_UD; // REXR in 64-bit mode or EVEX.z with mask as dest 454 | if (UNLIKELY(reg_ty == FD_RT_VEC)) // REXRR ignored above in 32-bit mode 455 | reg_idx += prefix_rex & PREFIX_REXRR ? 16 : 0; 456 | else if (UNLIKELY(prefix_rex & PREFIX_REXRR)) 457 | return FD_ERR_UD; 458 | op_modreg->type = FD_OT_REG; 459 | op_modreg->size = operand_sizes[DESC_MODREG_SIZE(desc)]; 460 | op_modreg->reg = reg_idx; 461 | } 462 | 463 | if (DESC_HAS_MODRM(desc)) 464 | { 465 | FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)]; 466 | op_modrm->size = operand_sizes[DESC_MODRM_SIZE(desc)]; 467 | 468 | unsigned rm = op_byte & 0x07; 469 | if (op_byte >= 0xc0) 470 | { 471 | uint8_t reg_idx = rm; 472 | unsigned reg_ty = DESC_REGTY_MODRM(desc); 473 | op_modrm->misc = reg_ty; 474 | if (LIKELY(reg_ty < 2)) 475 | reg_idx += prefix_rex & PREFIX_REXB ? 8 : 0; 476 | if (prefix_evex && reg_ty == 0) // vector registers only 477 | reg_idx += prefix_rex & PREFIX_REXX ? 
16 : 0; 478 | op_modrm->type = FD_OT_REG; 479 | op_modrm->reg = reg_idx; 480 | } 481 | else 482 | { 483 | unsigned dispscale = 0; 484 | 485 | if (UNLIKELY(prefix_evex)) { 486 | // EVEX.z for memory destination operand is UD. 487 | if (UNLIKELY(prefix_evex & 0x80) && DESC_MODRM_IDX(desc) == 0) 488 | return FD_ERR_UD; 489 | 490 | // EVEX.b for memory-operand without broadcast support is UD. 491 | if (UNLIKELY(prefix_evex & 0x10)) { 492 | if (UNLIKELY(!DESC_EVEX_BCST(desc))) 493 | return FD_ERR_UD; 494 | if (UNLIKELY(DESC_EVEX_BCST16(desc))) 495 | dispscale = 1; 496 | else 497 | dispscale = prefix_rex & PREFIX_REXW ? 3 : 2; 498 | instr->segment |= dispscale << 6; // Store broadcast size 499 | op_modrm->type = FD_OT_MEMBCST; 500 | } else { 501 | dispscale = op_modrm->size - 1; 502 | op_modrm->type = FD_OT_MEM; 503 | } 504 | } else { 505 | op_modrm->type = FD_OT_MEM; 506 | } 507 | 508 | // 16-bit address size implies different ModRM encoding 509 | if (UNLIKELY(addr_size == 1)) { 510 | ASSUME(mode == DECODE_32); 511 | if (UNLIKELY(DESC_VSIB(desc))) // 16-bit addr size + VSIB is UD 512 | return FD_ERR_UD; 513 | if (rm < 6) 514 | op_modrm->misc = rm & 1 ? FD_REG_DI : FD_REG_SI; 515 | else 516 | op_modrm->misc = FD_REG_NONE; 517 | 518 | if (rm < 4) 519 | op_modrm->reg = rm & 2 ? FD_REG_BP : FD_REG_BX; 520 | else if (rm < 6 || (op_byte & 0xc7) == 0x06) 521 | op_modrm->reg = FD_REG_NONE; 522 | else 523 | op_modrm->reg = rm == 6 ? 
FD_REG_BP : FD_REG_BX; 524 | 525 | const uint8_t* dispbase = &buffer[off]; 526 | if (op_byte & 0x40) { 527 | if (UNLIKELY((off += 1) > len)) 528 | return FD_ERR_PARTIAL; 529 | instr->disp = (int8_t) LOAD_LE_1(dispbase) * (1 << dispscale); 530 | } else if (op_byte & 0x80 || (op_byte & 0xc7) == 0x06) { 531 | if (UNLIKELY((off += 2) > len)) 532 | return FD_ERR_PARTIAL; 533 | instr->disp = (int16_t) LOAD_LE_2(dispbase); 534 | } else { 535 | instr->disp = 0; 536 | } 537 | goto end_modrm; 538 | } 539 | 540 | // SIB byte 541 | uint8_t base = rm; 542 | if (rm == 4) { 543 | if (UNLIKELY(off >= len)) 544 | return FD_ERR_PARTIAL; 545 | uint8_t sib = buffer[off++]; 546 | unsigned scale = sib & 0xc0; 547 | unsigned idx = (sib & 0x38) >> 3; 548 | idx += prefix_rex & PREFIX_REXX ? 8 : 0; 549 | base = sib & 0x07; 550 | if (idx == 4) 551 | idx = FD_REG_NONE; 552 | op_modrm->misc = scale | idx; 553 | } else { 554 | op_modrm->misc = FD_REG_NONE; 555 | } 556 | 557 | if (UNLIKELY(DESC_VSIB(desc))) { 558 | // VSIB must have a memory operand with SIB byte. 559 | if (rm != 4) 560 | return FD_ERR_UD; 561 | _Static_assert(FD_REG_NONE == 0x3f, "unexpected FD_REG_NONE"); 562 | // idx 4 is valid for VSIB 563 | if ((op_modrm->misc & 0x3f) == FD_REG_NONE) 564 | op_modrm->misc &= 0xc4; 565 | if (prefix_evex) // EVEX.V':EVEX.X:SIB.idx 566 | op_modrm->misc |= prefix_evex & 0x8 ? 0 : 0x10; 567 | } 568 | 569 | // RIP-relative addressing only if SIB-byte is absent 570 | if (op_byte < 0x40 && rm == 5 && mode == DECODE_64) 571 | op_modrm->reg = FD_REG_IP; 572 | else if (op_byte < 0x40 && base == 5) 573 | op_modrm->reg = FD_REG_NONE; 574 | else 575 | op_modrm->reg = base + (prefix_rex & PREFIX_REXB ? 
8 : 0); 576 | 577 | const uint8_t* dispbase = &buffer[off]; 578 | if (op_byte & 0x40) { 579 | if (UNLIKELY((off += 1) > len)) 580 | return FD_ERR_PARTIAL; 581 | instr->disp = (int8_t) LOAD_LE_1(dispbase) * (1 << dispscale); 582 | } else if (op_byte & 0x80 || (op_byte < 0x40 && base == 5)) { 583 | if (UNLIKELY((off += 4) > len)) 584 | return FD_ERR_PARTIAL; 585 | instr->disp = (int32_t) LOAD_LE_4(dispbase); 586 | } else { 587 | instr->disp = 0; 588 | } 589 | end_modrm:; 590 | } 591 | } 592 | 593 | if (UNLIKELY(DESC_HAS_VEXREG(desc))) 594 | { 595 | FdOp* operand = &instr->operands[DESC_VEXREG_IDX(desc)]; 596 | if (DESC_ZEROREG_VAL(desc)) { 597 | operand->type = FD_OT_REG; 598 | operand->size = 1; 599 | operand->reg = FD_REG_CL; 600 | operand->misc = FD_RT_GPL; 601 | } else { 602 | operand->type = FD_OT_REG; 603 | // Without VEX prefix, this encodes an implicit register 604 | operand->size = operand_sizes[DESC_VEXREG_SIZE(desc)]; 605 | if (mode == DECODE_32) 606 | vex_operand &= 0x7; 607 | // Note: 32-bit will never UD here. EVEX.V' is caught above already. 608 | // Note: UD if > 16 for non-VEC. No EVEX-encoded instruction uses 609 | // EVEX.vvvv to refer to non-vector registers. Verified in parseinstrs. 610 | operand->reg = vex_operand; 611 | 612 | unsigned reg_ty = DESC_REGTY_VEXREG(desc); // VEC GPL MSK FPU/TMM 613 | if (prefix_rex & PREFIX_VEX) { // TMM with VEX, FPU otherwise 614 | // In 64-bit mode: UD if FD_RT_MASK and vex_operand&8 != 0 615 | if (reg_ty == 2 && vex_operand >= 8) 616 | return FD_ERR_UD; 617 | if (UNLIKELY(reg_ty == 3)) // TMM 618 | operand->reg &= 0x7; // TODO: verify 619 | operand->misc = (06710 >> (3 * reg_ty)) & 0x7; 620 | } else { 621 | operand->misc = (04710 >> (3 * reg_ty)) & 0x7; 622 | } 623 | } 624 | } 625 | else if (vex_operand != 0) 626 | { 627 | // TODO: bit 3 ignored in 32-bit mode? 
unverified 628 | return FD_ERR_UD; 629 | } 630 | 631 | uint32_t imm_control = UNLIKELY(DESC_IMM_CONTROL(desc)); 632 | if (LIKELY(!imm_control)) { 633 | } else if (UNLIKELY(imm_control == 1)) 634 | { 635 | // 1 = immediate constant 1, used for shifts 636 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 637 | operand->type = FD_OT_IMM; 638 | operand->size = 1; 639 | instr->imm = 1; 640 | } 641 | else if (UNLIKELY(imm_control == 2)) 642 | { 643 | // 2 = memory, address-sized, used for mov with moffs operand 644 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 645 | operand->type = FD_OT_MEM; 646 | operand->size = operand_sizes[DESC_IMM_SIZE(desc)]; 647 | operand->reg = FD_REG_NONE; 648 | operand->misc = FD_REG_NONE; 649 | 650 | int moffsz = 1 << addr_size; 651 | if (UNLIKELY(off + moffsz > len)) 652 | return FD_ERR_PARTIAL; 653 | if (moffsz == 2) 654 | instr->disp = LOAD_LE_2(&buffer[off]); 655 | if (moffsz == 4) 656 | instr->disp = LOAD_LE_4(&buffer[off]); 657 | if (LIKELY(moffsz == 8)) 658 | instr->disp = LOAD_LE_8(&buffer[off]); 659 | off += moffsz; 660 | } 661 | else if (UNLIKELY(imm_control == 3)) 662 | { 663 | // 3 = register in imm8[7:4], used for RVMR encoding with VBLENDVP[SD] 664 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 665 | operand->type = FD_OT_REG; 666 | operand->size = op_size; 667 | operand->misc = FD_RT_VEC; 668 | 669 | if (UNLIKELY(off + 1 > len)) 670 | return FD_ERR_PARTIAL; 671 | uint8_t reg = (uint8_t) LOAD_LE_1(&buffer[off]); 672 | off += 1; 673 | 674 | if (mode == DECODE_32) 675 | reg &= 0x7f; 676 | operand->reg = reg >> 4; 677 | instr->imm = reg & 0x0f; 678 | } 679 | else if (imm_control != 0) 680 | { 681 | // 4/5 = immediate, operand-sized/8 bit 682 | // 6/7 = offset, operand-sized/8 bit (used for jumps/calls) 683 | int imm_byte = imm_control & 1; 684 | int imm_offset = imm_control & 2; 685 | 686 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 687 | operand->type = FD_OT_IMM; 688 | 689 | if (imm_byte) { 690 
| if (UNLIKELY(off + 1 > len)) 691 | return FD_ERR_PARTIAL; 692 | instr->imm = (int8_t) LOAD_LE_1(&buffer[off++]); 693 | operand->size = DESC_IMM_SIZE(desc) & 1 ? 1 : op_size; 694 | } else { 695 | operand->size = operand_sizes[DESC_IMM_SIZE(desc)]; 696 | 697 | uint8_t imm_size; 698 | if (UNLIKELY(instr->type == FDI_RET || instr->type == FDI_RETF || 699 | instr->type == FDI_SSE_EXTRQ || 700 | instr->type == FDI_SSE_INSERTQ)) 701 | imm_size = 2; 702 | else if (UNLIKELY(instr->type == FDI_JMPF || instr->type == FDI_CALLF)) 703 | imm_size = (1 << op_size >> 1) + 2; 704 | else if (UNLIKELY(instr->type == FDI_ENTER)) 705 | imm_size = 3; 706 | else if (instr->type == FDI_MOVABS) 707 | imm_size = (1 << op_size >> 1); 708 | else 709 | imm_size = op_size == 2 ? 2 : 4; 710 | 711 | if (UNLIKELY(off + imm_size > len)) 712 | return FD_ERR_PARTIAL; 713 | 714 | if (imm_size == 2) 715 | instr->imm = (int16_t) LOAD_LE_2(&buffer[off]); 716 | else if (imm_size == 3) 717 | instr->imm = LOAD_LE_3(&buffer[off]); 718 | else if (imm_size == 4) 719 | instr->imm = (int32_t) LOAD_LE_4(&buffer[off]); 720 | else if (imm_size == 6) 721 | instr->imm = LOAD_LE_4(&buffer[off]) | LOAD_LE_2(&buffer[off+4]) << 32; 722 | else if (imm_size == 8) 723 | instr->imm = (int64_t) LOAD_LE_8(&buffer[off]); 724 | off += imm_size; 725 | } 726 | 727 | if (imm_offset) 728 | { 729 | if (instr->address != 0) 730 | instr->imm += instr->address + off; 731 | else 732 | operand->type = FD_OT_OFF; 733 | } 734 | } 735 | 736 | skip_modrm: 737 | if (UNLIKELY(prefixes[PF_LOCK])) { 738 | if (!DESC_LOCK(desc) || instr->operands[0].type != FD_OT_MEM) 739 | return FD_ERR_UD; 740 | instr->flags |= FD_FLAG_LOCK; 741 | } 742 | 743 | if (UNLIKELY(DESC_LEGACY(desc))) { 744 | // Without REX prefix, convert one-byte GP regs to high-byte regs 745 | // This actually only applies to SZ8/MOVSX/MOVZX; but no VEX-encoded 746 | // instructions have a byte-sized GP register in the first two operands. 
747 | if (!(prefix_rex & PREFIX_REX)) { 748 | for (int i = 0; i < 2; i++) { 749 | FdOp* operand = &instr->operands[i]; 750 | if (operand->type == FD_OT_NONE) 751 | break; 752 | if (operand->type == FD_OT_REG && operand->misc == FD_RT_GPL && 753 | operand->size == 1 && operand->reg >= 4) 754 | operand->misc = FD_RT_GPH; 755 | } 756 | } 757 | 758 | if (instr->type == FDI_XCHG_NOP) { 759 | // Only 4890, 90, and 6690 are true NOPs. 760 | if (instr->operands[0].reg == 0) { 761 | instr->operands[0].type = FD_OT_NONE; 762 | instr->operands[1].type = FD_OT_NONE; 763 | instr->type = FD_HAS_REP(instr) ? FDI_PAUSE : FDI_NOP; 764 | } else if ((instr->operands[0].reg & 7) == 0 && FD_HAS_REP(instr)) { 765 | // On Intel, REX.B is ignored for F3.90. 766 | instr->operands[0].type = FD_OT_NONE; 767 | instr->operands[1].type = FD_OT_NONE; 768 | instr->type = FDI_PAUSE; 769 | } else { 770 | instr->type = FDI_XCHG; 771 | } 772 | } 773 | 774 | if (UNLIKELY(instr->type == FDI_3DNOW)) { 775 | unsigned opc3dn = instr->imm; 776 | if (opc3dn & 0x40) 777 | return FD_ERR_UD; 778 | uint64_t msk = opc3dn & 0x80 ? 0x88d144d144d14400 : 0x30003000; 779 | if (!(msk >> (opc3dn & 0x3f) & 1)) 780 | return FD_ERR_UD; 781 | } 782 | 783 | instr->operandsz = UNLIKELY(DESC_INSTR_WIDTH(desc)) ? 
// Run a single encoder test case: encode the instruction described by
// (mnem, op0..op3) into buf and compare the produced bytes against the
// expected byte string exp/exp_len. An expected length of zero means the
// encoder is expected to fail. Returns 0 on success, -1 on mismatch
// (with a hex dump of expected vs. actual bytes on stdout).
static int
test(uint8_t* buf, const char* name, uint64_t mnem, uint64_t op0, uint64_t op1, uint64_t op2, uint64_t op3, const void* exp, size_t exp_len)
{
    memset(buf, 0, 16);

    uint8_t* cur = buf;
    int res = fe_enc64(&cur, mnem, op0, op1, op2, op3);
    // Failure (res != 0) is expected exactly when exp_len == 0; the length
    // check runs before memcmp so we never compare more bytes than expected.
    int ok = (res != 0) == (exp_len == 0) &&
             cur - buf == (ptrdiff_t) exp_len &&
             !memcmp(buf, exp, exp_len);
    if (ok)
        return 0;

    printf("Failed case %s:\n", name);
    printf(" Exp (%2zu): ", exp_len);
    print_hex(exp, exp_len);
    printf("\n Got (%2zd): ", cur - buf);
    print_hex(buf, cur - buf);
    printf("\n");
    return -1;
}
"Some tests FAILED" : "All tests PASSED"); 61 | return failed ? EXIT_FAILURE : EXIT_SUCCESS; 62 | } 63 | -------------------------------------------------------------------------------- /encode.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect((x), 1) 11 | #define UNLIKELY(x) __builtin_expect((x), 0) 12 | #else 13 | #define LIKELY(x) (x) 14 | #define UNLIKELY(x) (x) 15 | #endif 16 | 17 | #define OPC_66 0x80000 18 | #define OPC_F2 0x100000 19 | #define OPC_F3 0x200000 20 | #define OPC_REXW 0x400000 21 | #define OPC_LOCK 0x800000 22 | #define OPC_VEXL0 0x1000000 23 | #define OPC_VEXL1 0x1800000 24 | #define OPC_EVEXL0 0x2000000 25 | #define OPC_EVEXL1 0x2800000 26 | #define OPC_EVEXL2 0x3000000 27 | #define OPC_EVEXL3 0x3800000 28 | #define OPC_EVEXB 0x4000000 29 | #define OPC_VSIB 0x8000000 30 | #define OPC_67 FE_ADDR32 31 | #define OPC_SEG_MSK 0xe0000000 32 | #define OPC_JMPL FE_JMPL 33 | #define OPC_MASK_MSK 0xe00000000 34 | #define OPC_EVEXZ 0x1000000000 35 | #define OPC_USER_MSK (OPC_67|OPC_SEG_MSK|OPC_MASK_MSK) 36 | #define OPC_FORCE_SIB 0x2000000000 37 | #define OPC_DOWNGRADE_VEX 0x4000000000 38 | #define OPC_DOWNGRADE_VEX_FLIPW 0x40000000000 39 | #define OPC_EVEX_DISP8SCALE 0x38000000000 40 | #define OPC_GPH_OP0 0x200000000000 41 | #define OPC_GPH_OP1 0x400000000000 42 | 43 | #define EPFX_REX_MSK 0x43f 44 | #define EPFX_REX 0x20 45 | #define EPFX_EVEX 0x40 46 | #define EPFX_REXR 0x10 47 | #define EPFX_REXX 0x08 48 | #define EPFX_REXB 0x04 49 | #define EPFX_REXR4 0x02 50 | #define EPFX_REXB4 0x01 51 | #define EPFX_REXX4 0x400 52 | #define EPFX_VVVV_IDX 11 53 | 54 | static bool op_mem(FeOp op) { return op < 0; } 55 | static bool op_reg(FeOp op) { return op >= 0; } 56 | static bool op_reg_gpl(FeOp op) { return (op & ~0x1f) == 0x100; } 57 | static bool op_reg_gph(FeOp op) { return (op & ~0x3) == 0x204; } 
58 | static bool op_reg_xmm(FeOp op) { return (op & ~0x1f) == 0x600; } 59 | static int64_t op_mem_offset(FeOp op) { return (int32_t) op; } 60 | static unsigned op_mem_base(FeOp op) { return (op >> 32) & 0xfff; } 61 | static unsigned op_mem_idx(FeOp op) { return (op >> 44) & 0xfff; } 62 | static unsigned op_mem_scale(FeOp op) { return (op >> 56) & 0xf; } 63 | static unsigned op_reg_idx(FeOp op) { return op & 0xff; } 64 | static bool op_imm_n(FeOp imm, unsigned immsz) { 65 | if (immsz == 0 && !imm) return true; 66 | if (immsz == 1 && (int8_t) imm == imm) return true; 67 | if (immsz == 2 && (int16_t) imm == imm) return true; 68 | if (immsz == 3 && (imm&0xffffff) == imm) return true; 69 | if (immsz == 4 && (int32_t) imm == imm) return true; 70 | if (immsz == 8 && (int64_t) imm == imm) return true; 71 | return false; 72 | } 73 | 74 | static 75 | unsigned 76 | opc_size(uint64_t opc, uint64_t epfx) 77 | { 78 | unsigned res = 1; 79 | if (UNLIKELY(opc & OPC_EVEXL0)) { 80 | res += 4; 81 | } else if (UNLIKELY(opc & OPC_VEXL0)) { 82 | if (opc & (OPC_REXW|0x20000) || epfx & (EPFX_REXX|EPFX_REXB)) 83 | res += 3; 84 | else 85 | res += 2; 86 | } else { 87 | if (opc & OPC_LOCK) res++; 88 | if (opc & OPC_66) res++; 89 | if (opc & (OPC_F2|OPC_F3)) res++; 90 | if (opc & OPC_REXW || epfx & EPFX_REX_MSK) res++; 91 | if (opc & 0x30000) res++; 92 | if (opc & 0x20000) res++; 93 | } 94 | if (opc & OPC_SEG_MSK) res++; 95 | if (opc & OPC_67) res++; 96 | if (opc & 0x8000) res++; 97 | return res; 98 | } 99 | 100 | static 101 | int 102 | enc_opc(uint8_t** restrict buf, uint64_t opc, uint64_t epfx) 103 | { 104 | if (opc & OPC_SEG_MSK) 105 | *(*buf)++ = (0x65643e362e2600 >> (8 * ((opc >> 29) & 7))) & 0xff; 106 | if (opc & OPC_67) *(*buf)++ = 0x67; 107 | if (opc & OPC_EVEXL0) { 108 | *(*buf)++ = 0x62; 109 | unsigned b1 = opc >> 16 & 7; 110 | if (!(epfx & EPFX_REXR)) b1 |= 0x80; 111 | if (!(epfx & EPFX_REXX)) b1 |= 0x40; 112 | if (!(epfx & EPFX_REXB)) b1 |= 0x20; 113 | if (!(epfx & EPFX_REXR4)) b1 
// Append imm to *buf as a little-endian immediate of immsz bytes and
// advance the write cursor. Returns 0 on success, -1 if imm is not
// representable in immsz bytes (per op_imm_n); on failure nothing is
// written.
static int
enc_imm(uint8_t** restrict buf, uint64_t imm, unsigned immsz)
{
    if (!op_imm_n(imm, immsz))
        return -1;
    uint8_t* out = *buf;
    for (unsigned shift = 0; shift < 8 * immsz; shift += 8)
        *out++ = imm >> shift;
    *buf = out;
    return 0;
}
0x10<> 39; 283 | if (!(off & ((1 << disp8scale) - 1)) && op_imm_n(off >> disp8scale, 1)) { 284 | mod = 0x40; 285 | dispsz = 1; 286 | off >>= disp8scale; 287 | } else { 288 | mod = 0x80; 289 | dispsz = 4; 290 | } 291 | } else if (rm == 5) { 292 | mod = 0x40; 293 | dispsz = 1; 294 | } 295 | } 296 | 297 | if (opcsz + 1 + (rm == 4) + dispsz + immsz > 15) return -1; 298 | 299 | if (enc_opc(buf, opc, epfx)) return -1; 300 | *(*buf)++ = mod | (reg << 3) | rm; 301 | if (UNLIKELY(rm == 4)) 302 | *(*buf)++ = (scale << 6) | (idx << 3) | base; 303 | return enc_imm(buf, off, dispsz); 304 | } 305 | 306 | typedef enum { 307 | ENC_NP, ENC_M, ENC_R, ENC_M1, ENC_MC, ENC_MR, ENC_RM, ENC_RMA, ENC_MRC, 308 | ENC_AM, ENC_MA, ENC_I, ENC_O, ENC_OA, ENC_S, ENC_A, ENC_D, ENC_FD, ENC_TD, 309 | ENC_IM, 310 | ENC_RVM, ENC_RVMR, ENC_RMV, ENC_VM, ENC_MVR, ENC_MRV, 311 | ENC_MAX 312 | } Encoding; 313 | 314 | struct EncodingInfo { 315 | uint8_t modrm : 2; 316 | uint8_t modreg : 2; 317 | uint8_t vexreg : 2; 318 | uint8_t immidx : 2; 319 | // 0 = normal or jump, 1 = constant 1, 2 = address-size, 3 = RVMR 320 | uint8_t immctl : 3; 321 | uint8_t zregidx : 2; 322 | uint8_t zregval : 1; 323 | }; 324 | 325 | const struct EncodingInfo encoding_infos[ENC_MAX] = { 326 | [ENC_NP] = { 0 }, 327 | [ENC_M] = { .modrm = 0x0^3, .immidx = 1 }, 328 | [ENC_R] = { .modreg = 0x0^3 }, 329 | [ENC_M1] = { .modrm = 0x0^3, .immctl = 1, .immidx = 1 }, 330 | [ENC_MC] = { .modrm = 0x0^3, .zregidx = 0x1^3, .zregval = 1 }, 331 | [ENC_MR] = { .modrm = 0x0^3, .modreg = 0x1^3, .immidx = 2 }, 332 | [ENC_RM] = { .modrm = 0x1^3, .modreg = 0x0^3, .immidx = 2 }, 333 | [ENC_RMA] = { .modrm = 0x1^3, .modreg = 0x0^3, .zregidx = 0x2^3, .zregval = 0 }, 334 | [ENC_MRC] = { .modrm = 0x0^3, .modreg = 0x1^3, .zregidx = 0x2^3, .zregval = 1 }, 335 | [ENC_AM] = { .modrm = 0x1^3, .zregidx = 0x0^3, .zregval = 0 }, 336 | [ENC_MA] = { .modrm = 0x0^3, .zregidx = 0x1^3, .zregval = 0 }, 337 | [ENC_I] = { .immidx = 0 }, 338 | [ENC_O] = { .modreg = 0x0^3, 
.immidx = 1 }, 339 | [ENC_OA] = { .modreg = 0x0^3, .zregidx = 0x1^3, .zregval = 0 }, 340 | [ENC_S] = { 0 }, 341 | [ENC_A] = { .zregidx = 0x0^3, .zregval = 0, .immidx = 1 }, 342 | [ENC_D] = { .immidx = 0 }, 343 | [ENC_FD] = { .zregidx = 0x0^3, .zregval = 0, .immctl = 2, .immidx = 1 }, 344 | [ENC_TD] = { .zregidx = 0x1^3, .zregval = 0, .immctl = 2, .immidx = 0 }, 345 | [ENC_IM] = { .modrm = 0x1^3, .immidx = 0 }, 346 | [ENC_RVM] = { .modrm = 0x2^3, .modreg = 0x0^3, .vexreg = 0x1^3, .immidx = 3 }, 347 | [ENC_RVMR] = { .modrm = 0x2^3, .modreg = 0x0^3, .vexreg = 0x1^3, .immctl = 3, .immidx = 3 }, 348 | [ENC_RMV] = { .modrm = 0x1^3, .modreg = 0x0^3, .vexreg = 0x2^3 }, 349 | [ENC_VM] = { .modrm = 0x1^3, .vexreg = 0x0^3, .immidx = 2 }, 350 | [ENC_MVR] = { .modrm = 0x0^3, .modreg = 0x2^3, .vexreg = 0x1^3 }, 351 | [ENC_MRV] = { .modrm = 0x0^3, .modreg = 0x1^3, .vexreg = 0x2^3 }, 352 | }; 353 | 354 | static const uint64_t alt_tab[] = { 355 | #include 356 | }; 357 | 358 | int 359 | fe_enc64_impl(uint8_t** restrict buf, uint64_t opc, FeOp op0, FeOp op1, 360 | FeOp op2, FeOp op3) 361 | { 362 | uint8_t* buf_start = *buf; 363 | uint64_t ops[4] = {op0, op1, op2, op3}; 364 | 365 | uint64_t epfx = 0; 366 | // Doesn't change between variants 367 | if ((opc & OPC_GPH_OP0) && op_reg_gpl(op0) && op0 >= FE_SP) 368 | epfx |= EPFX_REX; 369 | else if (!(opc & OPC_GPH_OP0) && op_reg_gph(op0)) 370 | goto fail; 371 | if ((opc & OPC_GPH_OP1) && op_reg_gpl(op1) && op1 >= FE_SP) 372 | epfx |= EPFX_REX; 373 | else if (!(opc & OPC_GPH_OP1) && op_reg_gph(op1)) 374 | goto fail; 375 | 376 | try_encode:; 377 | unsigned enc = (opc >> 51) & 0x1f; 378 | const struct EncodingInfo* ei = &encoding_infos[enc]; 379 | 380 | int64_t imm = 0xcc; 381 | unsigned immsz = (opc >> 47) & 0xf; 382 | 383 | if (UNLIKELY(ei->zregidx && op_reg_idx(ops[ei->zregidx^3]) != ei->zregval)) 384 | goto next; 385 | 386 | if (UNLIKELY(enc == ENC_S)) { 387 | if ((op_reg_idx(op0) << 3 & 0x20) != (opc & 0x20)) goto next; 388 | opc |= 
op_reg_idx(op0) << 3; 389 | } 390 | 391 | if (immsz) { 392 | imm = ops[ei->immidx]; 393 | if (UNLIKELY(ei->immctl)) { 394 | if (ei->immctl == 2) { 395 | immsz = UNLIKELY(opc & OPC_67) ? 4 : 8; 396 | if (immsz == 4) imm = (int32_t) imm; // address are zero-extended 397 | } else if (ei->immctl == 3) { 398 | if (!op_reg_xmm(imm)) goto fail; 399 | imm = op_reg_idx(imm) << 4; 400 | if (!op_imm_n(imm, 1)) goto fail; 401 | } else if (ei->immctl == 1) { 402 | if (imm != 1) goto next; 403 | immsz = 0; 404 | } 405 | } else { 406 | if (enc == ENC_D) { 407 | if (UNLIKELY(opc & FE_JMPL) && opc >> 56) goto next; 408 | imm -= (int64_t) *buf + opc_size(opc, epfx) + immsz; 409 | } 410 | if (!op_imm_n(imm, immsz)) goto next; 411 | } 412 | } 413 | 414 | // NOP has no operands, so this must be the 32-bit OA XCHG 415 | if ((opc & 0xfffffff) == 0x90 && ops[0] == FE_AX) goto next; 416 | 417 | if (UNLIKELY(enc == ENC_R)) { 418 | if (enc_mr(buf, opc, epfx, 0, ops[0], immsz)) goto fail; 419 | } else if (ei->modrm) { 420 | FeOp modreg = ei->modreg ? ops[ei->modreg^3] : (opc & 0xff00) >> 8; 421 | if (ei->vexreg) 422 | epfx |= ((uint64_t) op_reg_idx(ops[ei->vexreg^3])) << EPFX_VVVV_IDX; 423 | // Can fail for upgrade to EVEX due to high register numbers 424 | if (enc_mr(buf, opc, epfx, ops[ei->modrm^3], modreg, immsz)) goto next; 425 | } else if (ei->modreg) { 426 | if (enc_o(buf, opc, epfx, ops[ei->modreg^3])) goto fail; 427 | } else { 428 | if (enc_opc(buf, opc, epfx)) goto fail; 429 | } 430 | 431 | if (immsz) 432 | if (enc_imm(buf, imm, immsz)) goto fail; 433 | 434 | return 0; 435 | 436 | next:; 437 | uint64_t alt = opc >> 56; 438 | if (alt) { // try alternative encoding, if available 439 | opc = alt_tab[alt] | (opc & OPC_USER_MSK); 440 | goto try_encode; 441 | } 442 | 443 | fail: 444 | // Don't advance buffer on error; though we shouldn't write anything. 
// Print len bytes of buf as two-digit lowercase hex on stdout.
static void
print_hex(const uint8_t* buf, size_t len) {
    for (size_t pos = 0; pos != len; pos++)
        printf("%02x", buf[pos]);
}

// Compare an encoding result (buf, res bytes written) against the expected
// byte string (exp, exp_len). Returns 0 on an exact match; otherwise prints
// a diagnostic hex dump of expected vs. actual bytes and returns -1.
static int
check(const uint8_t* buf, const void* exp, size_t exp_len, unsigned res, const char* name) {
    // Lengths are compared first so memcmp never reads past the shorter buffer.
    int matches = res == exp_len && memcmp(buf, exp, exp_len) == 0;
    if (matches)
        return 0;
    printf("Failed case (new) %s:\n", name);
    printf(" Exp (%2zu): ", exp_len);
    print_hex((const uint8_t*)exp, exp_len);
    printf("\n Got (%2u): ", res);
    print_hex(buf, res);
    printf("\n");
    return -1;
}
TEST1(#__VA_ARGS__, exp, __VA_ARGS__) 35 | 36 | int 37 | main(void) { 38 | int failed = 0; 39 | uint8_t buf[16]; 40 | 41 | // This API is type safe and prohibits compilation of reg-type mismatches 42 | #define ENC_TEST_TYPESAFE 43 | // Silence -Warray-bounds with double cast 44 | #define FE_PTR(off) (const void*) ((uintptr_t) buf + (off)) 45 | #define FLAGMASK(flags, mask) flags, mask 46 | #include "encode-test.inc" 47 | 48 | TEST("\x90", NOP, 0); 49 | TEST("\x90", NOP, 1); 50 | TEST("\x66\x90", NOP, 2); 51 | TEST("\x0f\x1f\x00", NOP, 3); 52 | TEST("\x0f\x1f\x40\x00", NOP, 4); 53 | TEST("\x0f\x1f\x44\x00\x00", NOP, 5); 54 | TEST("\x66\x0f\x1f\x44\x00\x00", NOP, 6); 55 | TEST("\x0f\x1f\x80\x00\x00\x00\x00", NOP, 7); 56 | TEST("\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 8); 57 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 9); 58 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x90", NOP, 10); 59 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x66\x90", NOP, 11); 60 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x0f\x1f\x00", NOP, 12); 61 | 62 | puts(failed ? "Some tests FAILED" : "All tests PASSED"); 63 | return failed ? 
EXIT_FAILURE : EXIT_SUCCESS; 64 | } 65 | -------------------------------------------------------------------------------- /encode2-test.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | 10 | using Buffer = std::array; 11 | 12 | static 13 | void print_hex(const uint8_t* buf, size_t len) { 14 | for (size_t i = 0; i < len; i++) 15 | std::printf("%02x", buf[i]); 16 | } 17 | 18 | static int 19 | check(const Buffer& buf, const char* exp, size_t exp_len, unsigned res, const char* name) { 20 | if (res == exp_len && !std::memcmp(buf.data(), exp, exp_len)) 21 | return 0; 22 | std::printf("Failed case (new) %s:\n", name); 23 | std::printf(" Exp (%2zu): ", exp_len); 24 | print_hex(reinterpret_cast(exp), exp_len); 25 | std::printf("\n Got (%2u): ", res); 26 | print_hex(buf.data(), res); 27 | std::printf("\n"); 28 | return -1; 29 | } 30 | 31 | #define TEST1(str, exp, name, ...) do { \ 32 | buf.fill(0); \ 33 | unsigned res = fe64_ ## name(buf.data(), __VA_ARGS__); \ 34 | failed |= check(buf, exp, sizeof(exp) - 1, res, str); \ 35 | } while (0) 36 | #define TEST(exp, ...) TEST1(#__VA_ARGS__, exp, __VA_ARGS__) 37 | 38 | #define TEST_CPP1(str, exp, expr) do { \ 39 | buf.fill(0); \ 40 | unsigned res = (expr); \ 41 | failed |= check(buf, exp, sizeof(exp) - 1, res, str); \ 42 | } while (0) 43 | #define TEST_CPP(exp, ...) 
TEST_CPP1(#__VA_ARGS__, exp, __VA_ARGS__) 44 | 45 | int main() { 46 | int failed = 0; 47 | Buffer buf{}; 48 | 49 | // This API is type safe and prohibits compilation of reg-type mismatches 50 | #define ENC_TEST_TYPESAFE 51 | // Silence -Warray-bounds with double cast 52 | #define FE_PTR(off) (const void*) ((uintptr_t) buf.data() + (off)) 53 | #define FLAGMASK(flags, mask) flags, mask 54 | #include "encode-test.inc" 55 | 56 | // Test implicit conversion of parameters also on the actual functions 57 | TEST_CPP("\x0f\x90\xc0", fe64_SETO8r(buf.data(), 0, FE_AX)); 58 | TEST_CPP("\x0f\x90\xc0", (fe64_SETO8r)(buf.data(), 0, FE_AX)); 59 | TEST_CPP("\x0f\x90\xc4", fe64_SETO8r(buf.data(), 0, FE_AH)); 60 | TEST_CPP("\x0f\x90\xc4", (fe64_SETO8r)(buf.data(), 0, FE_AH)); 61 | 62 | std::puts(failed ? "Some tests FAILED" : "All tests PASSED"); 63 | return failed ? EXIT_FAILURE : EXIT_SUCCESS; 64 | } 65 | -------------------------------------------------------------------------------- /encode2.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect(!!(x), 1) 11 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 12 | #define HINT_COLD __attribute__((cold)) 13 | #else 14 | #define LIKELY(x) (x) 15 | #define UNLIKELY(x) (x) 16 | #define HINT_COLD 17 | #endif 18 | 19 | #define op_reg_idx(op) (op).idx 20 | #define op_reg_gph(op) (((op).idx & ~0x3) == 0x24) 21 | #define op_mem_base(mem) op_reg_idx((mem).base) 22 | #define op_mem_idx(mem) op_reg_idx((mem).idx) 23 | 24 | static bool 25 | op_imm_n(int64_t imm, unsigned immsz) { 26 | if (immsz == 0 && !imm) return true; 27 | if (immsz == 1 && (int8_t) imm == imm) return true; 28 | if (immsz == 2 && (int16_t) imm == imm) return true; 29 | if (immsz == 3 && (imm&0xffffff) == imm) return true; 30 | if (immsz == 4 && (int32_t) imm == imm) return true; 31 | if (immsz == 8 && (int64_t) imm == 
imm) return true; 32 | return false; 33 | } 34 | 35 | HINT_COLD static unsigned 36 | enc_seg67(uint8_t* buf, unsigned flags) { 37 | unsigned idx = 0; 38 | if (UNLIKELY(flags & FE_SEG_MASK)) { 39 | unsigned seg = (0x65643e362e2600 >> (8 * (flags & FE_SEG_MASK))) & 0xff; 40 | buf[idx++] = seg; 41 | } 42 | if (UNLIKELY(flags & FE_ADDR32)) buf[idx++] = 0x67; 43 | return idx; 44 | } 45 | 46 | static void 47 | enc_imm(uint8_t* buf, uint64_t imm, unsigned immsz) { 48 | for (unsigned i = 0; i < immsz; i++) 49 | *buf++ = imm >> 8 * i; 50 | } 51 | 52 | static int 53 | enc_mem_common(uint8_t* buf, unsigned ripoff, FeMem op0, uint64_t op1, 54 | unsigned sibidx, unsigned disp8scale) { 55 | int mod = 0, reg = op1 & 7, rm; 56 | unsigned sib = 0x20; 57 | bool withsib = false; 58 | unsigned dispsz = 0; 59 | int32_t off = op0.off; 60 | 61 | if (sibidx < 8) { 62 | int scalabs = op0.scale; 63 | if (scalabs & (scalabs - 1)) 64 | return 0; 65 | unsigned scale = (scalabs & 0xA ? 1 : 0) | (scalabs & 0xC ? 2 : 0); 66 | sib = scale << 6 | sibidx << 3; 67 | withsib = true; 68 | } 69 | 70 | if (UNLIKELY(op0.base.idx >= 0x20)) { 71 | if (UNLIKELY(op0.base.idx >= op_reg_idx(FE_NOREG))) { 72 | *buf++ = (reg << 3) | 4; 73 | *buf++ = sib | 5; 74 | enc_imm(buf, off, 4); 75 | return 6; 76 | } else if (LIKELY(op0.base.idx == FE_IP.idx)) { 77 | if (withsib) 78 | return 0; 79 | *buf++ = (reg << 3) | 5; 80 | // Adjust offset, caller doesn't know instruction length. 81 | enc_imm(buf, off - ripoff - 5, 4); 82 | return 5; 83 | } else { 84 | return 0; 85 | } 86 | } 87 | 88 | rm = op_reg_idx(op0.base) & 7; 89 | 90 | if (off) { 91 | if (LIKELY(!disp8scale)) { 92 | mod = (int8_t) off == off ? 0x40 : 0x80; 93 | dispsz = (int8_t) off == off ? 
1 : 4; 94 | } else { 95 | if (!(off & ((1 << disp8scale) - 1)) && op_imm_n(off >> disp8scale, 1)) 96 | off >>= disp8scale, mod = 0x40, dispsz = 1; 97 | else 98 | mod = 0x80, dispsz = 4; 99 | } 100 | } else if (rm == 5) { 101 | dispsz = 1; 102 | mod = 0x40; 103 | } 104 | 105 | // Always write four bytes of displacement. The buffer is always large 106 | // enough, and we truncate by returning a smaller "written bytes" count. 107 | if (withsib || rm == 4) { 108 | *buf++ = mod | (reg << 3) | 4; 109 | *buf++ = sib | rm; 110 | enc_imm(buf, off, 4); 111 | return 2 + dispsz; 112 | } else { 113 | *buf++ = mod | (reg << 3) | rm; 114 | enc_imm(buf, off, 4); 115 | return 1 + dispsz; 116 | } 117 | } 118 | 119 | static int 120 | enc_mem(uint8_t* buf, unsigned ripoff, FeMem op0, uint64_t op1, bool forcesib, 121 | unsigned disp8scale) { 122 | unsigned sibidx = forcesib ? 4 : 8; 123 | if (op_reg_idx(op0.idx) < op_reg_idx(FE_NOREG)) { 124 | if (!op0.scale) 125 | return 0; 126 | if (op_reg_idx(op0.idx) == 4) 127 | return 0; 128 | sibidx = op_reg_idx(op0.idx) & 7; 129 | } else if (op0.scale) { 130 | return 0; 131 | } 132 | return enc_mem_common(buf, ripoff, op0, op1, sibidx, disp8scale); 133 | } 134 | 135 | static int 136 | enc_mem_vsib(uint8_t* buf, unsigned ripoff, FeMemV op0, uint64_t op1, 137 | bool forcesib, unsigned disp8scale) { 138 | (void) forcesib; 139 | if (!op0.scale) 140 | return 0; 141 | FeMem mem = FE_MEM(op0.base, op0.scale, FE_NOREG, op0.off); 142 | return enc_mem_common(buf, ripoff, mem, op1, op_reg_idx(op0.idx) & 7, 143 | disp8scale); 144 | } 145 | 146 | // EVEX/VEX "Opcode" format: 147 | // 148 | // | EVEX byte 4 | P P M M M - - W | Opcode byte | VEX-D VEX-D-FLIPW 149 | // 0 8 16 24 150 | 151 | enum { 152 | FE_OPC_VEX_WPP_SHIFT = 8, 153 | FE_OPC_VEX_WPP_MASK = 0x83 << FE_OPC_VEX_WPP_SHIFT, 154 | FE_OPC_VEX_MMM_SHIFT = 10, 155 | FE_OPC_VEX_MMM_MASK = 0x1f << FE_OPC_VEX_MMM_SHIFT, 156 | FE_OPC_VEX_DOWNGRADE_VEX = 1 << 24, 157 | FE_OPC_VEX_DOWNGRADE_VEX_FLIPW = 1 << 
25, 158 | }; 159 | 160 | static int 161 | enc_vex_common(uint8_t* buf, unsigned opcode, unsigned base, 162 | unsigned idx, unsigned reg, unsigned vvvv) { 163 | if ((base | idx | reg | vvvv) & 0x10) return 0; 164 | bool vex3 = ((base | idx) & 0x08) || (opcode & 0xfc00) != 0x0400; 165 | if (vex3) { 166 | *buf++ = 0xc4; 167 | unsigned b1 = (opcode & FE_OPC_VEX_MMM_MASK) >> FE_OPC_VEX_MMM_SHIFT; 168 | if (!(reg & 0x08)) b1 |= 0x80; 169 | if (!(idx & 0x08)) b1 |= 0x40; 170 | if (!(base & 0x08)) b1 |= 0x20; 171 | *buf++ = b1; 172 | unsigned b2 = (opcode & FE_OPC_VEX_WPP_MASK) >> FE_OPC_VEX_WPP_SHIFT; 173 | if (opcode & 0x20) b2 |= 0x04; 174 | b2 |= (vvvv ^ 0xf) << 3; 175 | *buf++ = b2; 176 | } else { 177 | *buf++ = 0xc5; 178 | unsigned b2 = opcode >> FE_OPC_VEX_WPP_SHIFT & 3; 179 | if (opcode & 0x20) b2 |= 0x04; 180 | if (!(reg & 0x08)) b2 |= 0x80; 181 | b2 |= (vvvv ^ 0xf) << 3; 182 | *buf++ = b2; 183 | } 184 | *buf++ = (opcode & 0xff0000) >> 16; 185 | return 3 + vex3; 186 | } 187 | 188 | static int 189 | enc_vex_reg(uint8_t* buf, unsigned opcode, uint64_t rm, uint64_t reg, 190 | uint64_t vvvv) { 191 | unsigned off = enc_vex_common(buf, opcode, rm, 0, reg, vvvv); 192 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 193 | return off ? off + 1 : 0; 194 | } 195 | 196 | static int 197 | enc_vex_mem(uint8_t* buf, unsigned opcode, FeMem rm, uint64_t reg, 198 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 199 | unsigned off = enc_vex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 200 | unsigned memoff = enc_mem(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 201 | return off && memoff ? 
off + memoff : 0; 202 | } 203 | 204 | static int 205 | enc_vex_vsib(uint8_t* buf, unsigned opcode, FeMemV rm, uint64_t reg, 206 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 207 | unsigned off = enc_vex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 208 | unsigned memoff = enc_mem_vsib(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 209 | return off && memoff ? off + memoff : 0; 210 | } 211 | 212 | static int 213 | enc_evex_common(uint8_t* buf, unsigned opcode, unsigned base, 214 | unsigned idx, unsigned reg, unsigned vvvv) { 215 | *buf++ = 0x62; 216 | bool evexr3 = reg & 0x08; 217 | bool evexr4 = reg & 0x10; 218 | bool evexb3 = base & 0x08; 219 | bool evexb4 = base & 0x10; // evexb4 is unused in AVX-512 encoding 220 | bool evexx3 = idx & 0x08; 221 | bool evexx4 = idx & 0x10; 222 | bool evexv4 = vvvv & 0x10; 223 | unsigned b1 = (opcode & FE_OPC_VEX_MMM_MASK) >> FE_OPC_VEX_MMM_SHIFT; 224 | if (!evexr3) b1 |= 0x80; 225 | if (!evexx3) b1 |= 0x40; 226 | if (!evexb3) b1 |= 0x20; 227 | if (!evexr4) b1 |= 0x10; 228 | if (evexb4) b1 |= 0x08; 229 | *buf++ = b1; 230 | unsigned b2 = (opcode & FE_OPC_VEX_WPP_MASK) >> FE_OPC_VEX_WPP_SHIFT; 231 | if (!evexx4) b2 |= 0x04; 232 | b2 |= (~vvvv & 0xf) << 3; 233 | *buf++ = b2; 234 | unsigned b3 = opcode & 0xff; 235 | if (!evexv4) b3 |= 0x08; 236 | *buf++ = b3; 237 | *buf++ = (opcode & 0xff0000) >> 16; 238 | return 5; 239 | } 240 | 241 | static unsigned 242 | enc_evex_to_vex(unsigned opcode) { 243 | return opcode & FE_OPC_VEX_DOWNGRADE_VEX_FLIPW ? 
opcode ^ 0x8000 : opcode; 244 | } 245 | 246 | // Encode AVX-512 EVEX r/m-reg, non-xmm reg, vvvv, prefer vex 247 | static int 248 | enc_evex_reg(uint8_t* buf, unsigned opcode, unsigned rm, 249 | unsigned reg, unsigned vvvv) { 250 | unsigned off; 251 | if (!((rm | reg | vvvv) & 0x10) && (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) 252 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), rm, 0, reg, vvvv); 253 | else 254 | off = enc_evex_common(buf, opcode, rm, 0, reg, vvvv); 255 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 256 | return off + 1; 257 | } 258 | 259 | // Encode AVX-512 EVEX r/m-reg, xmm reg, vvvv, prefer vex 260 | static int 261 | enc_evex_xmm(uint8_t* buf, unsigned opcode, unsigned rm, 262 | unsigned reg, unsigned vvvv) { 263 | unsigned off; 264 | if (!((rm | reg | vvvv) & 0x10) && (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) 265 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), rm, 0, reg, vvvv); 266 | else 267 | // AVX-512 XMM reg encoding uses X3 instead of B4. 268 | off = enc_evex_common(buf, opcode, rm & 0x0f, rm >> 1, reg, vvvv); 269 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 270 | return off + 1; 271 | } 272 | 273 | static int 274 | enc_evex_mem(uint8_t* buf, unsigned opcode, FeMem rm, uint64_t reg, 275 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 276 | unsigned off; 277 | if (!((op_reg_idx(rm.base) | op_reg_idx(rm.idx) | reg | vvvv) & 0x10) && 278 | (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) { 279 | disp8scale = 0; // Only AVX-512 EVEX compresses displacement 280 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 281 | } else { 282 | off = enc_evex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 283 | } 284 | unsigned memoff = enc_mem(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 285 | return off && memoff ? 
off + memoff : 0; 286 | } 287 | 288 | static int 289 | enc_evex_vsib(uint8_t* buf, unsigned opcode, FeMemV rm, uint64_t reg, 290 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 291 | (void) vvvv; 292 | // EVEX VSIB requires non-zero mask operand 293 | if (!(opcode & 0x7)) return 0; 294 | // EVEX.X4 is encoded in EVEX.V4 295 | unsigned idx = op_reg_idx(rm.idx); 296 | unsigned off = enc_evex_common(buf, opcode, op_reg_idx(rm.base), idx & 0x0f, reg, idx & 0x10); 297 | unsigned memoff = enc_mem_vsib(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 298 | return off && memoff ? off + memoff : 0; 299 | } 300 | 301 | unsigned fe64_NOP(uint8_t* buf, unsigned flags) { 302 | unsigned len = flags ? flags : 1; 303 | // Taken from Intel SDM 304 | static const uint8_t tbl[] = { 305 | 0x90, 306 | 0x66, 0x90, 307 | 0x0f, 0x1f, 0x00, 308 | 0x0f, 0x1f, 0x40, 0x00, 309 | 0x0f, 0x1f, 0x44, 0x00, 0x00, 310 | 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, 311 | 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, 312 | 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 313 | 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 314 | }; 315 | unsigned remain = len; 316 | for (; remain > 9; remain -= 9) 317 | for (unsigned i = 0; i < 9; i++) 318 | *(buf++) = tbl[36 + i]; 319 | const uint8_t* src = tbl + (remain * (remain - 1)) / 2; 320 | for (unsigned i = 0; i < remain; i++) 321 | *(buf++) = src[i]; 322 | return len; 323 | } 324 | 325 | #include 326 | -------------------------------------------------------------------------------- /fadec-enc.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_ENC_H_ 3 | #define FD_FADEC_ENC_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef enum { 13 | FE_AX = 0x100, FE_CX, FE_DX, FE_BX, FE_SP, FE_BP, FE_SI, FE_DI, 14 | FE_R8, FE_R9, FE_R10, FE_R11, FE_R12, FE_R13, FE_R14, FE_R15, 15 | FE_IP = 0x120, 16 | FE_AH = 0x204, FE_CH, FE_DH, 
FE_BH, 17 | FE_ES = 0x300, FE_CS, FE_SS, FE_DS, FE_FS, FE_GS, 18 | FE_ST0 = 0x400, FE_ST1, FE_ST2, FE_ST3, FE_ST4, FE_ST5, FE_ST6, FE_ST7, 19 | FE_MM0 = 0x500, FE_MM1, FE_MM2, FE_MM3, FE_MM4, FE_MM5, FE_MM6, FE_MM7, 20 | FE_XMM0 = 0x600, FE_XMM1, FE_XMM2, FE_XMM3, FE_XMM4, FE_XMM5, FE_XMM6, FE_XMM7, 21 | FE_XMM8, FE_XMM9, FE_XMM10, FE_XMM11, FE_XMM12, FE_XMM13, FE_XMM14, FE_XMM15, 22 | FE_XMM16, FE_XMM17, FE_XMM18, FE_XMM19, FE_XMM20, FE_XMM21, FE_XMM22, FE_XMM23, 23 | FE_XMM24, FE_XMM25, FE_XMM26, FE_XMM27, FE_XMM28, FE_XMM29, FE_XMM30, FE_XMM31, 24 | FE_K0 = 0x700, FE_K1, FE_K2, FE_K3, FE_K4, FE_K5, FE_K6, FE_K7, 25 | FE_TMM0 = 0x800, FE_TMM1, FE_TMM2, FE_TMM3, FE_TMM4, FE_TMM5, FE_TMM6, FE_TMM7, 26 | } FeReg; 27 | 28 | typedef int64_t FeOp; 29 | 30 | /** Construct a memory operand. Unused parts can be set to 0 and will be 31 | * ignored. FE_IP can be used as base register, in which case the offset is 32 | * interpreted as the offset from the /current/ position -- the size of the 33 | * encoded instruction will be subtracted during encoding. scale must be 1, 2, 34 | * 4, or 8; but is ignored if idx == 0. **/ 35 | #define FE_MEM(base,sc,idx,off) (INT64_MIN | ((int64_t) ((base) & 0xfff) << 32) | ((int64_t) ((idx) & 0xfff) << 44) | ((int64_t) ((sc) & 0xf) << 56) | ((off) & 0xffffffff)) 36 | #define FE_NOREG ((FeReg) 0) 37 | 38 | /** Add segment override prefix. This may or may not generate prefixes for the 39 | * ignored prefixes ES/CS/DS/SS in 64-bit mode. **/ 40 | #define FE_SEG(seg) ((uint64_t) (((seg) & 0x7) + 1) << 29) 41 | /** Do not use. **/ 42 | #define FE_SEG_MASK 0xe0000000 43 | /** Overrides address size. **/ 44 | #define FE_ADDR32 0x10000000 45 | /** Used together with a RIP-relative (conditional) jump, this will force the 46 | * use of the encoding with the largest distance. 
Useful for reserving a jump 47 | * when the target offset is still unknown; if the jump is re-encoded later on, 48 | * FE_JMPL must be specified there, too, so that the encoding lengths match. **/ 49 | #define FE_JMPL 0x100000000 50 | #define FE_MASK(kreg) ((uint64_t) ((kreg) & 0x7) << 33) 51 | #define FE_RC_RN 0x0000000 52 | #define FE_RC_RD 0x0800000 53 | #define FE_RC_RU 0x1000000 54 | #define FE_RC_RZ 0x1800000 55 | 56 | #include 57 | 58 | /** Do not use. **/ 59 | #define fe_enc64_1(buf, mnem, op0, op1, op2, op3, ...) fe_enc64_impl(buf, mnem, op0, op1, op2, op3) 60 | /** Encode a single instruction for 64-bit mode. 61 | * \param buf Pointer to the buffer for instruction bytes, must have a size of 62 | * 15 bytes. The pointer is advanced by the number of bytes used for 63 | * encoding the specified instruction. 64 | * \param mnem Mnemonic, optionally or-ed with FE_SEG(), FE_ADDR32, or FE_JMPL. 65 | * \param operands... Instruction operands. Immediate operands are passed as 66 | * plain value; register operands using the FeReg enum; memory operands 67 | * using FE_MEM(); and offset operands for RIP-relative jumps/calls are 68 | * specified as _address in buf_, e.g. (intptr_t) jmptgt, the address of 69 | * buf and the size of the encoded instruction are subtracted internally. 70 | * \return Zero for success or a negative value in case of an error. 71 | **/ 72 | #define fe_enc64(buf, ...) fe_enc64_1(buf, __VA_ARGS__, 0, 0, 0, 0, 0) 73 | /** Do not use. 
**/ 74 | int fe_enc64_impl(uint8_t** buf, uint64_t mnem, FeOp op0, FeOp op1, FeOp op2, FeOp op3); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /fadec-enc2.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_ENC2_H_ 3 | #define FD_FADEC_ENC2_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #ifdef __cplusplus 13 | #define FE_STRUCT(name) name 14 | #else 15 | #define FE_STRUCT(name) (name) 16 | #endif 17 | 18 | // Flags 19 | #define FE_JMPL 0x8 20 | #define FE_ADDR32 0x10 21 | #define FE_SEG_MASK 0x7 22 | #define FE_SEG(seg) (((seg).idx + 1) & FE_SEG_MASK) 23 | #define FE_RC_MASK 0x60 24 | #define FE_RC_RN 0x00 25 | #define FE_RC_RD 0x20 26 | #define FE_RC_RU 0x40 27 | #define FE_RC_RZ 0x60 28 | 29 | typedef struct FeRegGP { unsigned char idx; } FeRegGP; 30 | #define FE_GP(idx) (FE_STRUCT(FeRegGP) { idx }) 31 | #define FE_AX FE_GP(0) 32 | #define FE_CX FE_GP(1) 33 | #define FE_DX FE_GP(2) 34 | #define FE_BX FE_GP(3) 35 | #define FE_SP FE_GP(4) 36 | #define FE_BP FE_GP(5) 37 | #define FE_SI FE_GP(6) 38 | #define FE_DI FE_GP(7) 39 | #define FE_R8 FE_GP(8) 40 | #define FE_R9 FE_GP(9) 41 | #define FE_R10 FE_GP(10) 42 | #define FE_R11 FE_GP(11) 43 | #define FE_R12 FE_GP(12) 44 | #define FE_R13 FE_GP(13) 45 | #define FE_R14 FE_GP(14) 46 | #define FE_R15 FE_GP(15) 47 | #define FE_IP FE_GP(0x20) 48 | #define FE_NOREG FE_GP(0x80) 49 | typedef struct FeRegGPH { unsigned char idx; } FeRegGPH; 50 | #define FE_GPH(idx) (FE_STRUCT(FeRegGPH) { idx }) 51 | #define FE_AH FE_GPH(4) 52 | #define FE_CH FE_GPH(5) 53 | #define FE_DH FE_GPH(6) 54 | #define FE_BH FE_GPH(7) 55 | typedef struct FeRegSREG { unsigned char idx; } FeRegSREG; 56 | #define FE_SREG(idx) (FE_STRUCT(FeRegSREG) { idx }) 57 | #define FE_ES FE_SREG(0) 58 | #define FE_CS FE_SREG(1) 59 | #define FE_SS FE_SREG(2) 60 
| #define FE_DS FE_SREG(3) 61 | #define FE_FS FE_SREG(4) 62 | #define FE_GS FE_SREG(5) 63 | typedef struct FeRegST { unsigned char idx; } FeRegST; 64 | #define FE_ST(idx) (FE_STRUCT(FeRegST) { idx }) 65 | #define FE_ST0 FE_ST(0) 66 | #define FE_ST1 FE_ST(1) 67 | #define FE_ST2 FE_ST(2) 68 | #define FE_ST3 FE_ST(3) 69 | #define FE_ST4 FE_ST(4) 70 | #define FE_ST5 FE_ST(5) 71 | #define FE_ST6 FE_ST(6) 72 | #define FE_ST7 FE_ST(7) 73 | typedef struct FeRegMM { unsigned char idx; } FeRegMM; 74 | #define FE_MM(idx) (FE_STRUCT(FeRegMM) { idx }) 75 | #define FE_MM0 FE_MM(0) 76 | #define FE_MM1 FE_MM(1) 77 | #define FE_MM2 FE_MM(2) 78 | #define FE_MM3 FE_MM(3) 79 | #define FE_MM4 FE_MM(4) 80 | #define FE_MM5 FE_MM(5) 81 | #define FE_MM6 FE_MM(6) 82 | #define FE_MM7 FE_MM(7) 83 | typedef struct FeRegXMM { unsigned char idx; } FeRegXMM; 84 | #define FE_XMM(idx) (FE_STRUCT(FeRegXMM) { idx }) 85 | #define FE_XMM0 FE_XMM(0) 86 | #define FE_XMM1 FE_XMM(1) 87 | #define FE_XMM2 FE_XMM(2) 88 | #define FE_XMM3 FE_XMM(3) 89 | #define FE_XMM4 FE_XMM(4) 90 | #define FE_XMM5 FE_XMM(5) 91 | #define FE_XMM6 FE_XMM(6) 92 | #define FE_XMM7 FE_XMM(7) 93 | #define FE_XMM8 FE_XMM(8) 94 | #define FE_XMM9 FE_XMM(9) 95 | #define FE_XMM10 FE_XMM(10) 96 | #define FE_XMM11 FE_XMM(11) 97 | #define FE_XMM12 FE_XMM(12) 98 | #define FE_XMM13 FE_XMM(13) 99 | #define FE_XMM14 FE_XMM(14) 100 | #define FE_XMM15 FE_XMM(15) 101 | #define FE_XMM16 FE_XMM(16) 102 | #define FE_XMM17 FE_XMM(17) 103 | #define FE_XMM18 FE_XMM(18) 104 | #define FE_XMM19 FE_XMM(19) 105 | #define FE_XMM20 FE_XMM(20) 106 | #define FE_XMM21 FE_XMM(21) 107 | #define FE_XMM22 FE_XMM(22) 108 | #define FE_XMM23 FE_XMM(23) 109 | #define FE_XMM24 FE_XMM(24) 110 | #define FE_XMM25 FE_XMM(25) 111 | #define FE_XMM26 FE_XMM(26) 112 | #define FE_XMM27 FE_XMM(27) 113 | #define FE_XMM28 FE_XMM(28) 114 | #define FE_XMM29 FE_XMM(29) 115 | #define FE_XMM30 FE_XMM(30) 116 | #define FE_XMM31 FE_XMM(31) 117 | typedef struct FeRegMASK { unsigned char idx; 
} FeRegMASK; 118 | #define FE_K(idx) (FE_STRUCT(FeRegMASK) { idx }) 119 | #define FE_K0 FE_K(0) 120 | #define FE_K1 FE_K(1) 121 | #define FE_K2 FE_K(2) 122 | #define FE_K3 FE_K(3) 123 | #define FE_K4 FE_K(4) 124 | #define FE_K5 FE_K(5) 125 | #define FE_K6 FE_K(6) 126 | #define FE_K7 FE_K(7) 127 | typedef struct FeRegTMM { unsigned char idx; } FeRegTMM; 128 | #define FE_TMM(idx) (FE_STRUCT(FeRegTMM) { idx }) 129 | #define FE_TMM0 FE_TMM(0) 130 | #define FE_TMM1 FE_TMM(1) 131 | #define FE_TMM2 FE_TMM(2) 132 | #define FE_TMM3 FE_TMM(3) 133 | #define FE_TMM4 FE_TMM(4) 134 | #define FE_TMM5 FE_TMM(5) 135 | #define FE_TMM6 FE_TMM(6) 136 | #define FE_TMM7 FE_TMM(7) 137 | typedef struct FeRegCR { unsigned char idx; } FeRegCR; 138 | #define FE_CR(idx) (FE_STRUCT(FeRegCR) { idx }) 139 | typedef struct FeRegDR { unsigned char idx; } FeRegDR; 140 | #define FE_DR(idx) (FE_STRUCT(FeRegDR) { idx }) 141 | 142 | // Internal only 143 | // Disambiguate GP and GPH -- C++ uses conversion constructors; C uses _Generic. 
144 | #ifdef __cplusplus 145 | } 146 | namespace { 147 | struct FeRegGPLH { 148 | unsigned char idx; 149 | FeRegGPLH(FeRegGP gp) : idx(gp.idx) {} 150 | FeRegGPLH(FeRegGPH gp) : idx(gp.idx | 0x20) {} 151 | }; 152 | } 153 | extern "C" { 154 | #define FE_MAKE_GPLH(reg) reg 155 | #else 156 | typedef struct FeRegGPLH { unsigned char idx; } FeRegGPLH; 157 | #define FE_GPLH(idx) (FE_STRUCT(FeRegGPLH) { idx }) 158 | #define FE_MAKE_GPLH(reg) FE_GPLH(_Generic((reg), FeRegGPH: 0x20, FeRegGP: 0) | (reg).idx) 159 | #endif 160 | 161 | typedef struct FeMem { 162 | uint8_t flags; 163 | FeRegGP base; 164 | unsigned char scale; 165 | // union { 166 | FeRegGP idx; 167 | // FeRegXMM idx_xmm; 168 | // }; 169 | int32_t off; 170 | } FeMem; 171 | #define FE_MEM(base,sc,idx,off) (FE_STRUCT(FeMem) { 0, base, sc, idx, off }) 172 | typedef struct FeMemV { 173 | uint8_t flags; 174 | FeRegGP base; 175 | unsigned char scale; 176 | FeRegXMM idx; 177 | int32_t off; 178 | } FeMemV; 179 | #define FE_MEMV(base,sc,idx,off) (FE_STRUCT(FeMemV) { 0, base, sc, idx, off }) 180 | 181 | // NOP is special: flags is interpreted as the length in bytes, 0 = 1 byte, too. 
182 | unsigned fe64_NOP(uint8_t* buf, unsigned flags); 183 | 184 | #include 185 | 186 | #ifdef __cplusplus 187 | } 188 | #endif 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /fadec.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_H_ 3 | #define FD_FADEC_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef enum { 13 | FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3, 14 | FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7, 15 | FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11, 16 | FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15, 17 | // Alternative names for byte registers 18 | FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL, 19 | FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH, 20 | // Alternative names for general purpose registers 21 | FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX, 22 | FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI, 23 | // FD_REG_IP can only be accessed in long mode (64-bit) 24 | FD_REG_IP = 0x10, 25 | // Segment register values 26 | FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS, 27 | // No register specified 28 | FD_REG_NONE = 0x3f 29 | } FdReg; 30 | 31 | typedef enum { 32 | #define FD_MNEMONIC(name,value) FDI_ ## name = value, 33 | #include 34 | #undef FD_MNEMONIC 35 | } FdInstrType; 36 | 37 | /** Internal use only. **/ 38 | enum { 39 | FD_FLAG_LOCK = 1 << 0, 40 | FD_FLAG_REP = 1 << 2, 41 | FD_FLAG_REPNZ = 1 << 1, 42 | FD_FLAG_64 = 1 << 7, 43 | }; 44 | 45 | /** Operand types. 
**/ 46 | typedef enum { 47 | FD_OT_NONE = 0, 48 | FD_OT_REG = 1, 49 | FD_OT_IMM = 2, 50 | FD_OT_MEM = 3, 51 | FD_OT_OFF = 4, 52 | FD_OT_MEMBCST = 5, 53 | } FdOpType; 54 | 55 | typedef enum { 56 | /** Vector (SSE/AVX) register XMMn/YMMn/ZMMn **/ 57 | FD_RT_VEC = 0, 58 | /** Low general purpose register **/ 59 | FD_RT_GPL = 1, 60 | /** High-byte general purpose register **/ 61 | FD_RT_GPH = 2, 62 | /** Segment register **/ 63 | FD_RT_SEG = 3, 64 | /** FPU register ST(n) **/ 65 | FD_RT_FPU = 4, 66 | /** MMX register MMn **/ 67 | FD_RT_MMX = 5, 68 | /** TMM register TMMn **/ 69 | FD_RT_TMM = 6, 70 | /** Vector mask (AVX-512) register Kn **/ 71 | FD_RT_MASK = 7, 72 | /** Bound register BNDn **/ 73 | FD_RT_BND = 8, 74 | /** Control Register CRn **/ 75 | FD_RT_CR = 9, 76 | /** Debug Register DRn **/ 77 | FD_RT_DR = 10, 78 | /** Must be a memory operand **/ 79 | FD_RT_MEM = 15, 80 | } FdRegType; 81 | 82 | /** Do not depend on the actual enum values. **/ 83 | typedef enum { 84 | /** Round to nearest (even) **/ 85 | FD_RC_RN = 1, 86 | /** Round down **/ 87 | FD_RC_RD = 3, 88 | /** Round up **/ 89 | FD_RC_RU = 5, 90 | /** Round to zero (truncate) **/ 91 | FD_RC_RZ = 7, 92 | /** Rounding mode as specified in MXCSR **/ 93 | FD_RC_MXCSR = 0, 94 | /** Rounding mode irrelevant, but SAE **/ 95 | FD_RC_SAE = 6, 96 | } FdRoundControl; 97 | 98 | /** Internal use only. **/ 99 | typedef struct { 100 | uint8_t type; 101 | uint8_t size; 102 | uint8_t reg; 103 | uint8_t misc; 104 | } FdOp; 105 | 106 | /** Never(!) access struct fields directly. Use the macros defined below. 
**/ 107 | typedef struct { 108 | uint16_t type; 109 | uint8_t flags; 110 | uint8_t segment; 111 | uint8_t addrsz; 112 | uint8_t operandsz; 113 | uint8_t size; 114 | uint8_t evex; 115 | 116 | FdOp operands[4]; 117 | 118 | int64_t disp; 119 | int64_t imm; 120 | 121 | uint64_t address; 122 | } FdInstr; 123 | 124 | typedef enum { 125 | FD_ERR_UD = -1, 126 | FD_ERR_INTERNAL = -2, 127 | FD_ERR_PARTIAL = -3, 128 | } FdErr; 129 | 130 | 131 | /** Decode an instruction. 132 | * \param buf Buffer for instruction bytes. 133 | * \param len Length of the buffer (in bytes). An instruction is not longer than 134 | * 15 bytes on all x86 architectures. 135 | * \param mode Decoding mode, either 32 for protected/compatibility mode or 64 136 | * for long mode. 16-bit mode is not supported. 137 | * \param address Virtual address where the decoded instruction. This is used 138 | * for computing jump targets. If "0" is passed, operands which require 139 | * adding EIP/RIP will be stored as FD_OT_OFF operands. 140 | * DEPRECATED: Strongly prefer passing 0 and using FD_OT_OFF operands. 141 | * \param out_instr Pointer to the instruction buffer. Note that this may get 142 | * partially written even if an error is returned. 143 | * \return The number of bytes consumed by the instruction, or a negative number 144 | * indicating an error. 145 | **/ 146 | int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address, 147 | FdInstr* out_instr); 148 | 149 | /** Format an instruction to a string. 150 | * \param instr The instruction. 151 | * \param buf The buffer to hold the formatted string. 152 | * \param len The length of the buffer. 153 | **/ 154 | void fd_format(const FdInstr* instr, char* buf, size_t len); 155 | 156 | /** Format an instruction to a string. 157 | * NOTE: API stability is currently not guaranteed for this function; its name 158 | * and/or signature may change in future. 159 | * 160 | * \param instr The instruction. 
161 | * \param addr The base address to use for printing FD_OT_OFF operands. 162 | * \param buf The buffer to hold the formatted string. 163 | * \param len The length of the buffer. 164 | **/ 165 | void fd_format_abs(const FdInstr* instr, uint64_t addr, char* buf, size_t len); 166 | 167 | /** Get the stringified name of an instruction type. 168 | * NOTE: API stability is currently not guaranteed for this function; changes 169 | * to the signature and/or the returned string can be expected. E.g., a future 170 | * version may take an extra parameter for the instruction operand size; or may 171 | * take a complete decoded instruction as first parameter and return the 172 | * mnemonic returned by fd_format. 173 | * 174 | * \param ty An instruction type 175 | * \return The instruction type as string, or "(invalid)". 176 | **/ 177 | const char* fdi_name(FdInstrType ty); 178 | 179 | 180 | /** Gets the type/mnemonic of the instruction. 181 | * ABI STABILITY NOTE: different versions or builds of the library may use 182 | * different values. When linking as shared library, any interpretation of this 183 | * value is meaningless; in such cases use fdi_name. 184 | * 185 | * API STABILITY NOTE: a future version of this library may decode string 186 | * instructions prefixed with REP/REPNZ and instructions prefixed with LOCK as 187 | * separate instruction types. **/ 188 | #define FD_TYPE(instr) ((FdInstrType) (instr)->type) 189 | /** DEPRECATED: This functionality is obsolete in favor of FD_OT_OFF. 190 | * Gets the address of the instruction. Invalid if decoded address == 0. **/ 191 | #define FD_ADDRESS(instr) ((instr)->address) 192 | /** Gets the size of the instruction in bytes. **/ 193 | #define FD_SIZE(instr) ((instr)->size) 194 | /** Gets the specified segment override, or FD_REG_NONE for default segment. **/ 195 | #define FD_SEGMENT(instr) ((FdReg) (instr)->segment & 0x3f) 196 | /** Gets the address size attribute of the instruction in bytes. 
**/ 197 | #define FD_ADDRSIZE(instr) (1 << (instr)->addrsz) 198 | /** Get the logarithmic address size; FD_ADDRSIZE == 1 << FD_ADDRSIZELG **/ 199 | #define FD_ADDRSIZELG(instr) ((instr)->addrsz) 200 | /** Gets the operation width in bytes of the instruction if this is not encoded 201 | * in the operands, for example for the string instruction (e.g. MOVS). **/ 202 | #define FD_OPSIZE(instr) (1 << (instr)->operandsz) 203 | /** Get the logarithmic operand size; FD_OPSIZE == 1 << FD_OPSIZELG iff 204 | * FD_OPSIZE is valid. **/ 205 | #define FD_OPSIZELG(instr) ((instr)->operandsz) 206 | /** Indicates whether the instruction was encoded with a REP prefix. Needed for: 207 | * (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly. 208 | * (2) Handling the instructions SCAS and CMPS, for which this means REPZ. **/ 209 | #define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP) 210 | /** Indicates whether the instruction was encoded with a REPNZ prefix. **/ 211 | #define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ) 212 | /** Indicates whether the instruction was encoded with a LOCK prefix. **/ 213 | #define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK) 214 | /** Do not use. **/ 215 | #define FD_IS64(instr) ((instr)->flags & FD_FLAG_64) 216 | 217 | /** Gets the type of an operand at the given index. **/ 218 | #define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type) 219 | /** Gets the size in bytes of an operand. However, there are a few exceptions: 220 | * (1) For some register types, e.g., segment registers, or x87 registers, the 221 | * size is zero. (This allows some simplifications internally.) 222 | * (2) On some vector instructions this may be only an approximation of the 223 | * actually needed operand size (that is, an instruction may/must only use 224 | * a smaller part than specified here). The real operand size is always 225 | * fully recoverable in combination with the instruction type. 
**/ 226 | #define FD_OP_SIZE(instr,idx) (1 << (instr)->operands[idx].size >> 1) 227 | /** Get the logarithmic size of an operand; see FD_OP_SIZE for special cases. 228 | * The following equality holds: FD_OP_SIZE == 1 << (FD_OP_SIZELG + 1) >> 1 229 | * Note that typically FD_OP_SIZE == 1 << FD_OP_SIZELG unless a zero-sized 230 | * memory operand, FPU register, or mask register is involved. **/ 231 | #define FD_OP_SIZELG(instr,idx) ((instr)->operands[idx].size - 1) 232 | /** Gets the accessed register index of a register operand. Note that /only/ the 233 | * index is returned, no further interpretation of the index (which depends on 234 | * the instruction type) is done. The register type can be fetched using 235 | * FD_OP_REG_TYPE, e.g. for distinguishing high-byte registers. 236 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 237 | #define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg) 238 | /** Gets the type of the accessed register. 239 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 240 | #define FD_OP_REG_TYPE(instr,idx) ((FdRegType) (instr)->operands[idx].misc) 241 | /** DEPRECATED: use FD_OP_REG_TYPE() == FD_RT_GPH instead. 242 | * Returns whether the accessed register is a high-byte register. In that case, 243 | * the register index has to be decreased by 4. 244 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 245 | #define FD_OP_REG_HIGH(instr,idx) (FD_OP_REG_TYPE(instr,idx) == FD_RT_GPH) 246 | /** Gets the index of the base register from a memory operand, or FD_REG_NONE, 247 | * if the memory operand has no base register. This is the only case where the 248 | * 64-bit register RIP can be returned, in which case the operand also has no 249 | * scaled index register. 250 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 251 | #define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg) 252 | /** Gets the index of the index register from a memory operand, or FD_REG_NONE, 253 | * if the memory operand has no scaled index register. 
254 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 255 | #define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->operands[idx].misc & 0x3f) 256 | /** Gets the scale of the index register from a memory operand when existent. 257 | * This does /not/ return the scale in an absolute value but returns the amount 258 | * of bits the index register is shifted to the left (i.e. the value is in the 259 | * range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE. 260 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 261 | #define FD_OP_SCALE(instr,idx) ((instr)->operands[idx].misc >> 6) 262 | /** Gets the sign-extended displacement of a memory operand. 263 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 264 | #define FD_OP_DISP(instr,idx) ((int64_t) (instr)->disp) 265 | /** Get memory broadcast size in bytes. 266 | * Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/ 267 | #define FD_OP_BCSTSZ(instr,idx) (1 << FD_OP_BCSTSZLG(instr,idx)) 268 | /** Get logarithmic memory broadcast size (1 = 2-byte; 2=4-byte; 3=8-byte). 269 | * Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/ 270 | #define FD_OP_BCSTSZLG(instr,idx) ((instr)->segment >> 6) 271 | /** Gets the (sign-extended) encoded constant for an immediate operand. 272 | * Only valid if FD_OP_TYPE == FD_OT_IMM or FD_OP_TYPE == FD_OT_OFF **/ 273 | #define FD_OP_IMM(instr,idx) ((instr)->imm) 274 | 275 | /** Get the opmask register for EVEX-encoded instructions; 0 for no mask. **/ 276 | #define FD_MASKREG(instr) ((instr)->evex & 0x07) 277 | /** Get whether zero masking shall be used. Only valid if FD_MASKREG != 0. **/ 278 | #define FD_MASKZERO(instr) ((instr)->evex & 0x80) 279 | /** Get rounding mode for EVEX-encoded instructions. See FdRoundControl. 
**/ 280 | #define FD_ROUNDCONTROL(instr) ((FdRoundControl) (((instr)->evex & 0x70) >> 4)) 281 | 282 | #ifdef __cplusplus 283 | } 284 | #endif 285 | 286 | #endif 287 | -------------------------------------------------------------------------------- /format.c: -------------------------------------------------------------------------------- 1 | 2 | #include <stdbool.h> 3 | #include <stddef.h> 4 | #include <stdint.h> 5 | #ifdef _MSC_VER 6 | #include <intrin.h> 7 | #endif 8 | 9 | #include <fadec.h> 10 | 11 | 12 | #ifdef __GNUC__ 13 | #define LIKELY(x) __builtin_expect(!!(x), 1) 14 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 15 | #define DECLARE_ARRAY_SIZE(n) static n 16 | #define DECLARE_RESTRICTED_ARRAY_SIZE(n) restrict static n 17 | #else 18 | #define LIKELY(x) (x) 19 | #define UNLIKELY(x) (x) 20 | #define DECLARE_ARRAY_SIZE(n) n 21 | #define DECLARE_RESTRICTED_ARRAY_SIZE(n) n 22 | #endif 23 | 24 | #if defined(__has_attribute) 25 | #if __has_attribute(fallthrough) 26 | #define FALLTHROUGH() __attribute__((fallthrough)) 27 | #endif 28 | #endif 29 | #if !defined(FALLTHROUGH) 30 | #define FALLTHROUGH() ((void)0) 31 | #endif 32 | 33 | struct FdStr { 34 | const char* s; 35 | unsigned sz; 36 | }; 37 | 38 | #define fd_stre(s) ((struct FdStr) { (s "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), sizeof (s)-1 }) 39 | 40 | static char* 41 | fd_strpcat(char* restrict dst, struct FdStr src) { 42 | #ifdef __GNUC__ 43 | unsigned lim = __builtin_constant_p(src.sz) && src.sz <= 8 ? 8 : 16; 44 | #else 45 | unsigned lim = 16; 46 | #endif 47 | for (unsigned i = 0; i < lim; i++) 48 | dst[i] = src.s[i]; 49 | // __builtin_memcpy(dst, src.s, 16); 50 | return dst + src.sz; 51 | } 52 | 53 | static unsigned 54 | fd_clz64(uint64_t v) { 55 | #if defined(__GNUC__) 56 | return __builtin_clzll(v); 57 | #elif defined(_MSC_VER) 58 | unsigned long index; 59 | 60 | // 32-bit MSVC doesn't support _BitScanReverse64. This is an attempt to 61 | // identify this case. 
62 | #if INTPTR_MAX == INT64_MAX 63 | _BitScanReverse64(&index, v); 64 | #else 65 | if (_BitScanReverse(&index, v >> 32)) 66 | return 31 - index; 67 | 68 | _BitScanReverse(&index, v & 0xffffffff); 69 | #endif 70 | 71 | return 63 - index; 72 | #else 73 | #error Unsupported compiler. 74 | #endif 75 | } 76 | 77 | #if defined(__SSE2__) 78 | #include <emmintrin.h> 79 | #endif 80 | 81 | static char* 82 | fd_strpcatnum(char dst[DECLARE_ARRAY_SIZE(18)], uint64_t val) { 83 | unsigned lz = fd_clz64(val|1); 84 | unsigned numbytes = 16 - (lz / 4); 85 | #if defined(__SSE2__) 86 | __m128i mv = _mm_set_epi64x(0, val << (lz & -4)); 87 | __m128i mvp = _mm_unpacklo_epi8(mv, mv); 88 | __m128i mva = _mm_srli_epi16(mvp, 12); 89 | __m128i mvb = _mm_and_si128(mvp, _mm_set1_epi16(0x0f00u)); 90 | __m128i ml = _mm_or_si128(mva, mvb); 91 | __m128i mn = _mm_or_si128(ml, _mm_set1_epi8(0x30)); 92 | __m128i mgt = _mm_cmpgt_epi8(ml, _mm_set1_epi8(9)); 93 | __m128i mgtm = _mm_and_si128(mgt, _mm_set1_epi8(0x61 - 0x3a)); 94 | __m128i ma = _mm_add_epi8(mn, mgtm); 95 | __m128i msw = _mm_shufflehi_epi16(_mm_shufflelo_epi16(ma, 0x1b), 0x1b); 96 | __m128i ms = _mm_shuffle_epi32(msw, 0x4e); 97 | _mm_storeu_si128((__m128i_u*) (dst + 2), ms); 98 | #else 99 | unsigned idx = numbytes + 2; 100 | do { 101 | dst[--idx] = "0123456789abcdef"[val % 16]; 102 | val /= 16; 103 | } while (val); 104 | #endif 105 | dst[0] = '0'; 106 | dst[1] = 'x'; 107 | return dst + numbytes + 2; 108 | } 109 | 110 | static char* 111 | fd_strpcatreg(char* restrict dst, size_t rt, size_t ri, unsigned size) { 112 | const char* nametab = 113 | "\2al\4bnd0\2cl\4bnd1\2dl\4bnd2\2bl\4bnd3" 114 | "\3spl\0 \3bpl\0 \3sil\0 \3dil\0 " 115 | "\3r8b\0 \3r9b\0 \4r10b\0 \4r11b\0 " 116 | "\4r12b\2ah\4r13b\2ch\4r14b\2dh\4r15b\2bh\0\0 " 117 | 118 | "\2ax\4tmm0\2cx\4tmm1\2dx\4tmm2\2bx\4tmm3" 119 | "\2sp\4tmm4\2bp\4tmm5\2si\4tmm6\2di\4tmm7" 120 | "\3r8w \2es\3r9w \2cs\4r10w\2ss\4r11w\2ds" 121 | "\4r12w\2fs\4r13w\2gs\4r14w\0 \4r15w\0 \2ip\0 " 122 | 123 | 
/* nametab (cont.): length-prefixed register names. The first byte of each
 * 8-byte entry is the name length; lookup below reads nametab + idx + 8*ri.
 * Two name columns are interleaved within each size-class group. */ "\3eax\3mm0\3ecx\3mm1\3edx\3mm2\3ebx\3mm3" 124 | "\3esp\3mm4\3ebp\3mm5\3esi\3mm6\3edi\3mm7" 125 | "\3r8d \2k0\3r9d \2k1\4r10d\2k2\4r11d\2k3" 126 | "\4r12d\2k4\4r13d\2k5\4r14d\2k6\4r15d\2k7\3eip\0 " 127 | 128 | "\3rax\3cr0\3rcx\0 \3rdx\3cr2\3rbx\3cr3" 129 | "\3rsp\3cr4\3rbp\0 \3rsi\0 \3rdi\0 " 130 | "\2r8 \3cr8\2r9 \3dr0\3r10\3dr1\3r11\3dr2" 131 | "\3r12\3dr3\3r13\3dr4\3r14\3dr5\3r15\3dr6\3rip\3dr7" 132 | 133 | "\5st(0)\0 \5st(1)\0 \5st(2)\0 \5st(3)\0 " 134 | "\5st(4)\0 \5st(5)\0 \5st(6)\0 \5st(7)\0 " 135 | 136 | "\4xmm0\0 \4xmm1\0 \4xmm2\0 \4xmm3\0 " 137 | "\4xmm4\0 \4xmm5\0 \4xmm6\0 \4xmm7\0 " 138 | "\4xmm8\0 \4xmm9\0 \5xmm10\0 \5xmm11\0 " 139 | "\5xmm12\0 \5xmm13\0 \5xmm14\0 \5xmm15\0 " 140 | "\5xmm16\0 \5xmm17\0 \5xmm18\0 \5xmm19\0 " 141 | "\5xmm20\0 \5xmm21\0 \5xmm22\0 \5xmm23\0 " 142 | "\5xmm24\0 \5xmm25\0 \5xmm26\0 \5xmm27\0 " 143 | "\5xmm28\0 \5xmm29\0 \5xmm30\0 \5xmm31\0 "; 144 | 145 | /* Start offset into nametab for each FdRegType: the factors are
 * group * 17*8 (size-class group) + column * 8 (row) + byte offset into the
 * interleaved entry pair. NOTE(review): stride constants mirror the table
 * layout above -- keep both in sync when editing. */ static const uint16_t nametabidx[] = { 146 | [FD_RT_GPL] = 0 * 17*8 + 0 * 8 + 0, 147 | [FD_RT_GPH] = 0 * 17*8 + 8 * 8 + 5, 148 | [FD_RT_SEG] = 1 * 17*8 + 8 * 8 + 5, 149 | [FD_RT_FPU] = 4 * 17*8 + 0 * 8 + 0, 150 | [FD_RT_MMX] = 2 * 17*8 + 0 * 8 + 4, 151 | [FD_RT_VEC] = 4 * 17*8 + 8 * 8 + 0, 152 | [FD_RT_MASK]= 2 * 17*8 + 8 * 8 + 5, 153 | [FD_RT_BND] = 0 * 17*8 + 0 * 8 + 3, 154 | [FD_RT_CR] = 3 * 17*8 + 0 * 8 + 4, 155 | [FD_RT_DR] = 3 * 17*8 + 9 * 8 + 4, 156 | [FD_RT_TMM] = 1 * 17*8 + 0 * 8 + 3, 157 | }; 158 | 159 | /* GP-register names depend on the operand size (al/ax/eax/rax), so GPL
 * selects its size-class group directly instead of using nametabidx. */ unsigned idx = rt == FD_RT_GPL ? 
size * 17*8 : nametabidx[rt]; 160 | const char* name = nametab + idx + 8*ri; 161 | for (unsigned i = 0; i < 8; i++) 162 | dst[i] = name[i+1]; 163 | if (UNLIKELY(rt == FD_RT_VEC && size > 4)) 164 | dst[0] += size - 4; 165 | return dst + *name; 166 | } 167 | 168 | const char* 169 | fdi_name(FdInstrType ty) { 170 | (void) ty; 171 | return "(invalid)"; 172 | } 173 | 174 | static char* 175 | fd_mnemonic(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(48)], const FdInstr* instr) { 176 | #define FD_DECODE_TABLE_STRTAB1 177 | static const char* mnemonic_str = 178 | #include 179 | // 20 NULL Bytes to prevent out-of-bounds reads 180 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 181 | #undef FD_DECODE_TABLE_STRTAB1 182 | 183 | #define FD_DECODE_TABLE_STRTAB2 184 | static const uint16_t mnemonic_offs[] = { 185 | #include 186 | }; 187 | #undef FD_DECODE_TABLE_STRTAB2 188 | 189 | #define FD_DECODE_TABLE_STRTAB3 190 | static const uint8_t mnemonic_lens[] = { 191 | #include 192 | }; 193 | #undef FD_DECODE_TABLE_STRTAB3 194 | 195 | const char* mnem = &mnemonic_str[mnemonic_offs[FD_TYPE(instr)]]; 196 | unsigned mnemlen = mnemonic_lens[FD_TYPE(instr)]; 197 | 198 | bool prefix_xacq_xrel = false; 199 | bool prefix_segment = false; 200 | 201 | char sizesuffix[4] = {0}; 202 | unsigned sizesuffixlen = 0; 203 | 204 | if (UNLIKELY(FD_OP_TYPE(instr, 0) == FD_OT_OFF && FD_OP_SIZELG(instr, 0) == 1)) 205 | sizesuffix[0] = 'w', sizesuffixlen = 1; 206 | 207 | switch (FD_TYPE(instr)) { 208 | case FDI_C_SEP: 209 | mnem += FD_OPSIZE(instr) & 0xc; 210 | mnemlen = 3; 211 | break; 212 | case FDI_C_EX: 213 | mnem += FD_OPSIZE(instr) & 0xc; 214 | mnemlen = FD_OPSIZE(instr) < 4 ? 
3 : 4; 215 | break; 216 | case FDI_CMPXCHGD: 217 | switch (FD_OPSIZELG(instr)) { 218 | default: break; 219 | case 2: sizesuffix[0] = '8', sizesuffix[1] = 'b', sizesuffixlen = 2; break; 220 | case 3: sizesuffix[0] = '1', sizesuffix[1] = '6', sizesuffix[2] = 'b', sizesuffixlen = 3; break; 221 | } 222 | break; 223 | case FDI_JCXZ: 224 | mnemlen = FD_ADDRSIZELG(instr) == 1 ? 4 : 5; 225 | mnem += 5 * (FD_ADDRSIZELG(instr) - 1); 226 | break; 227 | case FDI_PUSH: 228 | if (FD_OP_SIZELG(instr, 0) == 1 && FD_OP_TYPE(instr, 0) == FD_OT_IMM) 229 | sizesuffix[0] = 'w', sizesuffixlen = 1; 230 | FALLTHROUGH(); 231 | case FDI_POP: 232 | if (FD_OP_SIZELG(instr, 0) == 1 && FD_OP_TYPE(instr, 0) == FD_OT_REG && 233 | FD_OP_REG_TYPE(instr, 0) == FD_RT_SEG) 234 | sizesuffix[0] = 'w', sizesuffixlen = 1; 235 | break; 236 | case FDI_XCHG: 237 | if (FD_OP_TYPE(instr, 0) == FD_OT_MEM) 238 | prefix_xacq_xrel = true; 239 | break; 240 | case FDI_MOV: 241 | // MOV C6h/C7h can have XRELEASE prefix. 242 | if (FD_HAS_REP(instr) && FD_OP_TYPE(instr, 0) == FD_OT_MEM && 243 | FD_OP_TYPE(instr, 1) == FD_OT_IMM) 244 | prefix_xacq_xrel = true; 245 | break; 246 | case FDI_FXSAVE: 247 | case FDI_FXRSTOR: 248 | case FDI_XSAVE: 249 | case FDI_XSAVEC: 250 | case FDI_XSAVEOPT: 251 | case FDI_XSAVES: 252 | case FDI_XRSTOR: 253 | case FDI_XRSTORS: 254 | if (FD_OPSIZELG(instr) == 3) 255 | sizesuffix[0] = '6', sizesuffix[1] = '4', sizesuffixlen = 2; 256 | break; 257 | case FDI_EVX_MOV_G2X: 258 | case FDI_EVX_MOV_X2G: 259 | case FDI_EVX_PEXTR: 260 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 0)]; 261 | sizesuffixlen = 1; 262 | break; 263 | case FDI_EVX_PBROADCAST: 264 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 1)]; 265 | sizesuffixlen = 1; 266 | break; 267 | case FDI_EVX_PINSR: 268 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 2)]; 269 | sizesuffixlen = 1; 270 | break; 271 | case FDI_RET: 272 | case FDI_ENTER: 273 | case FDI_LEAVE: 274 | if (FD_OPSIZELG(instr) == 1) 275 | sizesuffix[0] = 'w', sizesuffixlen = 1; 
276 | break; 277 | case FDI_LODS: 278 | case FDI_MOVS: 279 | case FDI_CMPS: 280 | case FDI_OUTS: 281 | prefix_segment = true; 282 | FALLTHROUGH(); 283 | case FDI_STOS: 284 | case FDI_SCAS: 285 | case FDI_INS: 286 | if (FD_HAS_REP(instr)) 287 | buf = fd_strpcat(buf, fd_stre("rep ")); 288 | if (FD_HAS_REPNZ(instr)) 289 | buf = fd_strpcat(buf, fd_stre("repnz ")); 290 | if (FD_IS64(instr) && FD_ADDRSIZELG(instr) == 2) 291 | buf = fd_strpcat(buf, fd_stre("addr32 ")); 292 | if (!FD_IS64(instr) && FD_ADDRSIZELG(instr) == 1) 293 | buf = fd_strpcat(buf, fd_stre("addr16 ")); 294 | FALLTHROUGH(); 295 | case FDI_IN: 296 | case FDI_OUT: 297 | if (FD_OP_TYPE(instr, 0) != FD_OT_NONE) 298 | break; 299 | FALLTHROUGH(); 300 | case FDI_PUSHA: 301 | case FDI_POPA: 302 | case FDI_PUSHF: 303 | case FDI_POPF: 304 | case FDI_RETF: 305 | case FDI_IRET: 306 | sizesuffix[0] = "bwdq"[FD_OPSIZELG(instr)]; 307 | sizesuffixlen = 1; 308 | break; 309 | default: break; 310 | } 311 | 312 | if (UNLIKELY(prefix_xacq_xrel || FD_HAS_LOCK(instr))) { 313 | if (FD_HAS_REP(instr)) 314 | buf = fd_strpcat(buf, fd_stre("xrelease ")); 315 | if (FD_HAS_REPNZ(instr)) 316 | buf = fd_strpcat(buf, fd_stre("xacquire ")); 317 | } 318 | if (UNLIKELY(FD_HAS_LOCK(instr))) 319 | buf = fd_strpcat(buf, fd_stre("lock ")); 320 | if (UNLIKELY(prefix_segment && FD_SEGMENT(instr) != FD_REG_NONE)) { 321 | *buf++ = "ecsdfg\0"[FD_SEGMENT(instr) & 7]; 322 | *buf++ = 's'; 323 | *buf++ = ' '; 324 | } 325 | 326 | for (unsigned i = 0; i < 20; i++) 327 | buf[i] = mnem[i]; 328 | buf += mnemlen; 329 | for (unsigned i = 0; i < 4; i++) 330 | buf[i] = sizesuffix[i]; 331 | buf += sizesuffixlen; 332 | 333 | return buf; 334 | } 335 | 336 | static char* 337 | fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* instr, uint64_t addr) { 338 | buf = fd_mnemonic(buf, instr); 339 | 340 | for (int i = 0; i < 4; i++) 341 | { 342 | FdOpType op_type = FD_OP_TYPE(instr, i); 343 | if (op_type == FD_OT_NONE) 344 | break; 345 | if (i > 
0) 346 | *buf++ = ','; 347 | *buf++ = ' '; 348 | 349 | int size = FD_OP_SIZELG(instr, i); 350 | 351 | if (op_type == FD_OT_REG) { 352 | unsigned type = FD_OP_REG_TYPE(instr, i); 353 | unsigned idx = FD_OP_REG(instr, i); 354 | buf = fd_strpcatreg(buf, type, idx, size); 355 | } else if (op_type == FD_OT_MEM || op_type == FD_OT_MEMBCST) { 356 | unsigned idx_rt = FD_RT_GPL; 357 | unsigned idx_sz = FD_ADDRSIZELG(instr); 358 | switch (FD_TYPE(instr)) { 359 | case FDI_CMPXCHGD: size = FD_OPSIZELG(instr) + 1; break; 360 | case FDI_BOUND: size += 1; break; 361 | case FDI_JMPF: 362 | case FDI_CALLF: 363 | case FDI_LDS: 364 | case FDI_LES: 365 | case FDI_LFS: 366 | case FDI_LGS: 367 | case FDI_LSS: 368 | size += 6; 369 | break; 370 | case FDI_FLD: 371 | case FDI_FSTP: 372 | case FDI_FBLD: 373 | case FDI_FBSTP: 374 | size = size >= 0 ? size : 9; 375 | break; 376 | case FDI_VPGATHERQD: 377 | case FDI_VGATHERQPS: 378 | case FDI_EVX_PGATHERQD: 379 | case FDI_EVX_GATHERQPS: 380 | idx_rt = FD_RT_VEC; 381 | idx_sz = FD_OP_SIZELG(instr, 0) + 1; 382 | break; 383 | case FDI_EVX_PSCATTERQD: 384 | case FDI_EVX_SCATTERQPS: 385 | idx_rt = FD_RT_VEC; 386 | idx_sz = FD_OP_SIZELG(instr, 1) + 1; 387 | break; 388 | case FDI_VPGATHERDQ: 389 | case FDI_VGATHERDPD: 390 | case FDI_EVX_PGATHERDQ: 391 | case FDI_EVX_GATHERDPD: 392 | idx_rt = FD_RT_VEC; 393 | idx_sz = FD_OP_SIZELG(instr, 0) - 1; 394 | break; 395 | case FDI_EVX_PSCATTERDQ: 396 | case FDI_EVX_SCATTERDPD: 397 | idx_rt = FD_RT_VEC; 398 | idx_sz = FD_OP_SIZELG(instr, 1) - 1; 399 | break; 400 | case FDI_VPGATHERDD: 401 | case FDI_VPGATHERQQ: 402 | case FDI_VGATHERDPS: 403 | case FDI_VGATHERQPD: 404 | case FDI_EVX_PGATHERDD: 405 | case FDI_EVX_PGATHERQQ: 406 | case FDI_EVX_GATHERDPS: 407 | case FDI_EVX_GATHERQPD: 408 | idx_rt = FD_RT_VEC; 409 | idx_sz = FD_OP_SIZELG(instr, 0); 410 | break; 411 | case FDI_EVX_PSCATTERDD: 412 | case FDI_EVX_PSCATTERQQ: 413 | case FDI_EVX_SCATTERDPS: 414 | case FDI_EVX_SCATTERQPD: 415 | idx_rt = FD_RT_VEC; 416 
| idx_sz = FD_OP_SIZELG(instr, 1); 417 | break; 418 | default: break; 419 | } 420 | 421 | if (op_type == FD_OT_MEMBCST) 422 | size = FD_OP_BCSTSZLG(instr, i); 423 | 424 | const char* ptrsizes = 425 | "\00 " 426 | "\11byte ptr " 427 | "\11word ptr " 428 | "\12dword ptr " 429 | "\12qword ptr " 430 | "\14xmmword ptr " 431 | "\14ymmword ptr " 432 | "\14zmmword ptr " 433 | "\12dword ptr " // far ptr; word + 2 434 | "\12fword ptr " // far ptr; dword + 2 435 | "\12tbyte ptr "; // far ptr/FPU; qword + 2 436 | const char* ptrsize = ptrsizes + 16 * (size + 1); 437 | buf = fd_strpcat(buf, (struct FdStr) { ptrsize+1, *ptrsize }); 438 | 439 | unsigned seg = FD_SEGMENT(instr); 440 | if (seg != FD_REG_NONE) { 441 | *buf++ = "ecsdfg\0"[seg & 7]; 442 | *buf++ = 's'; 443 | *buf++ = ':'; 444 | } 445 | *buf++ = '['; 446 | 447 | bool has_base = FD_OP_BASE(instr, i) != FD_REG_NONE; 448 | bool has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE; 449 | if (has_base) 450 | buf = fd_strpcatreg(buf, FD_RT_GPL, FD_OP_BASE(instr, i), FD_ADDRSIZELG(instr)); 451 | if (has_idx) { 452 | if (has_base) 453 | *buf++ = '+'; 454 | *buf++ = '0' + (1 << FD_OP_SCALE(instr, i)); 455 | *buf++ = '*'; 456 | buf = fd_strpcatreg(buf, idx_rt, FD_OP_INDEX(instr, i), idx_sz); 457 | } 458 | uint64_t disp = FD_OP_DISP(instr, i); 459 | if (disp && (has_base || has_idx)) { 460 | *buf++ = (int64_t) disp < 0 ? 
'-' : '+'; 461 | if ((int64_t) disp < 0) 462 | disp = -disp; 463 | } 464 | if (FD_ADDRSIZELG(instr) == 1) 465 | disp &= 0xffff; 466 | else if (FD_ADDRSIZELG(instr) == 2) 467 | disp &= 0xffffffff; 468 | if (disp || (!has_base && !has_idx)) 469 | buf = fd_strpcatnum(buf, disp); 470 | *buf++ = ']'; 471 | 472 | if (UNLIKELY(op_type == FD_OT_MEMBCST)) { 473 | // {1toX}, X = FD_OP_SIZE(instr, i) / BCSTSZ (=> 2/4/8/16/32) 474 | unsigned bcstszidx = FD_OP_SIZELG(instr, i) - FD_OP_BCSTSZLG(instr, i) - 1; 475 | const char* bcstsizes = "\6{1to2} \6{1to4} \6{1to8} \7{1to16}\7{1to32} "; 476 | const char* bcstsize = bcstsizes + bcstszidx * 8; 477 | buf = fd_strpcat(buf, (struct FdStr) { bcstsize+1, *bcstsize }); 478 | } 479 | } else if (op_type == FD_OT_IMM || op_type == FD_OT_OFF) { 480 | uint64_t immediate = FD_OP_IMM(instr, i); 481 | // Some instructions have actually two immediate operands which are 482 | // decoded as a single operand. Split them here appropriately. 483 | switch (FD_TYPE(instr)) { 484 | default: 485 | goto nosplitimm; 486 | case FDI_SSE_EXTRQ: 487 | case FDI_SSE_INSERTQ: 488 | buf = fd_strpcatnum(buf, immediate & 0xff); 489 | buf = fd_strpcat(buf, fd_stre(", ")); 490 | immediate = (immediate >> 8) & 0xff; 491 | break; 492 | case FDI_ENTER: 493 | buf = fd_strpcatnum(buf, immediate & 0xffff); 494 | buf = fd_strpcat(buf, fd_stre(", ")); 495 | immediate = (immediate >> 16) & 0xff; 496 | break; 497 | case FDI_JMPF: 498 | case FDI_CALLF: 499 | buf = fd_strpcatnum(buf, (immediate >> (8 << size)) & 0xffff); 500 | *buf++ = ':'; 501 | // immediate is masked below. 
502 | break; 503 | } 504 | 505 | nosplitimm: 506 | if (op_type == FD_OT_OFF) 507 | immediate += addr + FD_SIZE(instr); 508 | if (size == 0) 509 | immediate &= 0xff; 510 | else if (size == 1) 511 | immediate &= 0xffff; 512 | else if (size == 2) 513 | immediate &= 0xffffffff; 514 | buf = fd_strpcatnum(buf, immediate); 515 | } 516 | 517 | if (i == 0 && FD_MASKREG(instr)) { 518 | *buf++ = '{'; 519 | buf = fd_strpcatreg(buf, FD_RT_MASK, FD_MASKREG(instr), 0); 520 | *buf++ = '}'; 521 | if (FD_MASKZERO(instr)) 522 | buf = fd_strpcat(buf, fd_stre("{z}")); 523 | } 524 | } 525 | if (UNLIKELY(FD_ROUNDCONTROL(instr) != FD_RC_MXCSR)) { 526 | switch (FD_ROUNDCONTROL(instr)) { 527 | case FD_RC_RN: buf = fd_strpcat(buf, fd_stre(", {rn-sae}")); break; 528 | case FD_RC_RD: buf = fd_strpcat(buf, fd_stre(", {rd-sae}")); break; 529 | case FD_RC_RU: buf = fd_strpcat(buf, fd_stre(", {ru-sae}")); break; 530 | case FD_RC_RZ: buf = fd_strpcat(buf, fd_stre(", {rz-sae}")); break; 531 | case FD_RC_SAE: buf = fd_strpcat(buf, fd_stre(", {sae}")); break; 532 | default: break; // should not happen 533 | } 534 | } 535 | *buf++ = '\0'; 536 | return buf; 537 | } 538 | 539 | void 540 | fd_format(const FdInstr* instr, char* buffer, size_t len) 541 | { 542 | fd_format_abs(instr, 0, buffer, len); 543 | } 544 | 545 | void 546 | fd_format_abs(const FdInstr* instr, uint64_t addr, char* restrict buffer, size_t len) { 547 | char tmp[128]; 548 | char* buf = buffer; 549 | if (UNLIKELY(len < 128)) { 550 | if (!len) 551 | return; 552 | buf = tmp; 553 | } 554 | 555 | char* end = fd_format_impl(buf, instr, addr); 556 | 557 | if (buf != buffer) { 558 | unsigned i; 559 | for (i = 0; i < (end - tmp) && i < len-1; i++) 560 | buffer[i] = tmp[i]; 561 | buffer[i] = '\0'; 562 | } 563 | } 564 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('fadec', ['c'], default_options: 
['warning_level=3', 'c_std=c11'], 2 | meson_version: '>=0.49') 3 | 4 | python3 = find_program('python3') 5 | 6 | # Check Python version 7 | py_version_res = run_command(python3, ['--version'], check: true) 8 | py_version = py_version_res.stdout().split(' ')[1] 9 | if not py_version.version_compare('>=3.6') 10 | error('Python 3.6 required, got @0@'.format(py_version)) 11 | endif 12 | 13 | has_cpp = add_languages('cpp', required: false) 14 | 15 | cc = meson.get_compiler('c') 16 | if cc.has_argument('-fstrict-aliasing') 17 | add_project_arguments('-fstrict-aliasing', language: 'c') 18 | endif 19 | if get_option('warning_level').to_int() >= 3 20 | extra_warnings = [ 21 | '-Wmissing-prototypes', '-Wshadow', '-Wwrite-strings', '-Wswitch-default', 22 | '-Winline', '-Wstrict-prototypes', '-Wundef', 23 | # We have strings longer than 4095 characters 24 | '-Wno-overlength-strings', 25 | # GCC 8 requires an extra option for strict cast alignment checks, Clang 26 | # always warns, even on architectures without alignment requirements. 
27 | '-Wcast-align', '-Wcast-align=strict', 28 | ] 29 | add_project_arguments(cc.get_supported_arguments(extra_warnings), language: 'c') 30 | endif 31 | if cc.get_argument_syntax() == 'msvc' 32 | # Disable some warnings to align warnings with GCC and Clang: 33 | add_project_arguments('-D_CRT_SECURE_NO_WARNINGS', 34 | '/wd4018', # - Signed/unsigned comparison 35 | '/wd4146', # - Unary minus operator applied to unsigned 36 | # type, result still unsigned 37 | '/wd4244', # - Possible loss of data in conversion 38 | # from integer type to smaller integer type 39 | '/wd4245', # - Signed/unsigned assignment 40 | '/wd4267', # - Possible loss of data in conversion 41 | # from size_t to smaller type 42 | '/wd4310', # - Possible loss of data in conversion 43 | # of constant value to smaller type 44 | language: 'c') 45 | endif 46 | if cc.get_id() == 'msvc' and has_cpp 47 | cxx = meson.get_compiler('cpp') 48 | if cxx.get_id() == 'msvc' 49 | # Enable standard conformant preprocessor 50 | add_project_arguments(cxx.get_supported_arguments(['-Zc:preprocessor']), language: 'cpp') 51 | endif 52 | endif 53 | 54 | sources = [] 55 | headers = [] 56 | components = [] 57 | 58 | if get_option('with_decode') 59 | components += 'decode' 60 | headers += files('fadec.h') 61 | sources += files('decode.c', 'format.c') 62 | endif 63 | if get_option('with_encode') 64 | components += 'encode' 65 | headers += files('fadec-enc.h') 66 | sources += files('encode.c') 67 | endif 68 | if get_option('with_encode2') 69 | components += 'encode2' 70 | headers += files('fadec-enc2.h') 71 | sources += files('encode2.c') 72 | endif 73 | 74 | generate_args = [] 75 | if get_option('archmode') != 'only64' 76 | generate_args += ['--32'] 77 | endif 78 | if get_option('archmode') != 'only32' 79 | generate_args += ['--64'] 80 | endif 81 | if get_option('with_undoc') 82 | generate_args += ['--with-undoc'] 83 | endif 84 | if not meson.is_subproject() 85 | generate_args += ['--stats'] 86 | endif 87 | 88 | tables = [] 89 
| foreach component : components 90 | tables += custom_target('@0@_table'.format(component), 91 | command: [python3, '@INPUT0@', component, 92 | '@INPUT1@', '@OUTPUT@'] + generate_args, 93 | input: files('parseinstrs.py', 'instrs.txt'), 94 | output: ['fadec-@0@-public.inc'.format(component), 95 | 'fadec-@0@-private.inc'.format(component)], 96 | install: true, 97 | install_dir: [get_option('includedir'), false]) 98 | endforeach 99 | 100 | libfadec = static_library('fadec', sources, tables, install: true) 101 | fadec = declare_dependency(link_with: libfadec, 102 | include_directories: include_directories('.'), 103 | sources: tables) 104 | install_headers(headers) 105 | 106 | foreach component : components 107 | test(component, executable('@0@-test'.format(component), 108 | '@0@-test.c'.format(component), 109 | dependencies: fadec)) 110 | if component == 'encode2' and has_cpp 111 | test(component + '-cpp', executable('@0@-test-cpp'.format(component), 112 | '@0@-test.cc'.format(component), 113 | dependencies: fadec)) 114 | endif 115 | endforeach 116 | 117 | if meson.version().version_compare('>=0.54.0') 118 | meson.override_dependency('fadec', fadec) 119 | endif 120 | 121 | pkg = import('pkgconfig') 122 | pkg.generate(libraries: libfadec, 123 | version: '0.1', 124 | name: 'fadec', 125 | filebase: 'fadec', 126 | description: 'Fast Decoder for x86-32 and x86-64') 127 | -------------------------------------------------------------------------------- /meson_options.txt: -------------------------------------------------------------------------------- 1 | option('archmode', type: 'combo', choices: ['both', 'only32', 'only64']) 2 | option('with_undoc', type: 'boolean', value: false) 3 | option('with_decode', type: 'boolean', value: true) 4 | option('with_encode', type: 'boolean', value: true) 5 | # encode2 is off-by-default to reduce size and compile-time 6 | option('with_encode2', type: 'boolean', value: false) 7 | 
-------------------------------------------------------------------------------- /parseinstrs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import bisect 5 | from collections import OrderedDict, defaultdict, namedtuple, Counter 6 | from enum import Enum 7 | from itertools import product 8 | import re 9 | import struct 10 | from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString 11 | 12 | INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ 13 | ("modrm_idx", 2), 14 | ("modreg_idx", 2), 15 | ("vexreg_idx", 2), # note: vexreg w/o vex prefix is zeroreg_val 16 | ("imm_idx", 2), 17 | ("evex_bcst", 1), 18 | ("evex_mask", 1), 19 | ("zeroreg_val", 1), 20 | ("lock", 1), 21 | ("imm_control", 3), 22 | ("vsib", 1), 23 | ("modrm_size", 2), 24 | ("modreg_size", 2), 25 | ("vexreg_size", 2), 26 | ("imm_size", 2), 27 | ("legacy", 1), 28 | ("unused2", 1), 29 | ("size_fix1", 3), 30 | ("size_fix2", 2), 31 | ("instr_width", 1), 32 | ("modrm_ty", 3), 33 | ("modreg_ty", 3), 34 | ("vexreg_ty", 2), 35 | ("imm_ty", 0), 36 | ("evex_rc", 2), 37 | ("evex_bcst16", 1), 38 | ("opsize", 3), 39 | ("modrm", 1), 40 | ("ign66", 1), 41 | ][::-1]) 42 | class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)): 43 | def __new__(cls, **kwargs): 44 | init = {**{f: 0 for f in cls._fields}, **kwargs} 45 | return super(InstrFlags, cls).__new__(cls, **init) 46 | def _encode(self): 47 | enc = 0 48 | for value, size in zip(self, INSTR_FLAGS_SIZES): 49 | enc = enc << size | (value & ((1 << size) - 1)) 50 | return enc 51 | 52 | ENCODINGS = { 53 | "NP": InstrFlags(), 54 | "M": InstrFlags(modrm=1, modrm_idx=0^3), 55 | "R": InstrFlags(modrm=1, modreg_idx=0^3), # AMX TILEZERO 56 | "M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1), 57 | "MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4), 58 | "IM": InstrFlags(modrm=1, modrm_idx=1^3, imm_idx=0^3, imm_control=4), 59 | 
"MC": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3, zeroreg_val=1), 60 | "MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3), 61 | "RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3), 62 | "RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3), 63 | "MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4), 64 | "RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4), 65 | "MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3, zeroreg_val=1), 66 | "AM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3), 67 | "MA": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3), 68 | "I": InstrFlags(imm_idx=0^3, imm_control=4), 69 | "IA": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=4), 70 | "O": InstrFlags(modrm_idx=0^3), 71 | "OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4), 72 | "OA": InstrFlags(modrm_idx=0^3, vexreg_idx=1^3), 73 | "S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5 74 | "A": InstrFlags(vexreg_idx=0^3), 75 | "D": InstrFlags(imm_idx=0^3, imm_control=6), 76 | "FD": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=2), 77 | "TD": InstrFlags(vexreg_idx=1^3, imm_idx=0^3, imm_control=2), 78 | 79 | "RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3), 80 | "RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4), 81 | "RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3), 82 | "RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3), 83 | "VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3), 84 | "VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4), 85 | "MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3), 86 | "MRV": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3), 87 | } 88 | ENCODING_OPTYS = ["modrm", "modreg", 
"vexreg", "imm"] 89 | ENCODING_OPORDER = { enc: sorted(ENCODING_OPTYS, key=lambda ty: getattr(ENCODINGS[enc], ty+"_idx")^3) for enc in ENCODINGS} 90 | 91 | OPKIND_CANONICALIZE = { 92 | "I": "IMM", # immediate 93 | "A": "IMM", # Direct address, far jmp 94 | "J": "IMM", # RIP-relative address 95 | "M": "MEM", # ModRM.r/m selects memory only 96 | "O": "MEM", # Direct address, FD/TD encoding 97 | "R": "GP", # ModRM.r/m selects GP 98 | "B": "GP", # VEX.vvvv selects GP 99 | "E": "GP", # ModRM.r/m selects GP or memory 100 | "G": "GP", # ModRM.reg selects GP 101 | "P": "MMX", # ModRM.reg selects MMX 102 | "N": "MMX", # ModRM.r/m selects MMX 103 | "Q": "MMX", # ModRM.r/m selects MMX or memory 104 | "V": "XMM", # ModRM.reg selects XMM 105 | "H": "XMM", # VEX.vvvv selects XMM 106 | "L": "XMM", # bits7:4 of imm8 select XMM 107 | "U": "XMM", # ModRM.r/m selects XMM 108 | "W": "XMM", # ModRM.r/m selects XMM or memory 109 | "S": "SEG", # ModRM.reg selects SEG 110 | "C": "CR", # ModRM.reg selects CR 111 | "D": "DR", # ModRM.reg selects DR 112 | 113 | # Custom names 114 | "F": "FPU", # F is used for RFLAGS by Intel 115 | "K": "MASK", 116 | "T": "TMM", 117 | "Z": "BND", 118 | } 119 | OPKIND_SIZES = { 120 | "b": 1, 121 | "w": 2, 122 | "d": 4, 123 | "ss": 4, # Scalar single of XMM (d) 124 | "q": 8, 125 | "sd": 8, # Scalar double of XMM (q) 126 | "t": 10, # FPU/ten-byte 127 | "dq": 16, 128 | "qq": 32, 129 | "oq": 64, # oct-quadword 130 | "": 0, # for MEMZ 131 | "v": -1, # operand size (w/d/q) 132 | "y": -1, # operand size (d/q) 133 | "z": -1, # w/d (immediates, min(operand size, 4)) 134 | "a": -1, # z:z 135 | "p": -1, # w:z 136 | "x": -2, # vector size 137 | "h": -3, # half x 138 | "f": -4, # fourth x 139 | "e": -5, # eighth x 140 | "pd": -2, # packed double (x) 141 | "ps": -2, # packed single (x) 142 | 143 | # Custom names 144 | "bs": -1, # sign-extended immediate 145 | "zd": 4, # z-immediate, but always 4-byte operand 146 | "zq": 8, # z-immediate, but always 8-byte operand 147 | } 
148 | class OpKind(NamedTuple): 149 | regkind: str 150 | sizestr: str 151 | 152 | SZ_OP = -1 153 | SZ_VEC = -2 154 | SZ_VEC_HALF = -3 155 | SZ_VEC_QUARTER = -4 156 | SZ_VEC_EIGHTH = -5 157 | 158 | def abssize(self, opsz=None, vecsz=None): 159 | res = opsz if self.size == self.SZ_OP else \ 160 | vecsz if self.size == self.SZ_VEC else \ 161 | vecsz >> 1 if self.size == self.SZ_VEC_HALF else \ 162 | vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \ 163 | vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size 164 | if res is None: 165 | raise Exception("unspecified operand size") 166 | return res 167 | def immsize(self, opsz): 168 | maxsz = 1 if self.sizestr == "bs" else 4 if self.sizestr[0] == "z" else 8 169 | return min(maxsz, self.abssize(opsz)) 170 | @property 171 | def kind(self): 172 | return OPKIND_CANONICALIZE[self.regkind] 173 | @property 174 | def size(self): 175 | return OPKIND_SIZES[self.sizestr] 176 | @classmethod 177 | def parse(cls, op): 178 | return cls(op[0], op[1:]) 179 | 180 | def __eq__(self, other): 181 | # Custom equality for canonicalization of kind/size. 182 | return isinstance(other, OpKind) and self.kind == other.kind and self.size == other.size 183 | 184 | class InstrDesc(NamedTuple): 185 | mnemonic: str 186 | encoding: str 187 | operands: Tuple[str, ...] 
188 | flags: FrozenSet[str] 189 | 190 | OPKIND_REGTYS = { 191 | ("modrm", "GP"): 1, ("modreg", "GP"): 1, ("vexreg", "GP"): 1, 192 | ("modrm", "XMM"): 0, ("modreg", "XMM"): 0, ("vexreg", "XMM"): 0, 193 | ("modrm", "MMX"): 5, ("modreg", "MMX"): 5, 194 | ("modrm", "FPU"): 4, ("vexreg", "FPU"): 3, 195 | ("modrm", "TMM"): 6, ("modreg", "TMM"): 6, ("vexreg", "TMM"): 3, 196 | ("modrm", "MASK"): 7, ("modreg", "MASK"): 7, ("vexreg", "MASK"): 2, 197 | ("modreg", "SEG"): 3, 198 | ("modreg", "DR"): 0, # handled in code 199 | ("modreg", "CR"): 0, # handled in code 200 | ("modrm", "MEM"): 0, 201 | ("imm", "MEM"): 0, ("imm", "IMM"): 0, ("imm", "XMM"): 0, 202 | } 203 | OPKIND_SIZES = { 204 | 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, 205 | # OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, OpKind.SZ_HALFVEC: -4, 206 | } 207 | 208 | @classmethod 209 | def parse(cls, desc): 210 | desc = desc.split() 211 | mnem, _, compactDesc = desc[5].partition("+") 212 | flags = frozenset(desc[6:] + [{ 213 | "w": "INSTR_WIDTH", 214 | "a": "U67", 215 | "s": "USEG", 216 | "k": "MASK", 217 | "b": "BCST", 218 | "e": "SAE", 219 | "r": "ER", 220 | }[c] for c in compactDesc]) 221 | operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-") 222 | return cls(mnem, desc[0], operands, flags) 223 | 224 | def imm_size(self, opsz): 225 | flags = ENCODINGS[self.encoding] 226 | if flags.imm_control < 3: 227 | return 0 228 | if flags.imm_control == 3: 229 | return 1 230 | if self.mnemonic == "ENTER": 231 | return 3 232 | return self.operands[flags.imm_idx^3].immsize(opsz) 233 | 234 | def dynsizes(self): 235 | dynopsz = set(op.size for op in self.operands if op.size < 0) 236 | if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP) 237 | if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1: 238 | raise Exception(f"conflicting dynamic operand sizes in {self}") 239 | return dynopsz 240 | 241 | def encode(self, mnem, ign66, modrm): 242 | flags = ENCODINGS[self.encoding] 243 | extraflags = {} 244 | 245 
| dynopsz = self.dynsizes() 246 | # Operand size either refers to vectors or GP, but not both 247 | if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size 248 | if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}: 249 | raise Exception(f"incompatible flags in {self}") 250 | # Allow at most the vector size together with one alternative 251 | dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC}) 252 | extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1]) 253 | if len(dynsizes) > 2: 254 | raise Exception(f"conflicting vector operand sizes in {self}") 255 | else: # either empty or GP operand size 256 | dynsizes = [OpKind.SZ_OP] 257 | if "SZ8" in self.flags: 258 | dynsizes = [] 259 | if "D64" in self.flags: extraflags["opsize"] = 2 260 | if "F64" in self.flags: extraflags["opsize"] = 3 261 | extraflags["lock"] = "LOCK" in self.flags 262 | 263 | if (self.flags & {"SZ8", "INSTR_WIDTH"} or 264 | mnem in ("MOVSX", "MOVZX", "XCHG_NOP", "3DNOW")): 265 | extraflags["legacy"] = 1 266 | # INSTR_WIDTH defaults to zero, so only enable when SZ8 is unset 267 | if "INSTR_WIDTH" in self.flags and "SZ8" not in self.flags: 268 | extraflags["instr_width"] = 1 269 | 270 | imm_byte = self.imm_size(4) == 1 271 | extraflags["imm_control"] = flags.imm_control | imm_byte 272 | 273 | # Sort fixed sizes encodable in size_fix2 as second element. 274 | # But: byte-sized immediates are handled specially and don't cost space. 275 | fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if 276 | op.size >= 0 and not (imm_byte and op.kind == "IMM")) 277 | fixed = sorted(fixed, key=lambda x: 1 <= x <= 4) 278 | if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): 279 | raise Exception(f"invalid fixed sizes {fixed} in {self}") 280 | sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c. 
281 | extraflags["size_fix1"] = sizes[0] 282 | extraflags["size_fix2"] = sizes[1] - 1 283 | 284 | for i, opkind in enumerate(self.operands): 285 | sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size 286 | if opkind.kind == "IMM": 287 | if imm_byte and sz not in [1] + dynsizes[:1]: 288 | raise Exception(f"imm_byte with opsize {sz} in {self}") 289 | extraflags[f"imm_size"] = sz == 1 if imm_byte else sizes.index(sz) 290 | else: 291 | opname = ENCODING_OPORDER[self.encoding][i] 292 | extraflags[f"{opname}_size"] = sizes.index(sz) 293 | extraflags[f"{opname}_ty"] = self.OPKIND_REGTYS[opname, opkind.kind] 294 | 295 | # Miscellaneous Flags 296 | if "VSIB" in self.flags: extraflags["vsib"] = 1 297 | if "BCST" in self.flags: extraflags["evex_bcst"] = 1 298 | if "BCST16" in self.flags: extraflags["evex_bcst16"] = 1 299 | if "MASK" in self.flags: extraflags["evex_mask"] = 1 300 | if "SAE" in self.flags: extraflags["evex_rc"] = 1 301 | if "ER" in self.flags: extraflags["evex_rc"] = 3 302 | if modrm: extraflags["modrm"] = 1 303 | 304 | if "U66" not in self.flags and (ign66 or "I66" in self.flags): 305 | extraflags["ign66"] = 1 306 | 307 | enc = flags._replace(**extraflags)._encode() 308 | enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16)) 309 | # First 2 bytes are the mnemonic, last 6 bytes are the encoding. 310 | return f"{{FDI_{mnem}, {enc[0]}, {enc[1]}, {enc[2]}}}" 311 | 312 | class EntryKind(Enum): 313 | NONE = 0x00 314 | PREFIX = 0x10 315 | INSTR = 0x20 316 | WEAKINSTR = 0x30 317 | TABLE16 = 0x01 318 | TABLE8E = 0x11 319 | ESCAPE = 0x02 320 | TABLE256 = 0x12 321 | TABLE_VEX = 0x22 322 | TABLE_PREFIX = 0x03 323 | TABLE_ROOT = -1 324 | @property 325 | def is_table(self): 326 | return self != EntryKind.INSTR and self != EntryKind.WEAKINSTR and self != EntryKind.PREFIX 327 | 328 | opcode_regex = re.compile( 329 | r"^(?:(?P(?PE?VEX\.)?(?PNP|66|F2|F3|NFx)\." + 330 | r"(?:W(?P[01])\.)?(?:L(?P0|1|12|2|IG)\.)?))?" 
+ 331 | r"(?P0f38|0f3a|0f|M[567]\.|)" + 332 | r"(?P[0-9a-f]{2})" + 333 | r"(?:/(?P[0-7]|[rm][0-7]?|[0-7][rm])|(?P[c-f][0-9a-f]))?(?P\+)?$") 334 | 335 | class Opcode(NamedTuple): 336 | prefix: Union[None, str] # None/NP/66/F2/F3/NFx 337 | escape: int # [0, 0f, 0f38, 0f3a] 338 | opc: int 339 | extended: bool # Extend opc or opcext in ModRM.rm, if present 340 | # Fixed ModRM.mod ("r"/"m"), ModRM.reg, ModRM.rm (opcext + AMX) 341 | modrm: Tuple[Union[None, str], Union[None, int], Union[None, int]] 342 | vex: int # 0 = legacy, 1 = VEX, 2 = EVEX 343 | vexl: Union[str, None] # 0, 1, 12, 2, IG, None = used, both 344 | rexw: Union[str, None] # 0, 1, None = both (or ignored) 345 | 346 | @classmethod 347 | def parse(cls, opcode_string): 348 | match = opcode_regex.match(opcode_string) 349 | if match is None: 350 | raise Exception(opcode_string) 351 | return None 352 | 353 | opcext = int(match.group("opcext") or "0", 16) 354 | modreg = match.group("modreg") 355 | if opcext: 356 | modrm = "r", (opcext >> 3) & 7, opcext & 7 357 | elif modreg: 358 | if modreg[0] in "rm": 359 | modrm = modreg[0], None, int(modreg[1:]) if modreg[1:] else None 360 | else: 361 | modrm = modreg[1:] or None, int(modreg[0]), None 362 | else: 363 | modrm = None, None, None 364 | 365 | return cls( 366 | prefix=match.group("legacy"), 367 | escape=["", "0f", "0f38", "0f3a", "M4.", "M5.", "M6.", "M7."].index(match.group("escape")), 368 | opc=int(match.group("opcode"), 16), 369 | extended=match.group("extended") is not None, 370 | modrm=modrm, 371 | vex=[None, "VEX.", "EVEX."].index(match.group("vex")), 372 | vexl=match.group("vexl"), 373 | rexw=match.group("rexw"), 374 | ) 375 | 376 | def verifyOpcodeDesc(opcode, desc): 377 | flags = ENCODINGS[desc.encoding] 378 | oporder = ENCODING_OPORDER[desc.encoding] 379 | expected_immkinds = ["", "I", "O", "L", "IA", "", "J"][flags.imm_control] 380 | fixed_mod = opcode.modrm[0] 381 | if opcode.extended or desc.mnemonic in ("MOV_CR2G", "MOV_DR2G", "MOV_G2CR", "MOV_G2DR"): 
        fixed_mod = "r"
    # Which addressing letters are acceptable for each slot given ModRM.mod.
    expected_modrmkinds = {None: "EQWFKT", "r": "RNUFKT", "m": "M"}[fixed_mod]
    # allow F and R for zeroreg, which we overlap with vexreg
    expected_vexkinds = "BHKT" if opcode.vex else "BHRF"
    for i, opkind in enumerate(desc.operands):
        if oporder[i] == "modrm" and opkind.regkind not in expected_modrmkinds:
            raise Exception(f"modrm operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "modreg" and opkind.regkind not in "GPVSCDFKT":
            raise Exception(f"modreg operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "vexreg" and opkind.regkind not in expected_vexkinds:
            raise Exception(f"vexreg operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "imm" and opkind.regkind not in expected_immkinds:
            raise Exception(f"imm operand-regkind mismatch {opcode}, {desc}")
    if "INSTR_WIDTH" in desc.flags and len(desc.operands) > 3:
        raise Exception(f"+w with four operands {opcode}, {desc}")
    # Structural constraints on escape bytes, prefixes and VEX/EVEX usage.
    if opcode.escape == 2 and flags.imm_control != 0:
        raise Exception(f"0f38 has no immediate operand {opcode}, {desc}")
    if opcode.escape == 3 and desc.imm_size(4) != 1:
        raise Exception(f"0f3a must have immediate byte {opcode}, {desc}")
    if opcode.escape == 0 and opcode.prefix is not None:
        raise Exception(f"unescaped opcode has prefix {opcode}, {desc}")
    if opcode.escape == 0 and opcode.vexl is not None:
        raise Exception(f"unescaped opcode has L specifier {opcode}, {desc}")
    if opcode.escape == 0 and opcode.rexw is not None:
        raise Exception(f"unescaped opcode has W specifier {opcode}, {desc}")
    if opcode.escape == 0 and opcode.vex:
        raise Exception(f"VEX opcode without escape {opcode}, {desc}")
    if opcode.vex and opcode.extended:
        raise Exception(f"VEX/EVEX must not be extended {opcode}, {desc}")
    if opcode.vex and opcode.prefix not in ("NP", "66", "F2", "F3"):
        raise Exception(f"VEX/EVEX must have mandatory prefix {opcode}, {desc}")
    if opcode.vexl == "IG" and desc.dynsizes() - {OpKind.SZ_OP}:
        raise Exception(f"(E)VEX.LIG with dynamic vector size {opcode}, {desc}")
    if "VSIB" in desc.flags and opcode.modrm[0] != "m":
        raise Exception(f"VSIB for non-memory opcode {opcode}, {desc}")
    if opcode.vex == 2 and flags.vexreg_idx:
        # Checking this here allows to omit check for V' in decoder.
        if desc.operands[flags.vexreg_idx ^ 3].kind != "XMM":
            raise Exception(f"EVEX.vvvv must refer to XMM {opcode}, {desc}")
    if opcode.vex == 2 and flags.modreg_idx and flags.modreg_idx ^ 3 != 0:
        # EVEX.z=0 is only checked for mask operands in ModReg
        if desc.operands[flags.modreg_idx ^ 3].kind == "MASK":
            raise Exception(f"ModRM.reg mask not first operand {opcode}, {desc}")
    # Verify tuple type
    if opcode.vex == 2 and opcode.modrm[0] != "r":
        tts = [s for s in desc.flags if s.startswith("TUPLE")]
        if len(tts) != 1:
            raise Exception(f"missing tuple type in {opcode}, {desc}")
        if flags.modrm_idx == 3 ^ 3:
            raise Exception(f"missing memory operand {opcode}, {desc}")
        # From Intel SDM
        # (broadcast element size, required EVEX.W, memory size per L=128/256/512)
        bcst, evexw, vszs = {
            "TUPLE_FULL_16": (2, "0", ( 16, 32, 64)),
            "TUPLE_FULL_32": (4, "0", ( 16, 32, 64)),
            "TUPLE_FULL_64": (8, "1", ( 16, 32, 64)),
            "TUPLE_HALF_16": (2, "0", ( 8, 16, 32)),
            "TUPLE_HALF_32": (4, "0", ( 8, 16, 32)),
            "TUPLE_HALF_64": (8, "1", ( 8, 16, 32)),
            "TUPLE_QUARTER_16": (2, "0", ( 4, 8, 16)),
            "TUPLE_FULL_MEM": (None, None, ( 16, 32, 64)),
            "TUPLE_HALF_MEM": (None, None, ( 8, 16, 32)),
            "TUPLE_QUARTER_MEM": (None, None, ( 4, 8, 16)),
            "TUPLE_EIGHTH_MEM": (None, None, ( 2, 4, 8)),
            "TUPLE1_SCALAR_8": (None, None, ( 1, 1, 1)),
            "TUPLE1_SCALAR_16": (None, None, ( 2, 2, 2)),
            "TUPLE1_SCALAR_32": (None, "0", ( 4, 4, 4)),
            "TUPLE1_SCALAR_64": (None, "1", ( 8, 8, 8)),
            "TUPLE1_SCALAR_OPSZ": (None, None, ( 0, 0, 0)),
            "TUPLE1_FIXED_32": (None, None, ( 4, 4, 4)),
            "TUPLE1_FIXED_64": (None, None, ( 8, 8, 8)),
            "TUPLE2_32": (None, "0", ( 8, 8, 8)),
            "TUPLE2_64": (None, "1", (None, 16, 16)),
            "TUPLE4_32": (None, "0", (None, 16, 16)),
            "TUPLE4_64": (None, "1", (None, None, 32)),
            "TUPLE8_32": (None, "0", (None, None, 32)),
            "TUPLE_MEM128": (None, None, ( 16, 16, 16)),
            # TODO: Fix MOVDDUP tuple size :(
            "TUPLE_MOVDDUP": (None, None, ( 16, 32, 64)),
        }[tts[0]]
        if "BCST" in desc.flags:
            if bcst is None:
                raise Exception(f"broadcast on incompatible type {opcode}, {desc}")
            if ("BCST16" in desc.flags) != (bcst == 2):
                raise Exception(f"bcst16 mismatch, should be {bcst} {opcode}, {desc}")
        # EVEX.W is used to distinguish 4/8-byte broadcast size
        if evexw and opcode.rexw != evexw:
            raise Exception(f"incompatible EVEX.W {opcode}, {desc}")
        for l, tupsz in enumerate(vszs):
            opsz = desc.operands[flags.modrm_idx ^ 3].abssize(0, 16 << l)
            if tupsz is not None and opsz != tupsz:
                raise Exception(f"memory size {opsz} != {tupsz} {opcode}, {desc}")

class Trie:
    """Multi-level decode table: opcode byte(s) -> escape -> prefix -> ... .

    Levels (KIND_ORDER) must match the traversal order in decode.c.
    """
    KIND_ORDER = (EntryKind.TABLE_ROOT, EntryKind.ESCAPE, EntryKind.TABLE256,
                  EntryKind.TABLE_PREFIX, EntryKind.TABLE16,
                  EntryKind.TABLE8E, EntryKind.TABLE_VEX)
    # Number of slots per table of each kind.
    TABLE_LENGTH = {
        EntryKind.TABLE_ROOT: 256,
        EntryKind.ESCAPE: 8,
        EntryKind.TABLE256: 256,
        EntryKind.TABLE_PREFIX: 4,
        EntryKind.TABLE16: 16,
        EntryKind.TABLE8E: 8,
        EntryKind.TABLE_VEX: 8,
    }

    def __init__(self, root_count):
        # trie[0] holds one root slot per decoding mode (e.g. 32/64 bit).
        self.trie = []
        self.trie.append([None] * root_count)
        self.kindmap = defaultdict(list)

    def _add_table(self, kind):
        """Append a fresh empty table of the given kind; return its index."""
        self.trie.append([None] * self.TABLE_LENGTH[kind])
        self.kindmap[kind].append(len(self.trie) - 1)
        return len(self.trie) - 1

    def _clone(self, elem):
        """Deep-copy a (kind, table-index) entry; leaves are shared as-is."""
        if not elem or not
elem[0].is_table:
            return elem
        new_num = self._add_table(elem[0])
        self.trie[new_num] = [self._clone(e) for e in self.trie[elem[1]]]
        return elem[0], new_num

    def _transform_opcode(self, opc):
        """Map an Opcode to per-level index lists (None = level unused).

        Returns one list per KIND_ORDER level; each list holds the slot
        indices the opcode occupies at that level.
        """
        realopcext = opc.extended and opc.modrm[2] is None
        topc = [opc.opc + i for i in range(8 if realopcext else 1)]
        if opc.escape == 0 and opc.opc in (0xc4, 0xc5, 0x62):
            # VEX/EVEX prefix bytes themselves: only a root entry.
            assert opc.prefix is None
            assert opc.modrm == ("m", None, None)
            assert opc.rexw is None
            assert opc.vexl is None
            # We do NOT encode /m, this is handled by prefix code.
            # Order must match KIND_ORDER.
            return topc, [0], None, None, None, None, None
        elif opc.escape == 0:
            troot, tescape, topc = topc, None, None
        else:
            troot = [[0x0f], [0xc4, 0xc5], [0x62]][opc.vex]
            tescape = [opc.escape]

        tprefix, t16, t8e, tvex = None, None, None, None
        if opc.prefix == "NFx":
            tprefix = [0, 1]
        elif opc.prefix:
            tprefix = [["NP", "66", "F3", "F2"].index(opc.prefix)]
        if opc.modrm != (None, None, None):
            # TODO: optimize for /r and /m specifiers to reduce size
            mod = {"m": [0], "r": [1], None: [0, 1]}[opc.modrm[0]]
            reg = [opc.modrm[1]] if opc.modrm[1] is not None else list(range(8))
            t16 = [x + (y << 1) for x in mod for y in reg]
            if opc.modrm[2] is not None and not opc.extended:
                t8e = [opc.modrm[2]]
        if opc.rexw is not None or (opc.vexl or "IG") != "IG":
            # TABLE_VEX index: bit 0 = REX.W, bits 1+ = VEX.L / EVEX.L'L.
            rexw = {"0": [0], "1": [1<<0], None: [0, 1<<0]}[opc.rexw]
            if opc.vex < 2:
                vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"]
            else:
                vexl = {"0": [0], "12": [1<<1, 2<<1], "2": [2<<1], "IG": [0, 1<<1, 2<<1, 3<<1]}[opc.vexl or "IG"]
            tvex = list(map(sum, product(rexw, vexl)))
        # Order must match KIND_ORDER.
        return troot, tescape, topc, tprefix, t16, t8e, tvex

    def add_opcode(self, opcode, descidx, root_idx, weak=False):
        """Insert descidx for an opcode under root root_idx.

        Weak entries may be overwritten by later non-weak ones; a second
        non-weak entry for the same slot is an error.
        """
        opcode = self._transform_opcode(opcode)
        frontier = [(0, root_idx)]
        for elem_kind, elem in zip(self.KIND_ORDER, opcode):
            new_frontier = []
            for entry_num, entry_idx in frontier:
                entry = self.trie[entry_num]
                if elem is None:
                    # Level unused: stay put unless a table of this kind
                    # already exists here, then cover all its slots.
                    if entry[entry_idx] is None or entry[entry_idx][0] != elem_kind:
                        new_frontier.append((entry_num, entry_idx))
                        continue
                    elem = list(range(self.TABLE_LENGTH[elem_kind]))
                if entry[entry_idx] is None:
                    new_num = self._add_table(elem_kind)
                    entry[entry_idx] = elem_kind, new_num
                elif entry[entry_idx][0] != elem_kind:
                    # Need to add a new node here and copy entry one level below
                    new_num = self._add_table(elem_kind)
                    # Keep original entry, but clone others recursively
                    self.trie[new_num][0] = entry[entry_idx]
                    for i in range(1, len(self.trie[new_num])):
                        self.trie[new_num][i] = self._clone(entry[entry_idx])
                    entry[entry_idx] = elem_kind, new_num
                for elem_idx in elem:
                    new_frontier.append((entry[entry_idx][1], elem_idx))
            frontier = new_frontier
        for entry_num, entry_idx in frontier:
            entry = self.trie[entry_num]
            if not entry[entry_idx] or entry[entry_idx][0] == EntryKind.WEAKINSTR:
                kind = EntryKind.INSTR if not weak else EntryKind.WEAKINSTR
                # descidx << 2: low 2 bits carry the entry kind in the table.
                entry[entry_idx] = kind, descidx << 2
            elif not weak:
                raise Exception(f"redundant non-weak {opcode}")

    def add_prefix(self, byte, prefix, root_idx):
        """Register a prefix byte (magic value) in the root table of a mode."""
        if self.trie[0][root_idx] is None:
            self.trie[0][root_idx] = EntryKind.TABLE_ROOT, self._add_table(EntryKind.TABLE_ROOT)
        self.trie[self.trie[0][root_idx][1]][byte] = EntryKind.PREFIX, prefix

    def deduplicate(self):
        """Merge identical tables and collapse single-child tables, bottom-up."""
        synonyms = {}
        for kind in self.KIND_ORDER[::-1]:
            entries = {}
            for num in self.kindmap[kind]:
                # Replace previous synonyms
                entry = self.trie[num]
                for i, elem in enumerate(entry):
                    if elem and elem[0].is_table and elem[1] in synonyms:
                        entry[i] = synonyms[elem[1]]

                unique_entry = tuple(entry)
                if len(set(unique_entry)) == 1:
                    # Omit kind if all entries point to the same child
                    synonyms[num] = entry[0]
                    self.trie[num] = None
                elif unique_entry in entries:
                    # Deduplicate entries of this kind
                    synonyms[num] = kind, entries[unique_entry]
                    self.trie[num] = None
                else:
                    entries[unique_entry] = num

    def compile(self):
        """Flatten the trie into (table_data, per-root offsets) for decode.c."""
        offsets = [None] * len(self.trie)
        last_off = 0
        for num, entry in enumerate(self.trie[1:], start=1):
            if not entry:
                continue
            offsets[num] = last_off
            # Align each table to 4 entries; offsets must fit in 15 bits.
            last_off += (len(entry) + 3) & ~3
            if last_off >= 0x8000:
                raise Exception(f"maximum table size exceeded: {last_off:#x}")

        data = [0] * last_off
        for off, entry in zip(offsets, self.trie):
            if off is None:
                continue
            for i, elem in enumerate(entry, start=off):
                if elem is not None:
                    # Low 2 bits of each 16-bit word encode the entry kind.
                    value = offsets[elem[1]] if elem[0].is_table else elem[1]
                    data[i] = value | (elem[0].value & 3)
        return tuple(data), [offsets[v] for _, v in self.trie[0]]

    @property
    def stats(self):
        # Count of live (non-deduplicated) tables per kind.
        return {k.name: sum(self.trie[e] is not None for e in v)
                for k, v in self.kindmap.items()}


def superstring(strs):
    # This faces the "shortest superstring" problem, which is NP-hard.
    # Preprocessing: remove any strings which are already completely covered
    realstrs = []
    for s in sorted(strs, key=len, reverse=True):
        for s2 in realstrs:
            if s in s2:
                break
        else:
            realstrs.append(s)

    # Greedy heuristic generally yields acceptable results, though it depends on
    # the order of the mnemonics.
    # More compact results are possible, but the
    # expectable gains of an optimal result (probably with O(n!)) are small.
    # First sort strings and later do a binary search for each possible prefix.
    realstrs.sort()
    merged = ""
    while realstrs:
        # Try the longest overlap (up to 16 chars) between the current tail
        # and any remaining string; otherwise append an arbitrary string.
        for i in range(min(16, len(merged)), 0, -1):
            idx = bisect.bisect_left(realstrs, merged[-i:])
            if idx < len(realstrs) and realstrs[idx][:i] == merged[-i:]:
                merged += realstrs.pop(idx)[i:]
                break
        else:
            merged += realstrs.pop()
    return merged

def decode_table(entries, args):
    """Build the decode tables; returns (mnemonic list text, C table text)."""
    modes = args.modes

    trie = Trie(root_count=len(modes))
    for i, mode in enumerate(modes):
        # Magic values must match PF_* enum in decode.c.
        trie.add_prefix(0x66, 0xfffa, i)
        trie.add_prefix(0x67, 0xfffb, i)
        trie.add_prefix(0xf0, 0xfffc, i)
        # NOTE(review): 0xf2 and 0xf3 map to the same magic value here --
        # verify against the PF_* enum in decode.c.
        trie.add_prefix(0xf2, 0xfffd, i)
        trie.add_prefix(0xf3, 0xfffd, i)
        trie.add_prefix(0x64, 0xfff9, i)
        trie.add_prefix(0x65, 0xfff9, i)
        for seg in (0x26, 0x2e, 0x36, 0x3e):
            # Legacy segment prefixes are ignored in 64-bit mode.
            trie.add_prefix(seg, 0xfff8 + (mode <= 32), i)
        if mode > 32:
            for rex in range(0x40, 0x50):
                trie.add_prefix(rex, 0xfffe, i)

    # pause is hardcoded together with XCHG_NOP.
    mnems, descs, desc_map = {"PAUSE"}, [], {}
    descs.append("{0}") # desc index zero is "invalid"
    for weak, opcode, desc in entries:
        ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
        modrm = opcode.modrm != (None, None, None)
        # Collapse direction/register-class variants into one decode mnemonic.
        mnem = {
            "PUSH_SEG": "PUSH", "POP_SEG": "POP",
            "MOV_CR2G": "MOV_CR", "MOV_G2CR": "MOV_CR",
            "MOV_DR2G": "MOV_DR", "MOV_G2DR": "MOV_DR",
            "MMX_MOVD_M2G": "MMX_MOVD", "MMX_MOVD_G2M": "MMX_MOVD",
            "MMX_MOVQ_M2G": "MMX_MOVQ", "MMX_MOVQ_G2M": "MMX_MOVQ",
            "SSE_MOVD_X2G": "SSE_MOVD", "SSE_MOVD_G2X": "SSE_MOVD",
            "SSE_MOVQ_X2G": "SSE_MOVQ", "SSE_MOVQ_G2X": "SSE_MOVQ",
            "VMOVD_X2G": "VMOVD", "VMOVD_G2X": "VMOVD",
            "VMOVQ_X2G": "VMOVQ", "VMOVQ_G2X": "VMOVQ",
        }.get(desc.mnemonic, desc.mnemonic)
        mnems.add(mnem)
        # Deduplicate identical descriptor encodings.
        descenc = desc.encode(mnem, ign66, modrm)
        desc_idx = desc_map.get(descenc)
        if desc_idx is None:
            desc_idx = desc_map[descenc] = len(descs)
            descs.append(descenc)
        for i, mode in enumerate(modes):
            # Skip I64 entries in 64-bit mode and O64 entries in 32-bit mode.
            if "IO"[mode <= 32]+"64" not in desc.flags:
                trie.add_opcode(opcode, desc_idx, i, weak)

    trie.deduplicate()
    table_data, root_offsets = trie.compile()

    mnems = sorted(mnems)
    decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)]

    # Derive Intel-style display names, then pack them into one superstring
    # indexed by STRTAB2 (offsets) and STRTAB3 (lengths).
    mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "")
                        .replace("EVX_", "V")
                        .replace("MOVABS", "MOV").replace("RESERVED_", "")
                        .replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR")
                        .replace("_S2G", "").replace("_G2S", "")
                        .replace("_X2G", "").replace("_G2X", "")
                        .replace("_CR", "").replace("_DR", "")
                        .replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG")
                        .replace("JCXZ", "JCXZ JECXZJRCXZ")
                        .replace("C_SEP", "CWD CDQ CQO")
                        .replace("C_EX", "CBW CWDECDQE").replace("XCHG_NOP", "")
                        .lower() for m in mnems]
    mnemonics_str = superstring(mnemonics_intel)

    if args.stats:
        print(f"Decode stats: Descs -- {len(descs)} ({8*len(descs)} bytes); ",
              f"Trie -- {2*len(table_data)} bytes, {trie.stats}; "
              f"Mnems -- {len(mnemonics_str)} + {3*len(mnemonics_intel)} bytes")

    defines = ["FD_TABLE_OFFSET_%d %d\n"%k for k in zip(modes, root_offsets)]

    return "".join(decode_mnems_lines), f"""// Auto-generated file -- do not modify!
#if defined(FD_DECODE_TABLE_DATA)
{"".join(f"{e:#06x}," for e in table_data)}
#elif defined(FD_DECODE_TABLE_DESCS)
{",".join(descs)}
#elif defined(FD_DECODE_TABLE_STRTAB1)
"{mnemonics_str}"
#elif defined(FD_DECODE_TABLE_STRTAB2)
{",".join(str(mnemonics_str.index(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_STRTAB3)
{",".join(str(len(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_DEFINES)
{"".join("#define " + line for line in defines)}
#else
#error "unspecified decode table"
#endif
"""

class EncodeVariant(NamedTuple):
    """One concrete encoding option for an encoder mnemonic."""
    opcode: Opcode
    desc: InstrDesc
    evexbcst: bool = False
    evexmask: int = 0 # 0 = none, 1 = must have mask, 2 = mask + EVEX.z
    evexsae: int = 0 # 0 = no EVEX.b, 1 = EVEX.b, 2 = EVEX.b + L'L is rounding mode
    evexdisp8scale: int = 0 # EVEX disp8 shift
    downgrade: int = 0 # 0 = none, 1 = to VEX, 2 = to VEX flipping REXW

def encode_mnems(entries):
    """Expand instrs.txt entries into encoder mnemonic variants.

    Returns a dict (mnemonic name, opsize, operand-type string) -> [EncodeVariant].
    """
    # mapping from (mnem, opsize, ots) -> (opcode, desc)
    mnemonics = defaultdict(list)
    # Cannot have PAUSE in instrs.txt, because opcodes in without escape must
    # not have mandatory prefixes. For decode, this is hardcoded.
765 | mnemonics["PAUSE", 0, ""] = [EncodeVariant(Opcode.parse("F3.90"), InstrDesc.parse("NP - - - - NOP"))] 766 | for weak, opcode, desc in entries: 767 | if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_": 768 | continue 769 | mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic) 770 | mnem_name = mnem_name.replace("EVX_", "V") 771 | 772 | opsizes, vecsizes = {0}, {0} 773 | prepend_opsize, prepend_vecsize = False, False 774 | # Where to put the operand size in the mnemonic 775 | separate_opsize = "ENC_SEPSZ" in desc.flags 776 | 777 | if "ENC_NOSZ" in desc.flags or not desc.dynsizes(): 778 | pass 779 | elif OpKind.SZ_OP in desc.dynsizes(): 780 | if opcode.rexw is not None: 781 | raise Exception(f"unexpected REXW specifier {desc}") 782 | opsizes = {8} if "SZ8" in desc.flags else {16, 32, 64} 783 | if opcode.prefix in ("NP", "66", "F2", "F3") and "U66" not in desc.flags: 784 | opsizes -= {16} 785 | if "I66" in desc.flags: 786 | opsizes -= {16} 787 | if "D64" in desc.flags: 788 | opsizes -= {32} 789 | prepend_opsize = not separate_opsize 790 | if "F64" in desc.flags: 791 | opsizes = {64} 792 | prepend_opsize = False 793 | elif opcode.vex and opcode.vexl != "IG": # vectors; don't care for SSE 794 | vecsizes = {128, 256, 512} if opcode.vex == 2 else {128, 256} 795 | if opcode.vexl: 796 | vecsizes = {128 << int(c) for c in opcode.vexl} 797 | prepend_vecsize = not separate_opsize 798 | 799 | # All encoding types; reg is r/k (mask); modrm is r/m/b (broadcast) 800 | optypes_base = [] 801 | for i, opkind in enumerate(desc.operands): 802 | reg = "k" if opkind.kind == "MASK" else "r" 803 | opname = ENCODING_OPORDER[desc.encoding][i] 804 | if opname == "modrm": 805 | modrm_type = (opcode.modrm[0] or "rm").replace("r", reg) 806 | if opcode.extended or desc.mnemonic in ("MOV_CR2G", "MOV_DR2G", "MOV_G2CR", "MOV_G2DR"): 807 | modrm_type = reg 808 | if "BCST" in desc.flags: 809 | modrm_type += "b" 810 | optypes_base.append(modrm_type) 811 | elif 
opname == "modreg" or opname == "vexreg": 812 | optypes_base.append(reg) 813 | else: 814 | optypes_base.append(" iariioo"[ENCODINGS[desc.encoding].imm_control]) 815 | optypes = ["".join(x) for x in product(*optypes_base)] 816 | 817 | prefixes = [("", "")] 818 | if "LOCK" in desc.flags: 819 | prefixes.append(("LOCK_", "LOCK")) 820 | if "ENC_REP" in desc.flags: 821 | prefixes.append(("REP_", "F3")) 822 | if "ENC_REPCC" in desc.flags: 823 | prefixes.append(("REPNZ_", "F2")) 824 | prefixes.append(("REPZ_", "F3")) 825 | 826 | evexmasks = [0] 827 | if "MASK" in desc.flags: 828 | if "VSIB" in desc.flags: 829 | evexmasks = [1] 830 | else: 831 | evexmasks.append(1) 832 | if desc.operands[0].kind != "MASK": 833 | evexmasks.append(2) # maskz only for non-mask destinations 834 | evexsaes = [0] 835 | if "SAE" in desc.flags: 836 | evexsaes.append(1) 837 | elif "ER" in desc.flags: 838 | evexsaes.append(2) 839 | 840 | keys = (opsizes, vecsizes, prefixes, optypes, evexmasks, evexsaes) 841 | for opsize, vecsize, prefix, ots, evexmask, evexsae in product(*keys): 842 | has_memory = "m" in ots or "b" in ots 843 | if prefix[1] == "LOCK" and ots[0] != "m": 844 | continue 845 | if evexmask == 2 and ots[0] != "r": 846 | continue # EVEX.z must be zero for memory destination 847 | if evexsae and (vecsize not in (0, 512) or has_memory): 848 | continue # SAE/ER only works with 512 bit width and no memory 849 | 850 | spec_opcode = opcode 851 | if prefix[1]: 852 | spec_opcode = spec_opcode._replace(prefix=prefix[1]) 853 | if opsize == 64 and "D64" not in desc.flags and "F64" not in desc.flags: 854 | spec_opcode = spec_opcode._replace(rexw="1") 855 | if vecsize == 512: 856 | spec_opcode = spec_opcode._replace(vexl="2") 857 | if vecsize == 256: 858 | spec_opcode = spec_opcode._replace(vexl="1") 859 | if vecsize == 128: 860 | spec_opcode = spec_opcode._replace(vexl="0") 861 | if spec_opcode.vexl == "IG": 862 | spec_opcode = spec_opcode._replace(vexl="0") 863 | if ENCODINGS[desc.encoding].modrm_idx: 
864 | modrm = ("m" if has_memory else "r",) + spec_opcode.modrm[1:] 865 | spec_opcode = spec_opcode._replace(modrm=modrm) 866 | if ENCODINGS[desc.encoding].modrm or None not in opcode.modrm: 867 | assert spec_opcode.modrm[0] in ("r", "m") 868 | 869 | evexbcst = "b" in ots 870 | evexdisp8scale = 0 871 | if spec_opcode.vex == 2 and has_memory: 872 | if not evexbcst: 873 | op = desc.operands[ENCODINGS[desc.encoding].modrm_idx^3] 874 | size = op.abssize(opsize//8, vecsize//8) 875 | evexdisp8scale = size.bit_length() - 1 876 | elif "BCST16" in desc.flags: 877 | evexdisp8scale = 1 878 | else: 879 | evexdisp8scale = 2 if spec_opcode.rexw != "1" else 3 880 | 881 | # Construct mnemonic name 882 | name = prefix[0] + mnem_name 883 | 884 | # Transform MOV_G2X/X2G into MOVD/MOVQ_G2X/X2G. This isn't done for 885 | # VEX for historical reasons and there's no reason to break 886 | # backwards compatibility. This enables EVEX->VEX fallback. 887 | if desc.mnemonic in ("EVX_MOV_G2X", "EVX_MOV_X2G"): 888 | name = name[:-4] + "DQ"[opsize == 64] + name[-4:] 889 | prepend_opsize, opsize = False, 0 890 | # For VMOVD with memory operand, there's no need to be explicit 891 | # about G2X/X2G, as there's no alternative. For VMOVQ, another 892 | # opcode exists, so keep G2X/X2G there for distinguishing. 893 | if name in ("VMOVD_G2X", "VMOVD_X2G") and has_memory: 894 | name = name.replace("_G2X", "").replace("_X2G", "") 895 | # PEXTR/PBROADCAST/PINSR are stored without size suffix in the table 896 | # to avoid having different tables for 32/64 bit mode due to EVEX.W 897 | # being ignored in 32-bit mode. Add suffix here. 
898 | if desc.mnemonic == "EVX_PEXTR": 899 | name += " BW D Q"[desc.operands[0].abssize(opsize//8, vecsize//8)] 900 | prepend_opsize, opsize = False, 0 901 | if desc.mnemonic == "EVX_PBROADCAST": 902 | name += " BW D Q"[desc.operands[1].abssize(opsize//8, vecsize//8)] 903 | name += "_GP" 904 | prepend_opsize, opsize = False, 0 905 | if desc.mnemonic == "EVX_PINSR": 906 | name += " BW D Q"[desc.operands[2].abssize(opsize//8, vecsize//8)] 907 | prepend_opsize, opsize = False, 0 908 | 909 | if prepend_opsize and not ("D64" in desc.flags and opsize == 64): 910 | name += f"_{opsize}"[name[-1] not in "0123456789":] 911 | if prepend_vecsize: 912 | name += f"_{vecsize}"[name[-1] not in "0123456789":] 913 | for ot, op in zip(ots, desc.operands): 914 | name += ot.replace("o", "") 915 | if separate_opsize: 916 | name += f"{op.abssize(opsize//8, vecsize//8)*8}" 917 | if "VSIB" not in desc.flags: 918 | # VSIB implies non-zero mask register, so suffix is not required 919 | name += ["", "_mask", "_maskz"][evexmask] 920 | name += ["", "_sae", "_er"][evexsae] 921 | variant = EncodeVariant(spec_opcode, desc, evexbcst, evexmask, evexsae, evexdisp8scale) 922 | mnemonics[name, opsize, ots].append(variant) 923 | altname = { 924 | "C_EX16": "CBW", "C_EX32": "CWDE", "C_EX64": "CDQE", 925 | "C_SEP16": "CWD", "C_SEP32": "CDQ", "C_SEP64": "CQO", 926 | "CMPXCHGD32m": "CMPXCHG8Bm", "CMPXCHGD64m": "CMPXCHG16Bm", 927 | }.get(name) 928 | if altname: 929 | mnemonics[altname, opsize, ots].append(variant) 930 | 931 | for (mnem, opsize, ots), all_variants in mnemonics.items(): 932 | dedup = OrderedDict() 933 | for i, variant in enumerate(all_variants): 934 | PRIO = ["O", "OA", "AO", "AM", "MA", "IA", "OI"] 935 | enc_prio = PRIO.index(variant.desc.encoding) if variant.desc.encoding in PRIO else len(PRIO) 936 | unique = 0 if variant.desc.encoding != "S" else i 937 | # Prefer VEX over EVEX for shorter encoding 938 | key = variant.desc.imm_size(opsize//8), variant.opcode.vex, enc_prio, unique 939 | if 
key not in dedup: 940 | dedup[key] = variant 941 | variants = [dedup[k] for k in sorted(dedup.keys())] 942 | if len(variants) > 1 and any(v.opcode.vex for v in variants): 943 | # Case 1: VEX -> EVEX promotion (AVX-512, APX) 944 | # Case 2: legacy -> EVEX promotion (APX) 945 | # In any case, there should be exactly one EVEX opcode. 946 | if len(variants) != 2: 947 | raise Exception(f"VEX/EVEX mnemonic with more than two encodings {mnem} {opcode}") 948 | if variants[0].opcode.vex == 2 or variants[1].opcode.vex != 2: 949 | raise Exception(f"EVEX mnemonic not with non-EVEX pair {mnem} {opcode} {variants}") 950 | no_evex, evex = variants[0], variants[1] 951 | 952 | # Make sure that for promotions, only minor things vary. 953 | # REX.W is special, EVEX might mandate W1 while VEX mandates W0/WIG. 954 | # Technically ok: IG -> IG/IG -> 0/0 -> IG/0 -> 0/1 -> IG/1 -> 1 955 | # rexwdowngrade = (no_evex.opcode.rexw is None or 956 | # no_evex.opcode.rexw == evex.opcode.rexw) 957 | # 958 | # However, other encoders always use W0 in case of WIG for VEX, and 959 | # that's probably most beneficial... so: 960 | # Possible downgrades: IG -> IG/IG -> 0/0 -> IG/0 -> 0/1 -> 1 961 | # This affects quite a few instructions, so we use an extra bit to 962 | # flip EVEX.W to VEX.W. 963 | 964 | if (no_evex.opcode.prefix != evex.opcode.prefix or 965 | no_evex.opcode.escape != evex.opcode.escape or 966 | no_evex.opcode.opc != evex.opcode.opc or 967 | # reg/mem doesn't matter, it's already fixed in the mnemonic 968 | no_evex.opcode.modrm[1:] != evex.opcode.modrm[1:] or 969 | no_evex.opcode.vexl != evex.opcode.vexl or 970 | # we don't check rexw_flip here, we can always handle it 971 | no_evex.desc.encoding != evex.desc.encoding or 972 | no_evex.desc.operands != evex.desc.operands): 973 | print(mnem, no_evex) 974 | print(mnem, evex) 975 | # Should not happen. 
976 | raise Exception("cannot downgrade EVEX?") 977 | else: 978 | rexw_flip = (no_evex.opcode.rexw == "1") != (evex.opcode.rexw == "1") 979 | variants = [evex._replace(downgrade=1 if not rexw_flip else 2)] 980 | mnemonics[mnem, opsize, ots] = variants 981 | 982 | return dict(mnemonics) 983 | 984 | def encode_table(entries, args): 985 | mnemonics = encode_mnems(entries) 986 | mnemonics["NOP", 0, ""] = [EncodeVariant(Opcode.parse("90"), InstrDesc.parse("NP - - - - NOP"))] 987 | mnem_map = {} 988 | alt_table = [0] # first entry is unused 989 | for (mnem, opsize, ots), variants in mnemonics.items(): 990 | supports_high_regs = [] 991 | if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8: 992 | # Should be the same for all variants 993 | desc = variants[0][1] 994 | for i, (ot, op) in enumerate(zip(ots, desc.operands)): 995 | if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1: 996 | supports_high_regs.append(i) 997 | 998 | alt_indices = [i + len(alt_table) for i in range(len(variants) - 1)] + [0] 999 | enc_opcs = [] 1000 | for alt, variant in zip(alt_indices, variants): 1001 | opcode, desc = variant.opcode, variant.desc 1002 | encoding = ENCODINGS[desc.encoding] 1003 | opc_i = opcode.opc 1004 | if None not in opcode.modrm: 1005 | opc_i |= 0xc000 | opcode.modrm[1] << 11 | opcode.modrm[2] << 8 1006 | elif opcode.modrm[1] is not None: 1007 | opc_i |= opcode.modrm[1] << 8 1008 | if opcode.modrm == ("m", None, 4): 1009 | opc_i |= 0x2000000000 # FORCE_SIB 1010 | if not opcode.vex: 1011 | assert opcode.escape < 4 1012 | opc_i |= opcode.escape * 0x10000 1013 | opc_i |= 0x80000 if opcode.prefix == "66" or opsize == 16 else 0 1014 | opc_i |= 0x100000 if opcode.prefix == "F2" else 0 1015 | opc_i |= 0x200000 if opcode.prefix == "F3" else 0 1016 | else: 1017 | assert opcode.escape < 8 1018 | opc_i |= opcode.escape * 0x10000 1019 | if opcode.prefix == "66" or opsize == 16: 1020 | assert opcode.prefix not in ("F2", "F3") 1021 | opc_i |= 0x100000 1022 | if 
opcode.prefix == "F3": 1023 | opc_i |= 0x200000 1024 | elif opcode.prefix == "F2": 1025 | opc_i |= 0x300000 1026 | opc_i |= 0x400000 if opcode.rexw == "1" else 0 1027 | if opcode.prefix == "LOCK": 1028 | opc_i |= 0x800000 1029 | elif opcode.vex == 1: 1030 | opc_i |= 0x1000000 + 0x800000 * int(opcode.vexl or 0) 1031 | elif opcode.vex == 2: 1032 | opc_i |= 0x2000000 1033 | # L'L encodes SAE rounding mode otherwise 1034 | if not variant.evexsae: 1035 | opc_i |= 0x800000 * int(opcode.vexl or 0) 1036 | assert not (variant.evexsae and variant.evexbcst) 1037 | opc_i |= 0x4000000 if variant.evexsae or variant.evexbcst else 0 1038 | opc_i |= 0x8000000 if "VSIB" in desc.flags else 0 1039 | opc_i |= 0x1000000000 if variant.evexmask == 2 else 0 1040 | opc_i |= 0x4000000000 if variant.downgrade in (1, 2) else 0 1041 | opc_i |= 0x40000000000 if variant.downgrade == 2 else 0 1042 | opc_i |= 0x8000000000 * variant.evexdisp8scale 1043 | if alt >= 0x100: 1044 | raise Exception("encode alternate bits exhausted") 1045 | opc_i |= sum(1 << i for i in supports_high_regs) << 45 1046 | if encoding.imm_control >= 3: 1047 | opc_i |= desc.imm_size(opsize//8) << 47 1048 | elif encoding.imm_control in (1, 2): 1049 | # Must be an arbitrary non-zero value, replaced by address size 1050 | # for imm_ctl=2 and zero for imm_ctl=1 (constant 1). 
1051 | opc_i |= 1 << 47 1052 | 1053 | enc_encoding = desc.encoding 1054 | if desc.encoding != "I" and desc.encoding.endswith("I"): 1055 | enc_encoding = desc.encoding[:-1] 1056 | elif desc.encoding == "IA": 1057 | enc_encoding = "A" 1058 | opc_i |= ["NP", "M", "R", "M1", "MC", "MR", "RM", "RMA", "MRC", 1059 | "AM", "MA", "I", "O", "OA", "S", "A", "D", "FD", "TD", "IM", 1060 | "RVM", "RVMR", "RMV", "VM", "MVR", "MRV", 1061 | ].index(enc_encoding) << 51 1062 | opc_i |= alt << 56 1063 | enc_opcs.append(opc_i) 1064 | mnem_map[f"FE_{mnem}"] = enc_opcs[0] 1065 | alt_table += enc_opcs[1:] 1066 | 1067 | mnem_tab = "".join(f"#define {m} {v:#x}\n" for m, v in mnem_map.items()) 1068 | alt_tab = "".join(f"[{i}] = {v:#x},\n" for i, v in enumerate(alt_table)) 1069 | return mnem_tab, alt_tab 1070 | 1071 | def unique(it): 1072 | vals = set(it) 1073 | if len(vals) != 1: 1074 | raise Exception(f"multiple values: {vals}") 1075 | return next(iter(vals)) 1076 | 1077 | def encode2_gen_legacy(variant: EncodeVariant, opsize: int, supports_high_regs: list[int], imm_expr: str, imm_size_expr: str, has_idx: bool) -> str: 1078 | opcode = variant.opcode 1079 | desc = variant.desc 1080 | flags = ENCODINGS[variant.desc.encoding] 1081 | code = "" 1082 | 1083 | rex_expr = "0" if opcode.rexw != "1" else "0x48" 1084 | for i in supports_high_regs: 1085 | rex_expr += f"|(op_reg_idx(op{i}) >= 4 && op_reg_idx(op{i}) <= 15?0x40:0)" 1086 | if flags.modrm_idx: 1087 | if opcode.modrm[0] == "m": 1088 | rex_expr += f"|(op_mem_base(op{flags.modrm_idx^3})&8?0x41:0)" 1089 | rex_expr += f"|(op_mem_idx(op{flags.modrm_idx^3})&8?0x42:0)" 1090 | elif desc.operands[flags.modrm_idx^3].kind in ("GP", "XMM"): 1091 | rex_expr += f"|(op_reg_idx(op{flags.modrm_idx^3})&8?0x41:0)" 1092 | if flags.modreg_idx: 1093 | if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM", "CR", "DR"): 1094 | rex_expr += f"|(op_reg_idx(op{flags.modreg_idx^3})&8?0x44:0)" 1095 | elif flags.modreg_idx: # O encoding 1096 | if 
desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM"): 1097 | rex_expr += f"|(op_reg_idx(op{flags.modreg_idx^3})&8?0x41:0)" 1098 | 1099 | if rex_expr != "0": 1100 | code += f" unsigned rex = {rex_expr};\n" 1101 | for i in supports_high_regs: 1102 | code += f" if (rex && op_reg_gph(op{i})) return 0;\n" 1103 | 1104 | if not has_idx: 1105 | code += " unsigned idx = 0;\n" 1106 | if opcode.prefix == "LOCK": 1107 | code += f" buf[idx++] = 0xF0;\n" 1108 | if opsize == 16 or opcode.prefix == "66": 1109 | code += " buf[idx++] = 0x66;\n" 1110 | if opcode.prefix in ("F2", "F3"): 1111 | code += f" buf[idx++] = 0x{opcode.prefix};\n" 1112 | if opcode.rexw == "1": 1113 | code += f" buf[idx++] = rex;\n" 1114 | elif rex_expr != "0": 1115 | code += f" if (rex) buf[idx++] = rex;\n" 1116 | if opcode.escape: 1117 | code += f" buf[idx++] = 0x0F;\n" 1118 | if opcode.escape == 2: 1119 | code += f" buf[idx++] = 0x38;\n" 1120 | elif opcode.escape == 3: 1121 | code += f" buf[idx++] = 0x3A;\n" 1122 | code += f" buf[idx++] = {opcode.opc:#x};\n" 1123 | if None not in opcode.modrm: 1124 | opcext = 0xc0 | opcode.modrm[1] << 3 | opcode.modrm[2] 1125 | code += f" buf[idx++] = {opcext:#x};\n" 1126 | 1127 | if flags.modrm: 1128 | if flags.modreg_idx: 1129 | modreg = f"op_reg_idx(op{flags.modreg_idx^3})" 1130 | else: 1131 | modreg = opcode.modrm[1] or 0 1132 | if opcode.modrm[0] == "m": 1133 | assert "VSIB" not in desc.flags 1134 | assert opcode.modrm[2] is None 1135 | modrm = f"op{flags.modrm_idx^3}" 1136 | code += f" unsigned memoff = enc_mem(buf+idx, idx+{imm_size_expr}, {modrm}, {modreg}, 0, 0);\n" 1137 | code += f" if (!memoff) return 0;\n idx += memoff;\n" 1138 | else: 1139 | if flags.modrm_idx: 1140 | modrm = f"op_reg_idx(op{flags.modrm_idx^3})" 1141 | else: 1142 | modrm = f"{opcode.modrm[2] or 0}" 1143 | code += f" buf[idx++] = 0xC0|({modreg}<<3)|({modrm}&7);\n" 1144 | elif flags.modrm_idx: 1145 | code += f" buf[idx-1] |= op_reg_idx(op{flags.modrm_idx^3}) & 7;\n" 1146 | if flags.imm_control 
>= 2: 1147 | if flags.imm_control == 6: 1148 | imm_expr += " - idx" 1149 | code += f" enc_imm(buf+idx, {imm_expr}, {imm_size_expr});\n" 1150 | code += f" return idx + {imm_size_expr};\n" 1151 | else: 1152 | code += f" return idx;\n" 1153 | return code 1154 | 1155 | def encode2_gen_vex(variant: EncodeVariant, imm_expr: str, imm_size_expr: str, has_idx: bool) -> str: 1156 | opcode = variant.opcode 1157 | flags = ENCODINGS[variant.desc.encoding] 1158 | code = "" 1159 | 1160 | helperopc = opcode.opc << 16 1161 | helperopc |= ["NP", "66", "F3", "F2"].index(opcode.prefix) << 8 1162 | helperopc |= 0x8000 if opcode.rexw == "1" else 0 1163 | if not variant.evexsae: 1164 | # ER: L'L encodes rounding mode for SAE 1165 | helperopc |= 0x0020 * int(opcode.vexl or 0) # EVEX.L'L 1166 | helperopc |= opcode.escape << 10 1167 | helperopc |= 0x10 if variant.evexsae or variant.evexbcst else 0 # EVEX.b 1168 | helperopc |= 0x80 if variant.evexmask == 2 else 0 # EVEX.z 1169 | helperopc |= 0x1000000 if variant.downgrade in (1, 2) else 0 1170 | helperopc |= 0x2000000 if variant.downgrade == 2 else 0 1171 | helperopc = f"{helperopc:#x}" 1172 | if variant.evexsae == 2: 1173 | helperopc += "|(flags&FE_RC_MASK)" 1174 | if variant.evexmask: 1175 | code += " if (!op_reg_idx(opmask)) return 0;\n" 1176 | helperopc += "|(op_reg_idx(opmask)&7)" 1177 | 1178 | if flags.modreg_idx: 1179 | modreg = f"op_reg_idx(op{flags.modreg_idx^3})" 1180 | else: 1181 | modreg = opcode.modrm[1] or 0 1182 | vexop = f"op_reg_idx(op{flags.vexreg_idx^3})" if flags.vexreg_idx else 0 1183 | if not flags.modrm and opcode.modrm == (None, None, None): 1184 | # No ModRM, prefix only (VZEROUPPER/VZEROALL) 1185 | assert opcode.vex == 1 1186 | helperfn, helperargs = "enc_vex_common", f"0, 0, 0, 0" 1187 | elif opcode.modrm[0] == "m": 1188 | vsib = "VSIB" in variant.desc.flags 1189 | helperfn = "enc" + ["", "_vex", "_evex"][opcode.vex] + ["_mem", "_vsib"][vsib] 1190 | assert opcode.modrm[2] in (None, 4) 1191 | forcesib = 1 if 
opcode.modrm[2] == 4 else 0 # AMX 1192 | modrm = f"op{flags.modrm_idx^3}" 1193 | ripoff = imm_size_expr + ("" if not has_idx else "+idx") 1194 | helperargs = (f"{modrm}, {modreg}, {vexop}, {ripoff}, " + 1195 | f"{forcesib}, {variant.evexdisp8scale}") 1196 | else: 1197 | if flags.modrm_idx: 1198 | modrm = f"op_reg_idx(op{flags.modrm_idx^3})" 1199 | else: 1200 | modrm = f"{opcode.modrm[2] or 0}" 1201 | suffix = "_reg" 1202 | if (opcode.vex == 2 and flags.modrm_idx and 1203 | variant.desc.operands[flags.modrm_idx^3].kind == "XMM"): 1204 | suffix = "_xmm" 1205 | helperfn = "enc" + ["", "_vex", "_evex"][opcode.vex] + suffix 1206 | helperargs = f"{modrm}, {modreg}, {vexop}" 1207 | bufidx = "buf" if not has_idx else "buf+idx" 1208 | helpercall = f"{helperfn}({bufidx}, {helperopc}, {helperargs})" 1209 | if flags.imm_control >= 2: 1210 | assert flags.imm_control < 6, "jmp with VEX/EVEX?" 1211 | code += f" unsigned vexoff = {helpercall};\n" 1212 | code += f" enc_imm({bufidx}+vexoff, {imm_expr}, {imm_size_expr});\n" 1213 | code += f" return vexoff ? vexoff+{imm_size_expr}{'+idx' if has_idx else ''} : 0;\n" 1214 | elif has_idx: 1215 | code += f" unsigned vexoff = {helpercall};\n" 1216 | code += f" return vexoff ? 
vexoff+idx : 0;\n" 1217 | else: 1218 | code += f" return {helpercall};\n" 1219 | return code 1220 | 1221 | def encode2_table(entries, args): 1222 | mnemonics = encode_mnems(entries) 1223 | 1224 | enc_decls, enc_code = "", "" 1225 | for (mnem, opsize, ots), variants in mnemonics.items(): 1226 | max_imm_size = max(v.desc.imm_size(opsize//8) for v in variants) 1227 | 1228 | supports_high_regs = [] 1229 | if variants[0].desc.mnemonic in ("MOVSX", "MOVZX") or opsize == 8: 1230 | # Should be the same for all variants 1231 | for i, (ot, op) in enumerate(zip(ots, variants[0].desc.operands)): 1232 | if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1: 1233 | supports_high_regs.append(i) 1234 | supports_vsib = unique("VSIB" in v.desc.flags for v in variants) 1235 | opkinds = unique(tuple(op.kind for op in v.desc.operands) for v in variants) 1236 | evexmask = unique(v.evexmask for v in variants) 1237 | evexsae = unique(v.evexsae for v in variants) 1238 | 1239 | OPKIND_LUT = {"FPU": "ST", "SEG": "SREG", "MMX": "MM"} 1240 | reg_tys = [OPKIND_LUT.get(opkind, opkind) for opkind in opkinds] 1241 | 1242 | fnname = f"fe64_{mnem}" 1243 | op_tys = [{ 1244 | "i": f"int{max_imm_size*8 if max_imm_size != 3 else 32}_t", 1245 | "a": "uintptr_t", 1246 | "r": f"FeReg{reg_ty if i not in supports_high_regs else 'GPLH'}", 1247 | "k": "FeRegMASK", 1248 | "m": "FeMem" if not supports_vsib else "FeMemV", 1249 | "b": "FeMem", 1250 | "o": "const void*", 1251 | }[ot] for i, (ot, reg_ty) in enumerate(zip(ots, reg_tys))] 1252 | fn_opargs = ", FeRegMASK opmask" if evexmask else "" 1253 | fn_opargs += "".join(f", {ty} op{i}" for i, ty in enumerate(op_tys)) 1254 | fn_sig = f"unsigned ({fnname})(uint8_t* buf, int flags{fn_opargs})" 1255 | enc_decls += f"{fn_sig};\n" 1256 | if supports_high_regs: 1257 | enc_decls += f"#define fe64_{mnem}(buf, flags" 1258 | enc_decls += "".join(f", op{i}" for i in range(len(op_tys))) 1259 | enc_decls += f") {fnname}(buf, flags" 1260 | enc_decls += "".join(f", 
FE_MAKE_GPLH(op{i})" if i in supports_high_regs else f", op{i}" for i in range(len(op_tys))) 1261 | enc_decls += f")\n" 1262 | 1263 | code = f"{fn_sig} {{\n" 1264 | 1265 | has_memory = unique(v.opcode.modrm[0] == "m" for v in variants) 1266 | has_useg = unique("USEG" in v.desc.flags for v in variants) 1267 | has_u67 = unique("U67" in v.desc.flags for v in variants) 1268 | if has_memory or has_useg: 1269 | # segment override without addrsize override shouldn't happen 1270 | assert has_memory or has_u67 1271 | code += f" unsigned idx = UNLIKELY(flags & (FE_SEG_MASK|FE_ADDR32)) ? enc_seg67(buf, flags) : 0;\n" 1272 | elif has_u67: 1273 | # STOS, SCAS, JCXZ, LOOP, LOOPcc 1274 | code += f" unsigned idx = UNLIKELY(flags & FE_ADDR32) ? (*buf=0x67, 1) : 0;\n" 1275 | else: 1276 | code += " (void) flags;\n" 1277 | 1278 | # indicate whether an idx variable exists 1279 | has_idx = has_memory or has_useg or has_u67 1280 | 1281 | for i, variant in enumerate(variants): 1282 | opcode, desc = variant.opcode, variant.desc 1283 | flags = ENCODINGS[desc.encoding] 1284 | 1285 | conds = [] 1286 | # Select usable encoding. 1287 | if desc.encoding == "S": 1288 | # Segment encoding is weird. 1289 | conds.append(f"op_reg_idx(op0)=={(opcode.opc>>3)&0x7:#x}") 1290 | if desc.mnemonic == "XCHG_NOP" and opsize == 32: 1291 | # XCHG eax, eax must not be encoded as 90 -- that'd be NOP. 1292 | conds.append(f"!(op_reg_idx(op0)==0&&op_reg_idx(op1)==0)") 1293 | if flags.vexreg_idx and not opcode.vex: # vexreg w/o vex is zeroreg 1294 | conds.append(f"op_reg_idx(op{flags.vexreg_idx^3})=={flags.zeroreg_val}") 1295 | 1296 | imm_size = desc.imm_size(opsize//8) 1297 | imm_size_expr = f"{imm_size}" 1298 | imm_expr = f"(int64_t) op{flags.imm_idx^3}" 1299 | if flags.imm_control == 1: 1300 | conds.append(f"op{flags.imm_idx^3} == 1") 1301 | elif flags.imm_control == 2: 1302 | imm_size_expr = "(flags & FE_ADDR32 ? 4 : 8)" 1303 | imm_expr = f"(int64_t) (flags & FE_ADDR32 ? 
(int32_t) {imm_expr} : {imm_expr})" 1304 | elif flags.imm_control == 3: 1305 | imm_expr = f"op_reg_idx(op{flags.imm_idx^3}) << 4" 1306 | code += f" if (op_reg_idx(op{flags.imm_idx^3}) >= 16) return 0;\n" 1307 | elif flags.imm_control == 4 and imm_size == 3: # ENTER 1308 | code += f" if ((uint32_t) op{flags.imm_idx^3} >= 0x1000000) return 0;\n" 1309 | elif flags.imm_control == 4 and imm_size < max_imm_size: 1310 | conds.append(f"op_imm_n({imm_expr}, {imm_size})") 1311 | elif flags.imm_control == 6: 1312 | imm_expr = f"{imm_expr} - (int64_t) buf - {imm_size}" 1313 | if i != len(variants) - 1: # only Jcc+JMP 1314 | conds.append(f"!(flags & FE_JMPL)") 1315 | # assume one-byte opcode without escape/prefixes 1316 | conds.append(f"op_imm_n({imm_expr}-1, {imm_size})") 1317 | 1318 | if conds: 1319 | code += f" if ({'&&'.join(conds)}) {{\n" 1320 | 1321 | if opcode.vex: 1322 | code += encode2_gen_vex(variant, imm_expr, imm_size_expr, has_idx) 1323 | else: 1324 | code += encode2_gen_legacy(variant, opsize, supports_high_regs, imm_expr, imm_size_expr, has_idx) 1325 | 1326 | if conds: 1327 | code += " }\n" 1328 | else: 1329 | break 1330 | else: 1331 | code += " return 0;\n" 1332 | 1333 | enc_code += code + "}\n" 1334 | 1335 | return enc_decls, enc_code 1336 | 1337 | 1338 | if __name__ == "__main__": 1339 | generators = { 1340 | "decode": decode_table, 1341 | "encode": encode_table, 1342 | "encode2": encode2_table, 1343 | } 1344 | 1345 | parser = argparse.ArgumentParser() 1346 | parser.add_argument("--32", dest="modes", action="append_const", const=32) 1347 | parser.add_argument("--64", dest="modes", action="append_const", const=64) 1348 | parser.add_argument("--with-undoc", action="store_true") 1349 | parser.add_argument("--stats", action="store_true") 1350 | parser.add_argument("mode", choices=generators.keys()) 1351 | parser.add_argument("table", type=argparse.FileType('r')) 1352 | parser.add_argument("out_public", type=argparse.FileType('w')) 1353 | 
parser.add_argument("out_private", type=argparse.FileType('w')) 1354 | args = parser.parse_args() 1355 | 1356 | entries = [] 1357 | for line in args.table.read().splitlines(): 1358 | if not line or line[0] == "#": continue 1359 | line, weak = (line, False) if line[0] != "*" else (line[1:], True) 1360 | opcode_string, desc_string = tuple(line.split(maxsplit=1)) 1361 | opcode, desc = Opcode.parse(opcode_string), InstrDesc.parse(desc_string) 1362 | verifyOpcodeDesc(opcode, desc) 1363 | if "UNDOC" not in desc.flags or args.with_undoc: 1364 | entries.append((weak, opcode, desc)) 1365 | 1366 | res_public, res_private = generators[args.mode](entries, args) 1367 | args.out_public.write(res_public) 1368 | args.out_private.write(res_private) 1369 | --------------------------------------------------------------------------------