├── .build.yml ├── .github └── workflows │ └── ci.yml ├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── decode-test.c ├── decode.c ├── encode-test.c ├── encode-test.inc ├── encode.c ├── encode2-test.c ├── encode2-test.cc ├── encode2.c ├── fadec-enc.h ├── fadec-enc2.h ├── fadec.h ├── format.c ├── instrs.txt ├── meson.build ├── meson_options.txt └── parseinstrs.py /.build.yml: -------------------------------------------------------------------------------- 1 | image: alpine/edge 2 | sources: 3 | - https://git.sr.ht/~aengelke/fadec 4 | packages: 5 | - meson 6 | tasks: 7 | - build: | 8 | mkdir fadec-build1 9 | meson fadec-build1 fadec 10 | ninja -C fadec-build1 11 | ninja -C fadec-build1 test 12 | # Complete test with encode2 API. 13 | mkdir fadec-build2 14 | meson fadec-build2 fadec -Dwith_encode2=true 15 | ninja -C fadec-build2 16 | ninja -C fadec-build2 test 17 | -------------------------------------------------------------------------------- /.github/workflows/ci.yml: -------------------------------------------------------------------------------- 1 | name: CI 2 | 3 | on: [push] 4 | 5 | jobs: 6 | build-linux: 7 | runs-on: ubuntu-latest 8 | steps: 9 | - uses: actions/checkout@v4 10 | - name: Install dependencies 11 | run: sudo apt install -y ninja-build meson 12 | - name: Configure 13 | run: mkdir build; CC=clang CXX=clang++ meson -Dbuildtype=debugoptimized -Dwith_encode2=true build 14 | - name: Build 15 | run: ninja -v -C build 16 | - name: Test 17 | run: meson test -v -C build 18 | build-linux-cmake: 19 | runs-on: ubuntu-latest 20 | steps: 21 | - uses: actions/checkout@v4 22 | - name: Install dependencies 23 | run: sudo apt install -y ninja-build cmake 24 | - name: Configure 25 | run: CC=clang CXX=clang++ cmake -B build -G Ninja -DFADEC_ENCODE2=ON 26 | - name: Build 27 | run: cmake --build build -v 28 | - name: Test 29 | run: ctest --test-dir build -V 30 | build-windows: 31 | runs-on: windows-latest 32 | steps: 33 | - uses: actions/checkout@v4 34 | - 
name: Install dependencies 35 | run: pip install ninja meson 36 | - name: Configure 37 | run: mkdir build; meson setup --vsenv -Dbuildtype=debugoptimized -Dwith_encode2=true build 38 | - name: Build 39 | run: meson compile -v -C build 40 | - name: Test 41 | run: meson test -v -C build 42 | build-windows-cmake: 43 | runs-on: windows-latest 44 | steps: 45 | - uses: actions/checkout@v4 46 | - name: Configure 47 | run: cmake -B build -DFADEC_ENCODE2=ON 48 | - name: Build 49 | run: cmake --build build -v 50 | - name: Test 51 | run: ctest --test-dir build -V -C Debug 52 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /build/ 2 | -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.23) 2 | 3 | project(fadec LANGUAGES C) 4 | enable_testing() 5 | 6 | # TODO: make this actually optional 7 | enable_language(CXX OPTIONAL) 8 | 9 | # Options 10 | set(FADEC_ARCHMODE "both" CACHE STRING "Support only 32-bit x86, 64-bit x86 or both") 11 | set_property(CACHE FADEC_ARCHMODE PROPERTY STRINGS both only32 only64) 12 | 13 | option(FADEC_UNDOC "Include undocumented instructions" FALSE) 14 | option(FADEC_DECODE "Include support for decoding" TRUE) 15 | option(FADEC_ENCODE "Include support for encoding" TRUE) 16 | option(FADEC_ENCODE2 "Include support for new encoding API" FALSE) 17 | 18 | set(CMAKE_C_STANDARD 11) 19 | 20 | if (MSVC) 21 | add_compile_options(/W4 -D_CRT_SECURE_NO_WARNINGS /wd4018 /wd4146 /wd4244 /wd4245 /wd4267 /wd4310) 22 | add_compile_options($<$:-Zc:preprocessor>) 23 | else() 24 | add_compile_options(-Wall -Wextra -Wpedantic -Wno-overlength-strings) 25 | endif() 26 | 27 | find_package(Python3 3.6 REQUIRED) 28 | 29 | add_library(fadec) 30 | add_library(fadec::fadec ALIAS fadec) 31 | 
set_target_properties(fadec PROPERTIES 32 | LINKER_LANGUAGE C 33 | ) 34 | 35 | set(GEN_ARGS "") 36 | if (NOT FADEC_ARCHMODE STREQUAL "only64") 37 | list(APPEND GEN_ARGS "--32") 38 | endif () 39 | if (NOT FADEC_ARCHMODE STREQUAL "only32") 40 | list(APPEND GEN_ARGS "--64") 41 | endif () 42 | if (FADEC_UNDOC) 43 | list(APPEND GEN_ARGS "--with-undoc") 44 | endif () 45 | 46 | file(MAKE_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}/include") 47 | 48 | function(fadec_component) 49 | cmake_parse_arguments(ARG "" "NAME" "HEADERS;SOURCES" ${ARGN}) 50 | 51 | set(PRIV_INC ${CMAKE_CURRENT_BINARY_DIR}/include/fadec-${ARG_NAME}-private.inc) 52 | set(PUB_INC ${CMAKE_CURRENT_BINARY_DIR}/include/fadec-${ARG_NAME}-public.inc) 53 | 54 | add_custom_command( 55 | OUTPUT ${PRIV_INC} ${PUB_INC} 56 | COMMAND ${Python3_EXECUTABLE} ${CMAKE_CURRENT_SOURCE_DIR}/parseinstrs.py ${ARG_NAME} ${CMAKE_CURRENT_SOURCE_DIR}/instrs.txt 57 | ${PUB_INC} ${PRIV_INC} ${GEN_ARGS} 58 | DEPENDS instrs.txt parseinstrs.py 59 | COMMENT "Building table for ${ARG_NAME}" 60 | ) 61 | 62 | list(APPEND FADEC_HEADERS ${PUB_INC}) 63 | target_sources(fadec PRIVATE 64 | ${ARG_SOURCES} 65 | 66 | PUBLIC 67 | FILE_SET HEADERS 68 | BASE_DIRS . 
69 | FILES 70 | ${ARG_HEADERS} 71 | 72 | PUBLIC 73 | FILE_SET generated_public TYPE HEADERS 74 | BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include 75 | FILES 76 | ${PUB_INC} 77 | 78 | PRIVATE 79 | FILE_SET generated_private TYPE HEADERS 80 | BASE_DIRS ${CMAKE_CURRENT_BINARY_DIR}/include 81 | FILES 82 | ${PRIV_INC} 83 | ) 84 | 85 | add_executable(fadec-${ARG_NAME}-test ${ARG_NAME}-test.c) 86 | target_link_libraries(fadec-${ARG_NAME}-test PRIVATE fadec) 87 | add_test(NAME ${ARG_NAME} COMMAND fadec-${ARG_NAME}-test) 88 | 89 | if (CMAKE_CXX_COMPILER AND ${ARG_NAME} STREQUAL "encode2") 90 | add_executable(fadec-${ARG_NAME}-test-cpp ${ARG_NAME}-test.cc) 91 | target_link_libraries(fadec-${ARG_NAME}-test-cpp PRIVATE fadec) 92 | add_test(NAME ${ARG_NAME}-cpp COMMAND fadec-${ARG_NAME}-test-cpp) 93 | endif() 94 | endfunction() 95 | 96 | if (FADEC_DECODE) 97 | fadec_component(NAME decode SOURCES decode.c format.c HEADERS fadec.h) 98 | endif () 99 | if (FADEC_ENCODE) 100 | fadec_component(NAME encode SOURCES encode.c HEADERS fadec-enc.h) 101 | endif () 102 | if (FADEC_ENCODE2) 103 | fadec_component(NAME encode2 SOURCES encode2.c HEADERS fadec-enc2.h) 104 | endif () 105 | 106 | install(TARGETS fadec EXPORT fadec 107 | LIBRARY 108 | ARCHIVE 109 | FILE_SET HEADERS FILE_SET generated_public) 110 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2018, Alexis Engelke 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without 5 | modification, are permitted provided that the following conditions are met: 6 | 7 | 1. Redistributions of source code must retain the above copyright notice, 8 | this list of conditions and the following disclaimer. 9 | 10 | 2. 
Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | 3. Neither the name of the copyright holder nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 19 | AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 | ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE 22 | LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 23 | CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 24 | SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 25 | INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 26 | CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 27 | ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 28 | POSSIBILITY OF SUCH DAMAGE. 29 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Fadec — Fast Decoder for x86-32 and x86-64 and Encoder for x86-64 2 | 3 | Fadec is a fast and lightweight decoder for x86-32 and x86-64. To meet the goal of speed, lookup tables are used to map the opcode the (internal) description of the instruction encoding. This table currently has a size of roughly 37 kiB (for 32/64-bit combined). 4 | 5 | Fadec-Enc (or Faenc) is a small, lightweight and easy-to-use encoder, currently for x86-64 only. 
6 | 7 | ## Key features 8 | 9 | > **Q: Why not just use any other decoding/encoding library available out there?** 10 | > 11 | > A: I needed to embed a small and fast decoder in a project for a freestanding environment (i.e., no libc). Further, only very few plain encoding libraries are available for x86-64; and most of them are large or make heavy use of external dependencies. 12 | 13 | - **Small size:** the entire library with the x86-64/32 decoder and the x86-64 encoder are only 95 kiB; for specific use cases, the size can be reduced even further (e.g., by dropping AVX-512). The main decode/encode routines are only a few hundreds lines of code. 14 | - **Performance:** Fadec is significantly faster than libopcodes, Capstone, or Zydis due to the absence of high-level abstractions and the small lookup table. 15 | - **Zero dependencies:** the entire library has no dependencies, even on the standard library, making it suitable for freestanding environments without a full libc or `malloc`-style memory allocation. 16 | - **Correctness:** even corner cases should be handled correctly (if not, that's a bug), e.g., the order of prefixes, immediate sizes of jump instructions, the presence of the `lock` prefix, or properly handling VEX.W in 32-bit mode. 17 | 18 | All components of this library target the Intel 64 implementations of x86. While AMD64 is _mostly similar_, there are some minor differences (e.g. operand sizes for jump instructions, more instructions, `cr8` can be accessed with `lock` prefix, `f34190` is `xchg`, not `pause`) which are currently not handled. 19 | 20 | ## Decoder Usage 21 | 22 | ### Example 23 | ```c 24 | uint8_t buffer[] = {0x49, 0x90}; 25 | FdInstr instr; 26 | // Decode from buffer into instr in 64-bit mode. 27 | int ret = fd_decode(buffer, sizeof(buffer), 64, 0, &instr); 28 | // ret<0 indicates an error, ret>0 the number of decoded bytes 29 | // Relevant properties of instructions can now be queried using the FD_* macros. 
30 | // Or, we can format the instruction to a string buffer: 31 | char fmtbuf[64]; 32 | fd_format(&instr, fmtbuf, sizeof(fmtbuf)); 33 | // fmtbuf now reads: "xchg r8, rax" 34 | ``` 35 | 36 | ### API 37 | 38 | The API consists of two functions to decode and format instructions, as well as several accessor macros. A full documentation can be found in [fadec.h](fadec.h). Direct access of any structure fields is not recommended. 39 | 40 | - `int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address, FdInstr* out_instr)` 41 | - Decode a single instruction. For internal performance reasons, note that: 42 | - The decoded operand sizes are not always exact. However, the exact size can be reconstructed in all cases. 43 | - An implicit `fwait` in FPU instructions is decoded as a separate instruction (matching the opcode layout in machine code). For example, `finit` is decoded as `FD_FWAIT` + `FD_FINIT` 44 | - Return value: number of bytes used, or a negative value in case of an error. 45 | - `buf`/`len`: buffer containing instruction bytes. At most 15 bytes will be read. If the instruction is longer than `len`, an error value is returned. 46 | - `mode`: architecture mode, either `32` or `64`. 47 | - `address`: set to `0`. (Obsolete use: virtual address of the decoded instruction.) 48 | - `out_instr`: Pointer to the instruction buffer, might get written partially in case of an error. 49 | - `void fd_format(const FdInstr* instr, char* buf, size_t len)` 50 | - Format a single instruction to a human-readable format. 51 | - `instr`: decoded instruction. 52 | - `buf`/`len`: buffer for formatted instruction string 53 | - Various accessor macros: see [fadec.h](fadec.h). 54 | 55 | ## Encoder Usage 56 | 57 | The encoder has two API variants: "v1" has a single entry point (`fe_enc64`) and the instruction is specified as integer parameter. "v2" has one entry point per instruction. 
v2 is currently about 3x faster than v1, but also has much larger code size (v1: <10 kiB; v2: ~3 MiB) and takes much longer to compile. It is therefore off by default and can be enabled by passing `-Dwith_encode2=true` to Meson. Both variants are supported. 58 | 59 | ### Example (API v1) 60 | 61 | ```c 62 | int failed = 0; 63 | uint8_t buf[64]; 64 | uint8_t* cur = buf; 65 | 66 | // xor eax, eax 67 | failed |= fe_enc64(&cur, FE_XOR32rr, FE_AX, FE_AX); 68 | // movzx ecx, byte ptr [rdi + 1*rax + 0] 69 | failed |= fe_enc64(&cur, FE_MOVZXr32m8, FE_CX, FE_MEM(FE_DI, 1, FE_AX, 0)); 70 | // test ecx, ecx 71 | failed |= fe_enc64(&cur, FE_TEST32rr, FE_CX, FE_CX); 72 | // jnz $ 73 | // This will be replaced later; FE_JMPL enforces use of longest offset 74 | uint8_t* fwd_jmp = cur; 75 | failed |= fe_enc64(&cur, FE_JNZ|FE_JMPL, (intptr_t) cur); 76 | uint8_t* loop_tgt = cur; 77 | // add rax, rcx 78 | failed |= fe_enc64(&cur, FE_ADD64rr, FE_AX, FE_CX); 79 | // sub ecx, 1 80 | failed |= fe_enc64(&cur, FE_SUB32ri, FE_CX, 1); 81 | // jnz loop_tgt 82 | failed |= fe_enc64(&cur, FE_JNZ, (intptr_t) loop_tgt); 83 | // Update previous jump to jump here. Note that we _must_ specify FE_JMPL too. 84 | failed |= fe_enc64(&fwd_jmp, FE_JNZ|FE_JMPL, (intptr_t) cur); 85 | // ret 86 | failed |= fe_enc64(&cur, FE_RET); 87 | // cur now points to the end of the buffer, failed indicates any failures. 
88 | ``` 89 | 90 | ### Example (API v2) 91 | 92 | ```c 93 | uint8_t buf[64]; 94 | uint8_t* cur = buf; 95 | 96 | // xor eax, eax 97 | cur += fe64_XOR32rr(cur, 0, FE_AX, FE_AX); 98 | // movzx ecx, byte ptr [rdi + 1*rax + 0] 99 | cur += fe64_MOVZXr32m8(cur, 0, FE_CX, FE_MEM(FE_DI, 1, FE_AX, 0)); 100 | // test ecx, ecx 101 | cur += fe64_TEST32rr(cur, 0, FE_CX, FE_CX); 102 | // jnz $ 103 | // This will be replaced later; FE_JMPL enforces use of longest offset 104 | uint8_t* fwd_jmp = cur; 105 | cur += fe64_JNZ(cur, FE_JMPL, cur); 106 | uint8_t* loop_tgt = cur; 107 | // add rax, rcx 108 | cur += fe64_ADD64rr(cur, 0, FE_AX, FE_CX); 109 | // sub ecx, 1 110 | cur += fe64_SUB32ri(cur, 0, FE_CX, 1); 111 | // jnz loop_tgt 112 | cur += fe64_JNZ(cur, 0, loop_tgt); 113 | // Update previous jump to jump here. Note that we _must_ specify FE_JMPL too. 114 | fe64_JNZ(fwd_jmp, FE_JMPL, cur); 115 | // ret 116 | cur += fe64_RET(cur, 0); 117 | // cur now points to the end of the buffer 118 | // errors are ignored, this example should not cause any :-) 119 | ``` 120 | 121 | ### API v1 122 | 123 | The API consists of one function to handle encode requests, as well as some macros. More information can be found in [fadec-enc.h](fadec-enc.h). Usage of internals like enum values is not recommended. 124 | 125 | - `int fe_enc64(uint8_t** buf, uint64_t mnem, int64_t operands...)` 126 | - Encodes an instruction for x86-64 into `*buf`. EVEX-encoded instructions will transparently encode with the shorter VEX prefix where permitted. 127 | - Return value: `0` on success, a negative value in error cases. 128 | - `buf`: Pointer to the pointer to the instruction buffer. The pointer (`*buf`) will be advanced by the number of bytes written. The instruction buffer must have at least 15 bytes left. 129 | - `mnem`: Instruction mnemonic to encode combined with extra flags: 130 | - `FE_SEG(segreg)`: override segment to specified segment register. 131 | - `FE_ADDR32`: override address size to 32-bit. 
132 | - `FE_JMPL`: use longest possible offset encoding, useful when jump target is not known. 133 | - `FE_MASK(maskreg)`: specify non-zero mask register (1--7) for instructions that support masking (suffixed with `_mask` or `_maskz`) or require a mask (AVX-512 gather/scatter). 134 | - `FE_RC_RN/RD/RU/RZ`: set rounding mode for instructions with static rounding control (suffixed `_er`). 135 | - `operands...`: Up to 4 instruction operands. The operand kinds must match the requirements of the mnemonic. 136 | - For register operands (`r`=non-mask register, `k`=mask register), use the register: `FE_AX`, `FE_AH`, `FE_XMM12`. 137 | - For immediate operands (`i`=regular, `a`=absolute address), use the constant: `12`, `-0xbeef`. 138 | - For memory operands (`m`=regular or `b`=broadcast), use: `FE_MEM(basereg,scale,indexreg,offset)`. Use `0` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically. 139 | - For offset operands (`o`), specify the target address. 140 | 141 | ### API v2 142 | 143 | The API consists of one function per instruction, as well as some macros. The API provides type safety for different register types as well as for memory operands (regular vs. VSIB). Besides a few details listed here, the usage is very similar to API v1. More information can be found in [fadec-enc2.h](fadec-enc2.h). Usage of internals like enum values is not recommended. 144 | 145 | - `int fe64_(uint8_t* buf, int flags, )` 146 | - Encodes the specified instruction for x86-64 into `buf`. EVEX-encoded instructions will transparently encode with the shorter VEX prefix where permitted. 147 | - Return value: `0` on failure, otherwise the instruction length. 148 | - `buf`: Pointer to the instruction buffer. The instruction buffer must have at least 15 bytes left. Bytes beyond the returned instruction length can be overwritten. 
149 | - `flags`: combination of extra flags, default to `0`: 150 | - `FE_SEG(segreg)`: override segment to specified segment register. 151 | - `FE_ADDR32`: override address size to 32-bit. 152 | - `FE_JMPL`: use longest possible offset encoding, useful when jump target is not known. 153 | - `FE_RC_RN/RD/RU/RZ`: set rounding mode for instructions with static rounding control (suffixed `_er`). 154 | - `FeRegMASK opmask` (instructions with opmask only): specify non-zero mask register (1--7) for instructions suffixed with `_mask`/`_maskz` and AVX-512 gather/scatter. 155 | - `operands...`: up to four instruction operands. 156 | - Registers have types `FeRegGP`/`FeRegXMM`/`FeRegMASK`/etc.; byte registers accepting high-byte operands also accept `FeRegGPH`. 157 | - Immediate operands have an appropriately sized integer type. 158 | - Memory operands use a `FeMem` (VSIB: `FeMemV`) structure, use the macro `FE_MEM(basereg,scale,indexreg,offset)` (VSIB: `FE_MEMV(...)`). Use `FE_NOREG` to specify _no register_. For RIP-relative addressing, the size of the instruction is added automatically. 159 | - For offset operands (`o`), specify the target address relative to `buf`. 160 | - `int fe64_NOP(uint8_t* buf, unsigned size)` 161 | - Encode a series of `nop`s of `size` bytes, but at least emit one byte. This will use larger the `nop` encodings to reduce the number of instructions and is intended for filling padding. 162 | 163 | ## Known issues 164 | - Decoder/Encoder: register uniqueness constraints are not enforced. This affects: 165 | - VSIB-encoded instructions: no vector register may be used more than once 166 | - AMX instructions: no tile register may be used more than once 167 | - AVX-512 complex FP16 multiplication: destination must be not be equal to a source register 168 | - Prefixes for indirect jumps and calls are not properly decoded, e.g. `notrack`, `bnd`. 169 | - Low test coverage. (Help needed.) 170 | - No Python API. 
171 | 172 | Some ISA extensions are not supported, often because they are deprecated or unsupported by recent hardware. These are unlikely to be implemented in the near future: 173 | 174 | - (Intel) MPX: Intel lists MPX as deprecated. 175 | - (Intel) HLE prefixes `xacquire`/`xrelease`: Intel lists HLE as deprecated. The formatter for decoded instructions is able to reconstruct these in most cases, though. 176 | - (Intel) Xeon Phi (KNC/KNL/KNM) extensions, including the MVEX prefix: the hardware is discontinued/no longer available. 177 | - (AMD) XOP: unsupported by newer hardware. 178 | - (AMD) FMA4: unsupported by newer hardware. 179 | 180 | If you find any other issues, please report a bug. Or, even better, send a patch fixing the issue. 181 | -------------------------------------------------------------------------------- /decode.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect((x), 1) 11 | #define UNLIKELY(x) __builtin_expect((x), 0) 12 | #define ASSUME(x) do { if (!(x)) __builtin_unreachable(); } while (0) 13 | #else 14 | #define LIKELY(x) (x) 15 | #define UNLIKELY(x) (x) 16 | #define ASSUME(x) ((void) 0) 17 | #endif 18 | 19 | // Defines FD_TABLE_OFFSET_32 and FD_TABLE_OFFSET_64, if available 20 | #define FD_DECODE_TABLE_DEFINES 21 | #include 22 | #undef FD_DECODE_TABLE_DEFINES 23 | 24 | enum DecodeMode { 25 | DECODE_64 = 0, 26 | DECODE_32 = 1, 27 | }; 28 | 29 | typedef enum DecodeMode DecodeMode; 30 | 31 | #define ENTRY_NONE 0 32 | #define ENTRY_INSTR 1 33 | #define ENTRY_TABLE256 2 34 | #define ENTRY_TABLE16 3 35 | #define ENTRY_TABLE8E 4 36 | #define ENTRY_TABLE_PREFIX 5 37 | #define ENTRY_TABLE_VEX 6 38 | #define ENTRY_TABLE_ROOT 8 39 | #define ENTRY_MASK 7 40 | 41 | static uint16_t 42 | table_lookup(unsigned cur_idx, unsigned entry_idx) { 43 | static _Alignas(16) const uint16_t _decode_table[] = 
{ 44 | #define FD_DECODE_TABLE_DATA 45 | #include 46 | #undef FD_DECODE_TABLE_DATA 47 | }; 48 | return _decode_table[cur_idx + entry_idx]; 49 | } 50 | 51 | static unsigned 52 | table_walk(unsigned table_entry, unsigned entry_idx) { 53 | return table_lookup(table_entry & ~0x3, entry_idx); 54 | } 55 | 56 | #define LOAD_LE_1(buf) ((uint64_t) *(const uint8_t*) (buf)) 57 | #define LOAD_LE_2(buf) (LOAD_LE_1(buf) | LOAD_LE_1((const uint8_t*) (buf) + 1)<<8) 58 | #define LOAD_LE_3(buf) (LOAD_LE_2(buf) | LOAD_LE_1((const uint8_t*) (buf) + 2)<<16) 59 | #define LOAD_LE_4(buf) (LOAD_LE_2(buf) | LOAD_LE_2((const uint8_t*) (buf) + 2)<<16) 60 | #define LOAD_LE_8(buf) (LOAD_LE_4(buf) | LOAD_LE_4((const uint8_t*) (buf) + 4)<<32) 61 | 62 | enum 63 | { 64 | PREFIX_REXB = 0x01, 65 | PREFIX_REXX = 0x02, 66 | PREFIX_REXR = 0x04, 67 | PREFIX_REXW = 0x08, 68 | PREFIX_REX = 0x40, 69 | PREFIX_REXRR = 0x10, 70 | PREFIX_VEX = 0x20, 71 | }; 72 | 73 | struct InstrDesc 74 | { 75 | uint16_t type; 76 | uint16_t operand_indices; 77 | uint16_t operand_sizes; 78 | uint16_t reg_types; 79 | }; 80 | 81 | #define DESC_HAS_MODRM(desc) (((desc)->operand_indices & (3 << 0)) != 0) 82 | #define DESC_MODRM_IDX(desc) ((((desc)->operand_indices >> 0) & 3) ^ 3) 83 | #define DESC_HAS_MODREG(desc) (((desc)->operand_indices & (3 << 2)) != 0) 84 | #define DESC_MODREG_IDX(desc) ((((desc)->operand_indices >> 2) & 3) ^ 3) 85 | #define DESC_HAS_VEXREG(desc) (((desc)->operand_indices & (3 << 4)) != 0) 86 | #define DESC_VEXREG_IDX(desc) ((((desc)->operand_indices >> 4) & 3) ^ 3) 87 | #define DESC_IMM_CONTROL(desc) (((desc)->operand_indices >> 12) & 0x7) 88 | #define DESC_IMM_IDX(desc) ((((desc)->operand_indices >> 6) & 3) ^ 3) 89 | #define DESC_EVEX_BCST(desc) (((desc)->operand_indices >> 8) & 1) 90 | #define DESC_EVEX_MASK(desc) (((desc)->operand_indices >> 9) & 1) 91 | #define DESC_ZEROREG_VAL(desc) (((desc)->operand_indices >> 10) & 1) 92 | #define DESC_LOCK(desc) (((desc)->operand_indices >> 11) & 1) 93 | #define 
DESC_VSIB(desc) (((desc)->operand_indices >> 15) & 1) 94 | #define DESC_OPSIZE(desc) (((desc)->reg_types >> 11) & 7) 95 | #define DESC_MODRM_SIZE(desc) (((desc)->operand_sizes >> 0) & 3) 96 | #define DESC_MODREG_SIZE(desc) (((desc)->operand_sizes >> 2) & 3) 97 | #define DESC_VEXREG_SIZE(desc) (((desc)->operand_sizes >> 4) & 3) 98 | #define DESC_IMM_SIZE(desc) (((desc)->operand_sizes >> 6) & 3) 99 | #define DESC_LEGACY(desc) (((desc)->operand_sizes >> 8) & 1) 100 | #define DESC_SIZE_FIX1(desc) (((desc)->operand_sizes >> 10) & 7) 101 | #define DESC_SIZE_FIX2(desc) (((desc)->operand_sizes >> 13) & 3) 102 | #define DESC_INSTR_WIDTH(desc) (((desc)->operand_sizes >> 15) & 1) 103 | #define DESC_MODRM(desc) (((desc)->reg_types >> 14) & 1) 104 | #define DESC_IGN66(desc) (((desc)->reg_types >> 15) & 1) 105 | #define DESC_EVEX_SAE(desc) (((desc)->reg_types >> 8) & 1) 106 | #define DESC_EVEX_ER(desc) (((desc)->reg_types >> 9) & 1) 107 | #define DESC_EVEX_BCST16(desc) (((desc)->reg_types >> 10) & 1) 108 | #define DESC_REGTY_MODRM(desc) (((desc)->reg_types >> 0) & 7) 109 | #define DESC_REGTY_MODREG(desc) (((desc)->reg_types >> 3) & 7) 110 | #define DESC_REGTY_VEXREG(desc) (((desc)->reg_types >> 6) & 3) 111 | 112 | int 113 | fd_decode(const uint8_t* buffer, size_t len_sz, int mode_int, uintptr_t address, 114 | FdInstr* instr) 115 | { 116 | int len = len_sz > 15 ? 15 : len_sz; 117 | 118 | // Ensure that we can actually handle the decode request 119 | DecodeMode mode; 120 | unsigned table_root_idx; 121 | switch (mode_int) 122 | { 123 | #if defined(FD_TABLE_OFFSET_32) 124 | case 32: table_root_idx = FD_TABLE_OFFSET_32; mode = DECODE_32; break; 125 | #endif 126 | #if defined(FD_TABLE_OFFSET_64) 127 | case 64: table_root_idx = FD_TABLE_OFFSET_64; mode = DECODE_64; break; 128 | #endif 129 | default: return FD_ERR_INTERNAL; 130 | } 131 | 132 | int off = 0; 133 | uint8_t vex_operand = 0; 134 | 135 | uint8_t addr_size = mode == DECODE_64 ? 
3 : 2; 136 | unsigned prefix_rex = 0; 137 | uint8_t prefix_rep = 0; 138 | unsigned vexl = 0; 139 | unsigned prefix_evex = 0; 140 | instr->segment = FD_REG_NONE; 141 | 142 | // Values must match prefixes in parseinstrs.py. 143 | enum { 144 | PF_SEG1 = 0xfff8 - 0xfff8, 145 | PF_SEG2 = 0xfff9 - 0xfff8, 146 | PF_66 = 0xfffa - 0xfff8, 147 | PF_67 = 0xfffb - 0xfff8, 148 | PF_LOCK = 0xfffc - 0xfff8, 149 | PF_REP = 0xfffd - 0xfff8, 150 | PF_REX = 0xfffe - 0xfff8, 151 | }; 152 | 153 | uint8_t prefixes[8] = {0}; 154 | unsigned table_entry = 0; 155 | while (true) { 156 | if (UNLIKELY(off >= len)) 157 | return FD_ERR_PARTIAL; 158 | uint8_t prefix = buffer[off]; 159 | table_entry = table_lookup(table_root_idx, prefix); 160 | if (LIKELY(table_entry - 0xfff8 >= 8)) 161 | break; 162 | prefixes[PF_REX] = 0; 163 | prefixes[table_entry - 0xfff8] = prefix; 164 | off++; 165 | } 166 | if (off) { 167 | if (UNLIKELY(prefixes[PF_SEG2])) { 168 | if (prefixes[PF_SEG2] & 0x02) 169 | instr->segment = prefixes[PF_SEG2] >> 3 & 3; 170 | else 171 | instr->segment = prefixes[PF_SEG2] & 7; 172 | } 173 | if (UNLIKELY(prefixes[PF_67])) 174 | addr_size--; 175 | prefix_rex = prefixes[PF_REX]; 176 | prefix_rep = prefixes[PF_REP]; 177 | } 178 | 179 | // table_entry kinds: INSTR(0), T16(1), ESCAPE_A(2), ESCAPE_B(3) 180 | if (LIKELY(!(table_entry & 2))) { 181 | off++; 182 | 183 | // Then, walk through ModR/M-encoded opcode extensions. 
184 | if (table_entry & 1) { 185 | if (UNLIKELY(off >= len)) 186 | return FD_ERR_PARTIAL; 187 | unsigned isreg = buffer[off] >= 0xc0; 188 | table_entry = table_walk(table_entry, ((buffer[off] >> 2) & 0xe) | isreg); 189 | // table_entry kinds: INSTR(0), T8E(1) 190 | if (table_entry & 1) 191 | table_entry = table_walk(table_entry, buffer[off] & 7); 192 | } 193 | 194 | // table_entry kinds: INSTR(0) 195 | goto direct; 196 | } 197 | 198 | if (UNLIKELY(off >= len)) 199 | return FD_ERR_PARTIAL; 200 | 201 | unsigned opcode_escape = 0; 202 | uint8_t mandatory_prefix = 0; // without escape/VEX/EVEX, this is ignored. 203 | if (buffer[off] == 0x0f) 204 | { 205 | if (UNLIKELY(off + 1 >= len)) 206 | return FD_ERR_PARTIAL; 207 | if (buffer[off + 1] == 0x38) 208 | opcode_escape = 2; 209 | else if (buffer[off + 1] == 0x3a) 210 | opcode_escape = 3; 211 | else 212 | opcode_escape = 1; 213 | off += opcode_escape >= 2 ? 2 : 1; 214 | 215 | // If there is no REP/REPNZ prefix offer 66h as mandatory prefix. If 216 | // there is a REP prefix, then the 66h prefix is ignored here. 217 | mandatory_prefix = prefix_rep ? prefix_rep ^ 0xf1 : !!prefixes[PF_66]; 218 | } 219 | else if (UNLIKELY((unsigned) buffer[off] - 0xc4 < 2 || buffer[off] == 0x62)) 220 | { 221 | unsigned vex_prefix = buffer[off]; 222 | // VEX (C4/C5) or EVEX (62) 223 | if (UNLIKELY(off + 1 >= len)) 224 | return FD_ERR_PARTIAL; 225 | if (UNLIKELY(mode == DECODE_32 && buffer[off + 1] < 0xc0)) { 226 | off++; 227 | table_entry = table_walk(table_entry, 0); 228 | // table_entry kinds: INSTR(0) 229 | goto direct; 230 | } 231 | 232 | // VEX/EVEX + 66/F3/F2/REX will #UD. 233 | // Note: REX is also here only respected if it immediately precedes the 234 | // opcode, in this case the VEX/EVEX "prefix". 235 | if (prefixes[PF_66] || prefixes[PF_REP] || prefix_rex) 236 | return FD_ERR_UD; 237 | 238 | uint8_t byte = buffer[off + 1]; 239 | if (vex_prefix == 0xc5) // 2-byte VEX 240 | { 241 | opcode_escape = 1; 242 | prefix_rex = byte & 0x80 ? 
0 : PREFIX_REXR; 243 | } 244 | else // 3-byte VEX or EVEX 245 | { 246 | // SDM Vol 2A 2-15 (Dec. 2016): Ignored in 32-bit mode 247 | if (mode == DECODE_64) 248 | prefix_rex = byte >> 5 ^ 0x7; 249 | if (vex_prefix == 0x62) // EVEX 250 | { 251 | if (byte & 0x08) // Bit 3 of opcode_escape must be clear. 252 | return FD_ERR_UD; 253 | _Static_assert(PREFIX_REXRR == 0x10, "wrong REXRR value"); 254 | if (mode == DECODE_64) 255 | prefix_rex |= (byte & PREFIX_REXRR) ^ PREFIX_REXRR; 256 | } 257 | else // 3-byte VEX 258 | { 259 | if (byte & 0x18) // Bits 4:3 of opcode_escape must be clear. 260 | return FD_ERR_UD; 261 | } 262 | 263 | opcode_escape = (byte & 0x07); 264 | if (UNLIKELY(opcode_escape == 0)) { 265 | int prefix_len = vex_prefix == 0x62 ? 4 : 3; 266 | // Pretend to decode the prefix plus one opcode byte. 267 | return off + prefix_len > len ? FD_ERR_PARTIAL : FD_ERR_UD; 268 | } 269 | 270 | // Load third byte of VEX prefix 271 | if (UNLIKELY(off + 2 >= len)) 272 | return FD_ERR_PARTIAL; 273 | byte = buffer[off + 2]; 274 | prefix_rex |= byte & 0x80 ? PREFIX_REXW : 0; 275 | } 276 | 277 | mandatory_prefix = byte & 3; 278 | vex_operand = ((byte & 0x78) >> 3) ^ 0xf; 279 | prefix_rex |= PREFIX_VEX; 280 | 281 | if (vex_prefix == 0x62) // EVEX 282 | { 283 | if (!(byte & 0x04)) // Bit 10 must be 1. 284 | return FD_ERR_UD; 285 | if (UNLIKELY(off + 3 >= len)) 286 | return FD_ERR_PARTIAL; 287 | byte = buffer[off + 3]; 288 | // prefix_evex is z:L'L/RC:b:V':aaa 289 | vexl = (byte >> 5) & 3; 290 | prefix_evex = byte | 0x100; // Ensure that prefix_evex is non-zero. 291 | if (mode == DECODE_64) // V' causes UD in 32-bit mode 292 | vex_operand |= byte & 0x08 ? 0 : 0x10; // V' 293 | else if (!(byte & 0x08)) 294 | return FD_ERR_UD; 295 | off += 4; 296 | } 297 | else // VEX 298 | { 299 | vexl = byte & 0x04 ? 
1 : 0; 300 | off += 0xc7 - vex_prefix; // 3 for c4, 2 for c5 301 | } 302 | } 303 | 304 | table_entry = table_walk(table_entry, opcode_escape); 305 | // table_entry kinds: INSTR(0) [only for invalid], T256(2) 306 | if (UNLIKELY(!table_entry)) 307 | return FD_ERR_UD; 308 | if (UNLIKELY(off >= len)) 309 | return FD_ERR_PARTIAL; 310 | table_entry = table_walk(table_entry, buffer[off++]); 311 | // table_entry kinds: INSTR(0), T16(1), TVEX(2), TPREFIX(3) 312 | 313 | // Handle mandatory prefixes (which behave like an opcode ext.). 314 | if ((table_entry & 3) == 3) 315 | table_entry = table_walk(table_entry, mandatory_prefix); 316 | // table_entry kinds: INSTR(0), T16(1), TVEX(2) 317 | 318 | // Then, walk through ModR/M-encoded opcode extensions. 319 | if (table_entry & 1) { 320 | if (UNLIKELY(off >= len)) 321 | return FD_ERR_PARTIAL; 322 | unsigned isreg = buffer[off] >= 0xc0; 323 | table_entry = table_walk(table_entry, ((buffer[off] >> 2) & 0xe) | isreg); 324 | // table_entry kinds: INSTR(0), T8E(1), TVEX(2) 325 | if (table_entry & 1) 326 | table_entry = table_walk(table_entry, buffer[off] & 7); 327 | } 328 | // table_entry kinds: INSTR(0), TVEX(2) 329 | 330 | // For VEX prefix, we have to distinguish between VEX.W and VEX.L which may 331 | // be part of the opcode. 332 | if (UNLIKELY(table_entry & 2)) 333 | { 334 | uint8_t index = 0; 335 | index |= prefix_rex & PREFIX_REXW ? (1 << 0) : 0; 336 | // When EVEX.L'L is the rounding mode, the instruction must not have 337 | // L'L constraints. 
338 | index |= vexl << 1; 339 | table_entry = table_walk(table_entry, index); 340 | } 341 | // table_entry kinds: INSTR(0) 342 | 343 | direct: 344 | // table_entry kinds: INSTR(0) 345 | if (UNLIKELY(!table_entry)) 346 | return FD_ERR_UD; 347 | 348 | static _Alignas(16) const struct InstrDesc descs[] = { 349 | #define FD_DECODE_TABLE_DESCS 350 | #include 351 | #undef FD_DECODE_TABLE_DESCS 352 | }; 353 | const struct InstrDesc* desc = &descs[table_entry >> 2]; 354 | 355 | instr->type = desc->type; 356 | instr->addrsz = addr_size; 357 | instr->flags = ((prefix_rep + 1) & 6) + (mode == DECODE_64 ? FD_FLAG_64 : 0); 358 | instr->address = address; 359 | 360 | for (unsigned i = 0; i < sizeof(instr->operands) / sizeof(FdOp); i++) 361 | instr->operands[i] = (FdOp) {0}; 362 | 363 | if (DESC_MODRM(desc) && UNLIKELY(off++ >= len)) 364 | return FD_ERR_PARTIAL; 365 | unsigned op_byte = buffer[off - 1] | (!DESC_MODRM(desc) ? 0xc0 : 0); 366 | 367 | if (UNLIKELY(prefix_evex)) { 368 | // VSIB inst (gather/scatter) without mask register or w/EVEX.z is UD 369 | if (DESC_VSIB(desc) && (!(prefix_evex & 0x07) || (prefix_evex & 0x80))) 370 | return FD_ERR_UD; 371 | // Inst doesn't support masking, so EVEX.z or EVEX.aaa is UD 372 | if (!DESC_EVEX_MASK(desc) && (prefix_evex & 0x87)) 373 | return FD_ERR_UD; 374 | // EVEX.z without EVEX.aaa is UD. The Intel SDM is rather unprecise 375 | // about this, but real hardware doesn't accept this. 
376 | if ((prefix_evex & 0x87) == 0x80) 377 | return FD_ERR_UD; 378 | 379 | // Cases for SAE/RC (reg operands only): 380 | // - ER supported -> all ok 381 | // - SAE supported -> assume L'L is RC, but ignored (undocumented) 382 | // - Neither supported -> b == 0 383 | if ((prefix_evex & 0x10) && (op_byte & 0xc0) == 0xc0) { // EVEX.b+reg 384 | if (!DESC_EVEX_SAE(desc)) 385 | return FD_ERR_UD; 386 | vexl = 2; 387 | if (DESC_EVEX_ER(desc)) 388 | instr->evex = prefix_evex; 389 | else 390 | instr->evex = (prefix_evex & 0x87) | 0x60; // set RC, clear B 391 | } else { 392 | if (UNLIKELY(vexl == 3)) // EVEX.L'L == 11b is UD 393 | return FD_ERR_UD; 394 | instr->evex = prefix_evex & 0x87; // clear RC, clear B 395 | } 396 | 397 | if (DESC_VSIB(desc)) 398 | vex_operand &= 0xf; // EVEX.V' is used as index extension instead. 399 | } else { 400 | instr->evex = 0; 401 | } 402 | 403 | unsigned op_size; 404 | unsigned op_size_alt = 0; 405 | if (!(DESC_OPSIZE(desc) & 4)) { 406 | if (mode == DECODE_64) 407 | op_size = ((prefix_rex & PREFIX_REXW) || DESC_OPSIZE(desc) == 3) ? 4 : 408 | UNLIKELY(prefixes[PF_66] && !DESC_IGN66(desc)) ? 2 : 409 | DESC_OPSIZE(desc) ? 4 : 410 | 3; 411 | else 412 | op_size = UNLIKELY(prefixes[PF_66] && !DESC_IGN66(desc)) ? 2 : 3; 413 | } else { 414 | op_size = 5 + vexl; 415 | op_size_alt = op_size - (DESC_OPSIZE(desc) & 3); 416 | } 417 | 418 | uint8_t operand_sizes[4] = { 419 | DESC_SIZE_FIX1(desc), DESC_SIZE_FIX2(desc) + 1, op_size, op_size_alt 420 | }; 421 | 422 | if (UNLIKELY(instr->type == FDI_MOV_CR || instr->type == FDI_MOV_DR)) { 423 | unsigned modreg = (op_byte >> 3) & 0x7; 424 | unsigned modrm = op_byte & 0x7; 425 | 426 | FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)]; 427 | op_modreg->type = FD_OT_REG; 428 | op_modreg->size = op_size; 429 | op_modreg->reg = modreg | (prefix_rex & PREFIX_REXR ? 8 : 0); 430 | op_modreg->misc = instr->type == FDI_MOV_CR ? 
FD_RT_CR : FD_RT_DR; 431 | if (instr->type == FDI_MOV_CR && (~0x011d >> op_modreg->reg) & 1) 432 | return FD_ERR_UD; 433 | else if (instr->type == FDI_MOV_DR && prefix_rex & PREFIX_REXR) 434 | return FD_ERR_UD; 435 | 436 | FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)]; 437 | op_modrm->type = FD_OT_REG; 438 | op_modrm->size = op_size; 439 | op_modrm->reg = modrm | (prefix_rex & PREFIX_REXB ? 8 : 0); 440 | op_modrm->misc = FD_RT_GPL; 441 | goto skip_modrm; 442 | } 443 | 444 | if (DESC_HAS_MODREG(desc)) 445 | { 446 | FdOp* op_modreg = &instr->operands[DESC_MODREG_IDX(desc)]; 447 | unsigned reg_idx = (op_byte & 0x38) >> 3; 448 | unsigned reg_ty = DESC_REGTY_MODREG(desc); 449 | op_modreg->misc = reg_ty; 450 | if (LIKELY(reg_ty < 2)) 451 | reg_idx += prefix_rex & PREFIX_REXR ? 8 : 0; 452 | else if (reg_ty == 7 && (prefix_rex & PREFIX_REXR || prefix_evex & 0x80)) 453 | return FD_ERR_UD; // REXR in 64-bit mode or EVEX.z with mask as dest 454 | if (UNLIKELY(reg_ty == FD_RT_VEC)) // REXRR ignored above in 32-bit mode 455 | reg_idx += prefix_rex & PREFIX_REXRR ? 16 : 0; 456 | else if (UNLIKELY(prefix_rex & PREFIX_REXRR)) 457 | return FD_ERR_UD; 458 | op_modreg->type = FD_OT_REG; 459 | op_modreg->size = operand_sizes[DESC_MODREG_SIZE(desc)]; 460 | op_modreg->reg = reg_idx; 461 | } 462 | 463 | if (DESC_HAS_MODRM(desc)) 464 | { 465 | FdOp* op_modrm = &instr->operands[DESC_MODRM_IDX(desc)]; 466 | op_modrm->size = operand_sizes[DESC_MODRM_SIZE(desc)]; 467 | 468 | unsigned rm = op_byte & 0x07; 469 | if (op_byte >= 0xc0) 470 | { 471 | uint8_t reg_idx = rm; 472 | unsigned reg_ty = DESC_REGTY_MODRM(desc); 473 | op_modrm->misc = reg_ty; 474 | if (LIKELY(reg_ty < 2)) 475 | reg_idx += prefix_rex & PREFIX_REXB ? 8 : 0; 476 | if (prefix_evex && reg_ty == 0) // vector registers only 477 | reg_idx += prefix_rex & PREFIX_REXX ? 
16 : 0; 478 | op_modrm->type = FD_OT_REG; 479 | op_modrm->reg = reg_idx; 480 | } 481 | else 482 | { 483 | unsigned dispscale = 0; 484 | 485 | if (UNLIKELY(prefix_evex)) { 486 | // EVEX.z for memory destination operand is UD. 487 | if (UNLIKELY(prefix_evex & 0x80) && DESC_MODRM_IDX(desc) == 0) 488 | return FD_ERR_UD; 489 | 490 | // EVEX.b for memory-operand without broadcast support is UD. 491 | if (UNLIKELY(prefix_evex & 0x10)) { 492 | if (UNLIKELY(!DESC_EVEX_BCST(desc))) 493 | return FD_ERR_UD; 494 | if (UNLIKELY(DESC_EVEX_BCST16(desc))) 495 | dispscale = 1; 496 | else 497 | dispscale = prefix_rex & PREFIX_REXW ? 3 : 2; 498 | instr->segment |= dispscale << 6; // Store broadcast size 499 | op_modrm->type = FD_OT_MEMBCST; 500 | } else { 501 | dispscale = op_modrm->size - 1; 502 | op_modrm->type = FD_OT_MEM; 503 | } 504 | } else { 505 | op_modrm->type = FD_OT_MEM; 506 | } 507 | 508 | // 16-bit address size implies different ModRM encoding 509 | if (UNLIKELY(addr_size == 1)) { 510 | ASSUME(mode == DECODE_32); 511 | if (UNLIKELY(DESC_VSIB(desc))) // 16-bit addr size + VSIB is UD 512 | return FD_ERR_UD; 513 | if (rm < 6) 514 | op_modrm->misc = rm & 1 ? FD_REG_DI : FD_REG_SI; 515 | else 516 | op_modrm->misc = FD_REG_NONE; 517 | 518 | if (rm < 4) 519 | op_modrm->reg = rm & 2 ? FD_REG_BP : FD_REG_BX; 520 | else if (rm < 6 || (op_byte & 0xc7) == 0x06) 521 | op_modrm->reg = FD_REG_NONE; 522 | else 523 | op_modrm->reg = rm == 6 ? 
FD_REG_BP : FD_REG_BX; 524 | 525 | const uint8_t* dispbase = &buffer[off]; 526 | if (op_byte & 0x40) { 527 | if (UNLIKELY((off += 1) > len)) 528 | return FD_ERR_PARTIAL; 529 | instr->disp = (int8_t) LOAD_LE_1(dispbase) * (1 << dispscale); 530 | } else if (op_byte & 0x80 || (op_byte & 0xc7) == 0x06) { 531 | if (UNLIKELY((off += 2) > len)) 532 | return FD_ERR_PARTIAL; 533 | instr->disp = (int16_t) LOAD_LE_2(dispbase); 534 | } else { 535 | instr->disp = 0; 536 | } 537 | goto end_modrm; 538 | } 539 | 540 | // SIB byte 541 | uint8_t base = rm; 542 | if (rm == 4) { 543 | if (UNLIKELY(off >= len)) 544 | return FD_ERR_PARTIAL; 545 | uint8_t sib = buffer[off++]; 546 | unsigned scale = sib & 0xc0; 547 | unsigned idx = (sib & 0x38) >> 3; 548 | idx += prefix_rex & PREFIX_REXX ? 8 : 0; 549 | base = sib & 0x07; 550 | if (idx == 4) 551 | idx = FD_REG_NONE; 552 | op_modrm->misc = scale | idx; 553 | } else { 554 | op_modrm->misc = FD_REG_NONE; 555 | } 556 | 557 | if (UNLIKELY(DESC_VSIB(desc))) { 558 | // VSIB must have a memory operand with SIB byte. 559 | if (rm != 4) 560 | return FD_ERR_UD; 561 | _Static_assert(FD_REG_NONE == 0x3f, "unexpected FD_REG_NONE"); 562 | // idx 4 is valid for VSIB 563 | if ((op_modrm->misc & 0x3f) == FD_REG_NONE) 564 | op_modrm->misc &= 0xc4; 565 | if (prefix_evex) // EVEX.V':EVEX.X:SIB.idx 566 | op_modrm->misc |= prefix_evex & 0x8 ? 0 : 0x10; 567 | } 568 | 569 | // RIP-relative addressing only if SIB-byte is absent 570 | if (op_byte < 0x40 && rm == 5 && mode == DECODE_64) 571 | op_modrm->reg = FD_REG_IP; 572 | else if (op_byte < 0x40 && base == 5) 573 | op_modrm->reg = FD_REG_NONE; 574 | else 575 | op_modrm->reg = base + (prefix_rex & PREFIX_REXB ? 
8 : 0); 576 | 577 | const uint8_t* dispbase = &buffer[off]; 578 | if (op_byte & 0x40) { 579 | if (UNLIKELY((off += 1) > len)) 580 | return FD_ERR_PARTIAL; 581 | instr->disp = (int8_t) LOAD_LE_1(dispbase) * (1 << dispscale); 582 | } else if (op_byte & 0x80 || (op_byte < 0x40 && base == 5)) { 583 | if (UNLIKELY((off += 4) > len)) 584 | return FD_ERR_PARTIAL; 585 | instr->disp = (int32_t) LOAD_LE_4(dispbase); 586 | } else { 587 | instr->disp = 0; 588 | } 589 | end_modrm:; 590 | } 591 | } 592 | 593 | if (UNLIKELY(DESC_HAS_VEXREG(desc))) 594 | { 595 | FdOp* operand = &instr->operands[DESC_VEXREG_IDX(desc)]; 596 | if (DESC_ZEROREG_VAL(desc)) { 597 | operand->type = FD_OT_REG; 598 | operand->size = 1; 599 | operand->reg = FD_REG_CL; 600 | operand->misc = FD_RT_GPL; 601 | } else { 602 | operand->type = FD_OT_REG; 603 | // Without VEX prefix, this encodes an implicit register 604 | operand->size = operand_sizes[DESC_VEXREG_SIZE(desc)]; 605 | if (mode == DECODE_32) 606 | vex_operand &= 0x7; 607 | // Note: 32-bit will never UD here. EVEX.V' is caught above already. 608 | // Note: UD if > 16 for non-VEC. No EVEX-encoded instruction uses 609 | // EVEX.vvvv to refer to non-vector registers. Verified in parseinstrs. 610 | operand->reg = vex_operand; 611 | 612 | unsigned reg_ty = DESC_REGTY_VEXREG(desc); // VEC GPL MSK FPU/TMM 613 | if (prefix_rex & PREFIX_VEX) { // TMM with VEX, FPU otherwise 614 | // In 64-bit mode: UD if FD_RT_MASK and vex_operand&8 != 0 615 | if (reg_ty == 2 && vex_operand >= 8) 616 | return FD_ERR_UD; 617 | if (UNLIKELY(reg_ty == 3)) // TMM 618 | operand->reg &= 0x7; // TODO: verify 619 | operand->misc = (06710 >> (3 * reg_ty)) & 0x7; 620 | } else { 621 | operand->misc = (04710 >> (3 * reg_ty)) & 0x7; 622 | } 623 | } 624 | } 625 | else if (vex_operand != 0) 626 | { 627 | // TODO: bit 3 ignored in 32-bit mode? 
unverified 628 | return FD_ERR_UD; 629 | } 630 | 631 | uint32_t imm_control = UNLIKELY(DESC_IMM_CONTROL(desc)); 632 | if (LIKELY(!imm_control)) { 633 | } else if (UNLIKELY(imm_control == 1)) 634 | { 635 | // 1 = immediate constant 1, used for shifts 636 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 637 | operand->type = FD_OT_IMM; 638 | operand->size = 1; 639 | instr->imm = 1; 640 | } 641 | else if (UNLIKELY(imm_control == 2)) 642 | { 643 | // 2 = memory, address-sized, used for mov with moffs operand 644 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 645 | operand->type = FD_OT_MEM; 646 | operand->size = operand_sizes[DESC_IMM_SIZE(desc)]; 647 | operand->reg = FD_REG_NONE; 648 | operand->misc = FD_REG_NONE; 649 | 650 | int moffsz = 1 << addr_size; 651 | if (UNLIKELY(off + moffsz > len)) 652 | return FD_ERR_PARTIAL; 653 | if (moffsz == 2) 654 | instr->disp = LOAD_LE_2(&buffer[off]); 655 | if (moffsz == 4) 656 | instr->disp = LOAD_LE_4(&buffer[off]); 657 | if (LIKELY(moffsz == 8)) 658 | instr->disp = LOAD_LE_8(&buffer[off]); 659 | off += moffsz; 660 | } 661 | else if (UNLIKELY(imm_control == 3)) 662 | { 663 | // 3 = register in imm8[7:4], used for RVMR encoding with VBLENDVP[SD] 664 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 665 | operand->type = FD_OT_REG; 666 | operand->size = op_size; 667 | operand->misc = FD_RT_VEC; 668 | 669 | if (UNLIKELY(off + 1 > len)) 670 | return FD_ERR_PARTIAL; 671 | uint8_t reg = (uint8_t) LOAD_LE_1(&buffer[off]); 672 | off += 1; 673 | 674 | if (mode == DECODE_32) 675 | reg &= 0x7f; 676 | operand->reg = reg >> 4; 677 | instr->imm = reg & 0x0f; 678 | } 679 | else if (imm_control != 0) 680 | { 681 | // 4/5 = immediate, operand-sized/8 bit 682 | // 6/7 = offset, operand-sized/8 bit (used for jumps/calls) 683 | int imm_byte = imm_control & 1; 684 | int imm_offset = imm_control & 2; 685 | 686 | FdOp* operand = &instr->operands[DESC_IMM_IDX(desc)]; 687 | operand->type = FD_OT_IMM; 688 | 689 | if (imm_byte) { 690 
| if (UNLIKELY(off + 1 > len)) 691 | return FD_ERR_PARTIAL; 692 | instr->imm = (int8_t) LOAD_LE_1(&buffer[off++]); 693 | operand->size = DESC_IMM_SIZE(desc) & 1 ? 1 : op_size; 694 | } else { 695 | operand->size = operand_sizes[DESC_IMM_SIZE(desc)]; 696 | 697 | uint8_t imm_size; 698 | if (UNLIKELY(instr->type == FDI_RET || instr->type == FDI_RETF || 699 | instr->type == FDI_SSE_EXTRQ || 700 | instr->type == FDI_SSE_INSERTQ)) 701 | imm_size = 2; 702 | else if (UNLIKELY(instr->type == FDI_JMPF || instr->type == FDI_CALLF)) 703 | imm_size = (1 << op_size >> 1) + 2; 704 | else if (UNLIKELY(instr->type == FDI_ENTER)) 705 | imm_size = 3; 706 | else if (instr->type == FDI_MOVABS) 707 | imm_size = (1 << op_size >> 1); 708 | else 709 | imm_size = op_size == 2 ? 2 : 4; 710 | 711 | if (UNLIKELY(off + imm_size > len)) 712 | return FD_ERR_PARTIAL; 713 | 714 | if (imm_size == 2) 715 | instr->imm = (int16_t) LOAD_LE_2(&buffer[off]); 716 | else if (imm_size == 3) 717 | instr->imm = LOAD_LE_3(&buffer[off]); 718 | else if (imm_size == 4) 719 | instr->imm = (int32_t) LOAD_LE_4(&buffer[off]); 720 | else if (imm_size == 6) 721 | instr->imm = LOAD_LE_4(&buffer[off]) | LOAD_LE_2(&buffer[off+4]) << 32; 722 | else if (imm_size == 8) 723 | instr->imm = (int64_t) LOAD_LE_8(&buffer[off]); 724 | off += imm_size; 725 | } 726 | 727 | if (imm_offset) 728 | { 729 | if (instr->address != 0) 730 | instr->imm += instr->address + off; 731 | else 732 | operand->type = FD_OT_OFF; 733 | } 734 | } 735 | 736 | skip_modrm: 737 | if (UNLIKELY(prefixes[PF_LOCK])) { 738 | if (!DESC_LOCK(desc) || instr->operands[0].type != FD_OT_MEM) 739 | return FD_ERR_UD; 740 | instr->flags |= FD_FLAG_LOCK; 741 | } 742 | 743 | if (UNLIKELY(DESC_LEGACY(desc))) { 744 | // Without REX prefix, convert one-byte GP regs to high-byte regs 745 | // This actually only applies to SZ8/MOVSX/MOVZX; but no VEX-encoded 746 | // instructions have a byte-sized GP register in the first two operands. 
747 | if (!(prefix_rex & PREFIX_REX)) { 748 | for (int i = 0; i < 2; i++) { 749 | FdOp* operand = &instr->operands[i]; 750 | if (operand->type == FD_OT_NONE) 751 | break; 752 | if (operand->type == FD_OT_REG && operand->misc == FD_RT_GPL && 753 | operand->size == 1 && operand->reg >= 4) 754 | operand->misc = FD_RT_GPH; 755 | } 756 | } 757 | 758 | if (instr->type == FDI_XCHG_NOP) { 759 | // Only 4890, 90, and 6690 are true NOPs. 760 | if (instr->operands[0].reg == 0) { 761 | instr->operands[0].type = FD_OT_NONE; 762 | instr->operands[1].type = FD_OT_NONE; 763 | instr->type = FD_HAS_REP(instr) ? FDI_PAUSE : FDI_NOP; 764 | } else if ((instr->operands[0].reg & 7) == 0 && FD_HAS_REP(instr)) { 765 | // On Intel, REX.B is ignored for F3.90. 766 | instr->operands[0].type = FD_OT_NONE; 767 | instr->operands[1].type = FD_OT_NONE; 768 | instr->type = FDI_PAUSE; 769 | } else { 770 | instr->type = FDI_XCHG; 771 | } 772 | } 773 | 774 | if (UNLIKELY(instr->type == FDI_3DNOW)) { 775 | unsigned opc3dn = instr->imm; 776 | if (opc3dn & 0x40) 777 | return FD_ERR_UD; 778 | uint64_t msk = opc3dn & 0x80 ? 0x88d144d144d14400 : 0x30003000; 779 | if (!(msk >> (opc3dn & 0x3f) & 1)) 780 | return FD_ERR_UD; 781 | } 782 | 783 | instr->operandsz = UNLIKELY(DESC_INSTR_WIDTH(desc)) ? 
// Run a single encoder test case: encode the instruction described by
// (mnem, op0..op3) into buf and compare the produced bytes against the
// expected byte string exp/exp_len. An expected length of zero means the
// encoder is expected to fail. Returns 0 on success, -1 on mismatch
// (with a hex dump of expected vs. actual bytes on stdout).
static int
test(uint8_t* buf, const char* name, uint64_t mnem, uint64_t op0, uint64_t op1, uint64_t op2, uint64_t op3, const void* exp, size_t exp_len)
{
    memset(buf, 0, 16);

    uint8_t* cur = buf;
    int res = fe_enc64(&cur, mnem, op0, op1, op2, op3);
    // Failure (res != 0) is expected exactly when exp_len == 0; the length
    // check runs before memcmp so we never compare more bytes than expected.
    int ok = (res != 0) == (exp_len == 0) &&
             cur - buf == (ptrdiff_t) exp_len &&
             !memcmp(buf, exp, exp_len);
    if (ok)
        return 0;

    printf("Failed case %s:\n", name);
    printf(" Exp (%2zu): ", exp_len);
    print_hex(exp, exp_len);
    printf("\n Got (%2zd): ", cur - buf);
    print_hex(buf, cur - buf);
    printf("\n");
    return -1;
}
"Some tests FAILED" : "All tests PASSED"); 61 | return failed ? EXIT_FAILURE : EXIT_SUCCESS; 62 | } 63 | -------------------------------------------------------------------------------- /encode.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect((x), 1) 11 | #define UNLIKELY(x) __builtin_expect((x), 0) 12 | #else 13 | #define LIKELY(x) (x) 14 | #define UNLIKELY(x) (x) 15 | #endif 16 | 17 | #define OPC_66 0x80000 18 | #define OPC_F2 0x100000 19 | #define OPC_F3 0x200000 20 | #define OPC_REXW 0x400000 21 | #define OPC_LOCK 0x800000 22 | #define OPC_VEXL0 0x1000000 23 | #define OPC_VEXL1 0x1800000 24 | #define OPC_EVEXL0 0x2000000 25 | #define OPC_EVEXL1 0x2800000 26 | #define OPC_EVEXL2 0x3000000 27 | #define OPC_EVEXL3 0x3800000 28 | #define OPC_EVEXB 0x4000000 29 | #define OPC_VSIB 0x8000000 30 | #define OPC_67 FE_ADDR32 31 | #define OPC_SEG_MSK 0xe0000000 32 | #define OPC_JMPL FE_JMPL 33 | #define OPC_MASK_MSK 0xe00000000 34 | #define OPC_EVEXZ 0x1000000000 35 | #define OPC_USER_MSK (OPC_67|OPC_SEG_MSK|OPC_MASK_MSK) 36 | #define OPC_FORCE_SIB 0x2000000000 37 | #define OPC_DOWNGRADE_VEX 0x4000000000 38 | #define OPC_DOWNGRADE_VEX_FLIPW 0x40000000000 39 | #define OPC_EVEX_DISP8SCALE 0x38000000000 40 | #define OPC_GPH_OP0 0x200000000000 41 | #define OPC_GPH_OP1 0x400000000000 42 | 43 | #define EPFX_REX_MSK 0x43f 44 | #define EPFX_REX 0x20 45 | #define EPFX_EVEX 0x40 46 | #define EPFX_REXR 0x10 47 | #define EPFX_REXX 0x08 48 | #define EPFX_REXB 0x04 49 | #define EPFX_REXR4 0x02 50 | #define EPFX_REXB4 0x01 51 | #define EPFX_REXX4 0x400 52 | #define EPFX_VVVV_IDX 11 53 | 54 | static bool op_mem(FeOp op) { return op < 0; } 55 | static bool op_reg(FeOp op) { return op >= 0; } 56 | static bool op_reg_gpl(FeOp op) { return (op & ~0x1f) == 0x100; } 57 | static bool op_reg_gph(FeOp op) { return (op & ~0x3) == 0x204; } 
58 | static bool op_reg_xmm(FeOp op) { return (op & ~0x1f) == 0x600; } 59 | static int64_t op_mem_offset(FeOp op) { return (int32_t) op; } 60 | static unsigned op_mem_base(FeOp op) { return (op >> 32) & 0xfff; } 61 | static unsigned op_mem_idx(FeOp op) { return (op >> 44) & 0xfff; } 62 | static unsigned op_mem_scale(FeOp op) { return (op >> 56) & 0xf; } 63 | static unsigned op_reg_idx(FeOp op) { return op & 0xff; } 64 | static bool op_imm_n(FeOp imm, unsigned immsz) { 65 | if (immsz == 0 && !imm) return true; 66 | if (immsz == 1 && (int8_t) imm == imm) return true; 67 | if (immsz == 2 && (int16_t) imm == imm) return true; 68 | if (immsz == 3 && (imm&0xffffff) == imm) return true; 69 | if (immsz == 4 && (int32_t) imm == imm) return true; 70 | if (immsz == 8 && (int64_t) imm == imm) return true; 71 | return false; 72 | } 73 | 74 | static 75 | unsigned 76 | opc_size(uint64_t opc, uint64_t epfx) 77 | { 78 | unsigned res = 1; 79 | if (UNLIKELY(opc & OPC_EVEXL0)) { 80 | res += 4; 81 | } else if (UNLIKELY(opc & OPC_VEXL0)) { 82 | if (opc & (OPC_REXW|0x20000) || epfx & (EPFX_REXX|EPFX_REXB)) 83 | res += 3; 84 | else 85 | res += 2; 86 | } else { 87 | if (opc & OPC_LOCK) res++; 88 | if (opc & OPC_66) res++; 89 | if (opc & (OPC_F2|OPC_F3)) res++; 90 | if (opc & OPC_REXW || epfx & EPFX_REX_MSK) res++; 91 | if (opc & 0x30000) res++; 92 | if (opc & 0x20000) res++; 93 | } 94 | if (opc & OPC_SEG_MSK) res++; 95 | if (opc & OPC_67) res++; 96 | if (opc & 0x8000) res++; 97 | return res; 98 | } 99 | 100 | static 101 | int 102 | enc_opc(uint8_t** restrict buf, uint64_t opc, uint64_t epfx) 103 | { 104 | if (opc & OPC_SEG_MSK) 105 | *(*buf)++ = (0x65643e362e2600 >> (8 * ((opc >> 29) & 7))) & 0xff; 106 | if (opc & OPC_67) *(*buf)++ = 0x67; 107 | if (opc & OPC_EVEXL0) { 108 | *(*buf)++ = 0x62; 109 | unsigned b1 = opc >> 16 & 7; 110 | if (!(epfx & EPFX_REXR)) b1 |= 0x80; 111 | if (!(epfx & EPFX_REXX)) b1 |= 0x40; 112 | if (!(epfx & EPFX_REXB)) b1 |= 0x20; 113 | if (!(epfx & EPFX_REXR4)) b1 
// Append imm to *buf as a little-endian immediate of immsz bytes and
// advance the write cursor. Returns 0 on success, -1 if imm is not
// representable in immsz bytes (per op_imm_n); on failure nothing is
// written.
static int
enc_imm(uint8_t** restrict buf, uint64_t imm, unsigned immsz)
{
    if (!op_imm_n(imm, immsz))
        return -1;
    uint8_t* out = *buf;
    for (unsigned shift = 0; shift < 8 * immsz; shift += 8)
        *out++ = imm >> shift;
    *buf = out;
    return 0;
}
0x10<> 39; 283 | if (!(off & ((1 << disp8scale) - 1)) && op_imm_n(off >> disp8scale, 1)) { 284 | mod = 0x40; 285 | dispsz = 1; 286 | off >>= disp8scale; 287 | } else { 288 | mod = 0x80; 289 | dispsz = 4; 290 | } 291 | } else if (rm == 5) { 292 | mod = 0x40; 293 | dispsz = 1; 294 | } 295 | } 296 | 297 | if (opcsz + 1 + (rm == 4) + dispsz + immsz > 15) return -1; 298 | 299 | if (enc_opc(buf, opc, epfx)) return -1; 300 | *(*buf)++ = mod | (reg << 3) | rm; 301 | if (UNLIKELY(rm == 4)) 302 | *(*buf)++ = (scale << 6) | (idx << 3) | base; 303 | return enc_imm(buf, off, dispsz); 304 | } 305 | 306 | typedef enum { 307 | ENC_NP, ENC_M, ENC_R, ENC_M1, ENC_MC, ENC_MR, ENC_RM, ENC_RMA, ENC_MRC, 308 | ENC_AM, ENC_MA, ENC_I, ENC_O, ENC_OA, ENC_S, ENC_A, ENC_D, ENC_FD, ENC_TD, 309 | ENC_IM, 310 | ENC_RVM, ENC_RVMR, ENC_RMV, ENC_VM, ENC_MVR, ENC_MRV, 311 | ENC_MAX 312 | } Encoding; 313 | 314 | struct EncodingInfo { 315 | uint8_t modrm : 2; 316 | uint8_t modreg : 2; 317 | uint8_t vexreg : 2; 318 | uint8_t immidx : 2; 319 | // 0 = normal or jump, 1 = constant 1, 2 = address-size, 3 = RVMR 320 | uint8_t immctl : 3; 321 | uint8_t zregidx : 2; 322 | uint8_t zregval : 1; 323 | }; 324 | 325 | const struct EncodingInfo encoding_infos[ENC_MAX] = { 326 | [ENC_NP] = { 0 }, 327 | [ENC_M] = { .modrm = 0x0^3, .immidx = 1 }, 328 | [ENC_R] = { .modreg = 0x0^3 }, 329 | [ENC_M1] = { .modrm = 0x0^3, .immctl = 1, .immidx = 1 }, 330 | [ENC_MC] = { .modrm = 0x0^3, .zregidx = 0x1^3, .zregval = 1 }, 331 | [ENC_MR] = { .modrm = 0x0^3, .modreg = 0x1^3, .immidx = 2 }, 332 | [ENC_RM] = { .modrm = 0x1^3, .modreg = 0x0^3, .immidx = 2 }, 333 | [ENC_RMA] = { .modrm = 0x1^3, .modreg = 0x0^3, .zregidx = 0x2^3, .zregval = 0 }, 334 | [ENC_MRC] = { .modrm = 0x0^3, .modreg = 0x1^3, .zregidx = 0x2^3, .zregval = 1 }, 335 | [ENC_AM] = { .modrm = 0x1^3, .zregidx = 0x0^3, .zregval = 0 }, 336 | [ENC_MA] = { .modrm = 0x0^3, .zregidx = 0x1^3, .zregval = 0 }, 337 | [ENC_I] = { .immidx = 0 }, 338 | [ENC_O] = { .modreg = 0x0^3, 
.immidx = 1 }, 339 | [ENC_OA] = { .modreg = 0x0^3, .zregidx = 0x1^3, .zregval = 0 }, 340 | [ENC_S] = { 0 }, 341 | [ENC_A] = { .zregidx = 0x0^3, .zregval = 0, .immidx = 1 }, 342 | [ENC_D] = { .immidx = 0 }, 343 | [ENC_FD] = { .zregidx = 0x0^3, .zregval = 0, .immctl = 2, .immidx = 1 }, 344 | [ENC_TD] = { .zregidx = 0x1^3, .zregval = 0, .immctl = 2, .immidx = 0 }, 345 | [ENC_IM] = { .modrm = 0x1^3, .immidx = 0 }, 346 | [ENC_RVM] = { .modrm = 0x2^3, .modreg = 0x0^3, .vexreg = 0x1^3, .immidx = 3 }, 347 | [ENC_RVMR] = { .modrm = 0x2^3, .modreg = 0x0^3, .vexreg = 0x1^3, .immctl = 3, .immidx = 3 }, 348 | [ENC_RMV] = { .modrm = 0x1^3, .modreg = 0x0^3, .vexreg = 0x2^3 }, 349 | [ENC_VM] = { .modrm = 0x1^3, .vexreg = 0x0^3, .immidx = 2 }, 350 | [ENC_MVR] = { .modrm = 0x0^3, .modreg = 0x2^3, .vexreg = 0x1^3 }, 351 | [ENC_MRV] = { .modrm = 0x0^3, .modreg = 0x1^3, .vexreg = 0x2^3 }, 352 | }; 353 | 354 | static const uint64_t alt_tab[] = { 355 | #include 356 | }; 357 | 358 | int 359 | fe_enc64_impl(uint8_t** restrict buf, uint64_t opc, FeOp op0, FeOp op1, 360 | FeOp op2, FeOp op3) 361 | { 362 | uint8_t* buf_start = *buf; 363 | uint64_t ops[4] = {op0, op1, op2, op3}; 364 | 365 | uint64_t epfx = 0; 366 | // Doesn't change between variants 367 | if ((opc & OPC_GPH_OP0) && op_reg_gpl(op0) && op0 >= FE_SP) 368 | epfx |= EPFX_REX; 369 | else if (!(opc & OPC_GPH_OP0) && op_reg_gph(op0)) 370 | goto fail; 371 | if ((opc & OPC_GPH_OP1) && op_reg_gpl(op1) && op1 >= FE_SP) 372 | epfx |= EPFX_REX; 373 | else if (!(opc & OPC_GPH_OP1) && op_reg_gph(op1)) 374 | goto fail; 375 | 376 | try_encode:; 377 | unsigned enc = (opc >> 51) & 0x1f; 378 | const struct EncodingInfo* ei = &encoding_infos[enc]; 379 | 380 | int64_t imm = 0xcc; 381 | unsigned immsz = (opc >> 47) & 0xf; 382 | 383 | if (UNLIKELY(ei->zregidx && op_reg_idx(ops[ei->zregidx^3]) != ei->zregval)) 384 | goto next; 385 | 386 | if (UNLIKELY(enc == ENC_S)) { 387 | if ((op_reg_idx(op0) << 3 & 0x20) != (opc & 0x20)) goto next; 388 | opc |= 
op_reg_idx(op0) << 3; 389 | } 390 | 391 | if (immsz) { 392 | imm = ops[ei->immidx]; 393 | if (UNLIKELY(ei->immctl)) { 394 | if (ei->immctl == 2) { 395 | immsz = UNLIKELY(opc & OPC_67) ? 4 : 8; 396 | if (immsz == 4) imm = (int32_t) imm; // address are zero-extended 397 | } else if (ei->immctl == 3) { 398 | if (!op_reg_xmm(imm)) goto fail; 399 | imm = op_reg_idx(imm) << 4; 400 | if (!op_imm_n(imm, 1)) goto fail; 401 | } else if (ei->immctl == 1) { 402 | if (imm != 1) goto next; 403 | immsz = 0; 404 | } 405 | } else { 406 | if (enc == ENC_D) { 407 | if (UNLIKELY(opc & FE_JMPL) && opc >> 56) goto next; 408 | imm -= (int64_t) *buf + opc_size(opc, epfx) + immsz; 409 | } 410 | if (!op_imm_n(imm, immsz)) goto next; 411 | } 412 | } 413 | 414 | // NOP has no operands, so this must be the 32-bit OA XCHG 415 | if ((opc & 0xfffffff) == 0x90 && ops[0] == FE_AX) goto next; 416 | 417 | if (UNLIKELY(enc == ENC_R)) { 418 | if (enc_mr(buf, opc, epfx, 0, ops[0], immsz)) goto fail; 419 | } else if (ei->modrm) { 420 | FeOp modreg = ei->modreg ? ops[ei->modreg^3] : (opc & 0xff00) >> 8; 421 | if (ei->vexreg) 422 | epfx |= ((uint64_t) op_reg_idx(ops[ei->vexreg^3])) << EPFX_VVVV_IDX; 423 | // Can fail for upgrade to EVEX due to high register numbers 424 | if (enc_mr(buf, opc, epfx, ops[ei->modrm^3], modreg, immsz)) goto next; 425 | } else if (ei->modreg) { 426 | if (enc_o(buf, opc, epfx, ops[ei->modreg^3])) goto fail; 427 | } else { 428 | if (enc_opc(buf, opc, epfx)) goto fail; 429 | } 430 | 431 | if (immsz) 432 | if (enc_imm(buf, imm, immsz)) goto fail; 433 | 434 | return 0; 435 | 436 | next:; 437 | uint64_t alt = opc >> 56; 438 | if (alt) { // try alternative encoding, if available 439 | opc = alt_tab[alt] | (opc & OPC_USER_MSK); 440 | goto try_encode; 441 | } 442 | 443 | fail: 444 | // Don't advance buffer on error; though we shouldn't write anything. 
// Print len bytes of buf as two-digit lowercase hex on stdout.
static void
print_hex(const uint8_t* buf, size_t len) {
    for (size_t pos = 0; pos != len; pos++)
        printf("%02x", buf[pos]);
}

// Compare an encoding result (buf, res bytes written) against the expected
// byte string (exp, exp_len). Returns 0 on an exact match; otherwise prints
// a diagnostic hex dump of expected vs. actual bytes and returns -1.
static int
check(const uint8_t* buf, const void* exp, size_t exp_len, unsigned res, const char* name) {
    // Lengths are compared first so memcmp never reads past the shorter buffer.
    int matches = res == exp_len && memcmp(buf, exp, exp_len) == 0;
    if (matches)
        return 0;
    printf("Failed case (new) %s:\n", name);
    printf(" Exp (%2zu): ", exp_len);
    print_hex((const uint8_t*)exp, exp_len);
    printf("\n Got (%2u): ", res);
    print_hex(buf, res);
    printf("\n");
    return -1;
}
TEST1(#__VA_ARGS__, exp, __VA_ARGS__) 35 | 36 | int 37 | main(void) { 38 | int failed = 0; 39 | uint8_t buf[16]; 40 | 41 | // This API is type safe and prohibits compilation of reg-type mismatches 42 | #define ENC_TEST_TYPESAFE 43 | // Silence -Warray-bounds with double cast 44 | #define FE_PTR(off) (const void*) ((uintptr_t) buf + (off)) 45 | #define FLAGMASK(flags, mask) flags, mask 46 | #include "encode-test.inc" 47 | 48 | TEST("\x90", NOP, 0); 49 | TEST("\x90", NOP, 1); 50 | TEST("\x66\x90", NOP, 2); 51 | TEST("\x0f\x1f\x00", NOP, 3); 52 | TEST("\x0f\x1f\x40\x00", NOP, 4); 53 | TEST("\x0f\x1f\x44\x00\x00", NOP, 5); 54 | TEST("\x66\x0f\x1f\x44\x00\x00", NOP, 6); 55 | TEST("\x0f\x1f\x80\x00\x00\x00\x00", NOP, 7); 56 | TEST("\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 8); 57 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00", NOP, 9); 58 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x90", NOP, 10); 59 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x66\x90", NOP, 11); 60 | TEST("\x66\x0f\x1f\x84\x00\x00\x00\x00\x00\x0f\x1f\x00", NOP, 12); 61 | 62 | puts(failed ? "Some tests FAILED" : "All tests PASSED"); 63 | return failed ? 
EXIT_FAILURE : EXIT_SUCCESS; 64 | } 65 | -------------------------------------------------------------------------------- /encode2-test.cc: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | 10 | using Buffer = std::array; 11 | 12 | static 13 | void print_hex(const uint8_t* buf, size_t len) { 14 | for (size_t i = 0; i < len; i++) 15 | std::printf("%02x", buf[i]); 16 | } 17 | 18 | static int 19 | check(const Buffer& buf, const char* exp, size_t exp_len, unsigned res, const char* name) { 20 | if (res == exp_len && !std::memcmp(buf.data(), exp, exp_len)) 21 | return 0; 22 | std::printf("Failed case (new) %s:\n", name); 23 | std::printf(" Exp (%2zu): ", exp_len); 24 | print_hex(reinterpret_cast(exp), exp_len); 25 | std::printf("\n Got (%2u): ", res); 26 | print_hex(buf.data(), res); 27 | std::printf("\n"); 28 | return -1; 29 | } 30 | 31 | #define TEST1(str, exp, name, ...) do { \ 32 | buf.fill(0); \ 33 | unsigned res = fe64_ ## name(buf.data(), __VA_ARGS__); \ 34 | failed |= check(buf, exp, sizeof(exp) - 1, res, str); \ 35 | } while (0) 36 | #define TEST(exp, ...) TEST1(#__VA_ARGS__, exp, __VA_ARGS__) 37 | 38 | #define TEST_CPP1(str, exp, expr) do { \ 39 | buf.fill(0); \ 40 | unsigned res = (expr); \ 41 | failed |= check(buf, exp, sizeof(exp) - 1, res, str); \ 42 | } while (0) 43 | #define TEST_CPP(exp, ...) 
TEST_CPP1(#__VA_ARGS__, exp, __VA_ARGS__) 44 | 45 | int main() { 46 | int failed = 0; 47 | Buffer buf{}; 48 | 49 | // This API is type safe and prohibits compilation of reg-type mismatches 50 | #define ENC_TEST_TYPESAFE 51 | // Silence -Warray-bounds with double cast 52 | #define FE_PTR(off) (const void*) ((uintptr_t) buf.data() + (off)) 53 | #define FLAGMASK(flags, mask) flags, mask 54 | #include "encode-test.inc" 55 | 56 | // Test implicit conversion of parameters also on the actual functions 57 | TEST_CPP("\x0f\x90\xc0", fe64_SETO8r(buf.data(), 0, FE_AX)); 58 | TEST_CPP("\x0f\x90\xc0", (fe64_SETO8r)(buf.data(), 0, FE_AX)); 59 | TEST_CPP("\x0f\x90\xc4", fe64_SETO8r(buf.data(), 0, FE_AH)); 60 | TEST_CPP("\x0f\x90\xc4", (fe64_SETO8r)(buf.data(), 0, FE_AH)); 61 | 62 | std::puts(failed ? "Some tests FAILED" : "All tests PASSED"); 63 | return failed ? EXIT_FAILURE : EXIT_SUCCESS; 64 | } 65 | -------------------------------------------------------------------------------- /encode2.c: -------------------------------------------------------------------------------- 1 | 2 | #include 3 | #include 4 | #include 5 | 6 | #include 7 | 8 | 9 | #ifdef __GNUC__ 10 | #define LIKELY(x) __builtin_expect(!!(x), 1) 11 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 12 | #define HINT_COLD __attribute__((cold)) 13 | #else 14 | #define LIKELY(x) (x) 15 | #define UNLIKELY(x) (x) 16 | #define HINT_COLD 17 | #endif 18 | 19 | #define op_reg_idx(op) (op).idx 20 | #define op_reg_gph(op) (((op).idx & ~0x3) == 0x24) 21 | #define op_mem_base(mem) op_reg_idx((mem).base) 22 | #define op_mem_idx(mem) op_reg_idx((mem).idx) 23 | 24 | static bool 25 | op_imm_n(int64_t imm, unsigned immsz) { 26 | if (immsz == 0 && !imm) return true; 27 | if (immsz == 1 && (int8_t) imm == imm) return true; 28 | if (immsz == 2 && (int16_t) imm == imm) return true; 29 | if (immsz == 3 && (imm&0xffffff) == imm) return true; 30 | if (immsz == 4 && (int32_t) imm == imm) return true; 31 | if (immsz == 8 && (int64_t) imm == 
imm) return true; 32 | return false; 33 | } 34 | 35 | HINT_COLD static unsigned 36 | enc_seg67(uint8_t* buf, unsigned flags) { 37 | unsigned idx = 0; 38 | if (UNLIKELY(flags & FE_SEG_MASK)) { 39 | unsigned seg = (0x65643e362e2600 >> (8 * (flags & FE_SEG_MASK))) & 0xff; 40 | buf[idx++] = seg; 41 | } 42 | if (UNLIKELY(flags & FE_ADDR32)) buf[idx++] = 0x67; 43 | return idx; 44 | } 45 | 46 | static void 47 | enc_imm(uint8_t* buf, uint64_t imm, unsigned immsz) { 48 | for (unsigned i = 0; i < immsz; i++) 49 | *buf++ = imm >> 8 * i; 50 | } 51 | 52 | static int 53 | enc_mem_common(uint8_t* buf, unsigned ripoff, FeMem op0, uint64_t op1, 54 | unsigned sibidx, unsigned disp8scale) { 55 | int mod = 0, reg = op1 & 7, rm; 56 | unsigned sib = 0x20; 57 | bool withsib = false; 58 | unsigned dispsz = 0; 59 | int32_t off = op0.off; 60 | 61 | if (sibidx < 8) { 62 | int scalabs = op0.scale; 63 | if (scalabs & (scalabs - 1)) 64 | return 0; 65 | unsigned scale = (scalabs & 0xA ? 1 : 0) | (scalabs & 0xC ? 2 : 0); 66 | sib = scale << 6 | sibidx << 3; 67 | withsib = true; 68 | } 69 | 70 | if (UNLIKELY(op0.base.idx >= 0x20)) { 71 | if (UNLIKELY(op0.base.idx >= op_reg_idx(FE_NOREG))) { 72 | *buf++ = (reg << 3) | 4; 73 | *buf++ = sib | 5; 74 | enc_imm(buf, off, 4); 75 | return 6; 76 | } else if (LIKELY(op0.base.idx == FE_IP.idx)) { 77 | if (withsib) 78 | return 0; 79 | *buf++ = (reg << 3) | 5; 80 | // Adjust offset, caller doesn't know instruction length. 81 | enc_imm(buf, off - ripoff - 5, 4); 82 | return 5; 83 | } else { 84 | return 0; 85 | } 86 | } 87 | 88 | rm = op_reg_idx(op0.base) & 7; 89 | 90 | if (off) { 91 | if (LIKELY(!disp8scale)) { 92 | mod = (int8_t) off == off ? 0x40 : 0x80; 93 | dispsz = (int8_t) off == off ? 
1 : 4; 94 | } else { 95 | if (!(off & ((1 << disp8scale) - 1)) && op_imm_n(off >> disp8scale, 1)) 96 | off >>= disp8scale, mod = 0x40, dispsz = 1; 97 | else 98 | mod = 0x80, dispsz = 4; 99 | } 100 | } else if (rm == 5) { 101 | dispsz = 1; 102 | mod = 0x40; 103 | } 104 | 105 | // Always write four bytes of displacement. The buffer is always large 106 | // enough, and we truncate by returning a smaller "written bytes" count. 107 | if (withsib || rm == 4) { 108 | *buf++ = mod | (reg << 3) | 4; 109 | *buf++ = sib | rm; 110 | enc_imm(buf, off, 4); 111 | return 2 + dispsz; 112 | } else { 113 | *buf++ = mod | (reg << 3) | rm; 114 | enc_imm(buf, off, 4); 115 | return 1 + dispsz; 116 | } 117 | } 118 | 119 | static int 120 | enc_mem(uint8_t* buf, unsigned ripoff, FeMem op0, uint64_t op1, bool forcesib, 121 | unsigned disp8scale) { 122 | unsigned sibidx = forcesib ? 4 : 8; 123 | if (op_reg_idx(op0.idx) < op_reg_idx(FE_NOREG)) { 124 | if (!op0.scale) 125 | return 0; 126 | if (op_reg_idx(op0.idx) == 4) 127 | return 0; 128 | sibidx = op_reg_idx(op0.idx) & 7; 129 | } else if (op0.scale) { 130 | return 0; 131 | } 132 | return enc_mem_common(buf, ripoff, op0, op1, sibidx, disp8scale); 133 | } 134 | 135 | static int 136 | enc_mem_vsib(uint8_t* buf, unsigned ripoff, FeMemV op0, uint64_t op1, 137 | bool forcesib, unsigned disp8scale) { 138 | (void) forcesib; 139 | if (!op0.scale) 140 | return 0; 141 | FeMem mem = FE_MEM(op0.base, op0.scale, FE_NOREG, op0.off); 142 | return enc_mem_common(buf, ripoff, mem, op1, op_reg_idx(op0.idx) & 7, 143 | disp8scale); 144 | } 145 | 146 | // EVEX/VEX "Opcode" format: 147 | // 148 | // | EVEX byte 4 | P P M M M - - W | Opcode byte | VEX-D VEX-D-FLIPW 149 | // 0 8 16 24 150 | 151 | enum { 152 | FE_OPC_VEX_WPP_SHIFT = 8, 153 | FE_OPC_VEX_WPP_MASK = 0x83 << FE_OPC_VEX_WPP_SHIFT, 154 | FE_OPC_VEX_MMM_SHIFT = 10, 155 | FE_OPC_VEX_MMM_MASK = 0x1f << FE_OPC_VEX_MMM_SHIFT, 156 | FE_OPC_VEX_DOWNGRADE_VEX = 1 << 24, 157 | FE_OPC_VEX_DOWNGRADE_VEX_FLIPW = 1 << 
25, 158 | }; 159 | 160 | static int 161 | enc_vex_common(uint8_t* buf, unsigned opcode, unsigned base, 162 | unsigned idx, unsigned reg, unsigned vvvv) { 163 | if ((base | idx | reg | vvvv) & 0x10) return 0; 164 | bool vex3 = ((base | idx) & 0x08) || (opcode & 0xfc00) != 0x0400; 165 | if (vex3) { 166 | *buf++ = 0xc4; 167 | unsigned b1 = (opcode & FE_OPC_VEX_MMM_MASK) >> FE_OPC_VEX_MMM_SHIFT; 168 | if (!(reg & 0x08)) b1 |= 0x80; 169 | if (!(idx & 0x08)) b1 |= 0x40; 170 | if (!(base & 0x08)) b1 |= 0x20; 171 | *buf++ = b1; 172 | unsigned b2 = (opcode & FE_OPC_VEX_WPP_MASK) >> FE_OPC_VEX_WPP_SHIFT; 173 | if (opcode & 0x20) b2 |= 0x04; 174 | b2 |= (vvvv ^ 0xf) << 3; 175 | *buf++ = b2; 176 | } else { 177 | *buf++ = 0xc5; 178 | unsigned b2 = opcode >> FE_OPC_VEX_WPP_SHIFT & 3; 179 | if (opcode & 0x20) b2 |= 0x04; 180 | if (!(reg & 0x08)) b2 |= 0x80; 181 | b2 |= (vvvv ^ 0xf) << 3; 182 | *buf++ = b2; 183 | } 184 | *buf++ = (opcode & 0xff0000) >> 16; 185 | return 3 + vex3; 186 | } 187 | 188 | static int 189 | enc_vex_reg(uint8_t* buf, unsigned opcode, uint64_t rm, uint64_t reg, 190 | uint64_t vvvv) { 191 | unsigned off = enc_vex_common(buf, opcode, rm, 0, reg, vvvv); 192 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 193 | return off ? off + 1 : 0; 194 | } 195 | 196 | static int 197 | enc_vex_mem(uint8_t* buf, unsigned opcode, FeMem rm, uint64_t reg, 198 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 199 | unsigned off = enc_vex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 200 | unsigned memoff = enc_mem(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 201 | return off && memoff ? 
off + memoff : 0; 202 | } 203 | 204 | static int 205 | enc_vex_vsib(uint8_t* buf, unsigned opcode, FeMemV rm, uint64_t reg, 206 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 207 | unsigned off = enc_vex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 208 | unsigned memoff = enc_mem_vsib(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 209 | return off && memoff ? off + memoff : 0; 210 | } 211 | 212 | static int 213 | enc_evex_common(uint8_t* buf, unsigned opcode, unsigned base, 214 | unsigned idx, unsigned reg, unsigned vvvv) { 215 | *buf++ = 0x62; 216 | bool evexr3 = reg & 0x08; 217 | bool evexr4 = reg & 0x10; 218 | bool evexb3 = base & 0x08; 219 | bool evexb4 = base & 0x10; // evexb4 is unused in AVX-512 encoding 220 | bool evexx3 = idx & 0x08; 221 | bool evexx4 = idx & 0x10; 222 | bool evexv4 = vvvv & 0x10; 223 | unsigned b1 = (opcode & FE_OPC_VEX_MMM_MASK) >> FE_OPC_VEX_MMM_SHIFT; 224 | if (!evexr3) b1 |= 0x80; 225 | if (!evexx3) b1 |= 0x40; 226 | if (!evexb3) b1 |= 0x20; 227 | if (!evexr4) b1 |= 0x10; 228 | if (evexb4) b1 |= 0x08; 229 | *buf++ = b1; 230 | unsigned b2 = (opcode & FE_OPC_VEX_WPP_MASK) >> FE_OPC_VEX_WPP_SHIFT; 231 | if (!evexx4) b2 |= 0x04; 232 | b2 |= (~vvvv & 0xf) << 3; 233 | *buf++ = b2; 234 | unsigned b3 = opcode & 0xff; 235 | if (!evexv4) b3 |= 0x08; 236 | *buf++ = b3; 237 | *buf++ = (opcode & 0xff0000) >> 16; 238 | return 5; 239 | } 240 | 241 | static unsigned 242 | enc_evex_to_vex(unsigned opcode) { 243 | return opcode & FE_OPC_VEX_DOWNGRADE_VEX_FLIPW ? 
opcode ^ 0x8000 : opcode; 244 | } 245 | 246 | // Encode AVX-512 EVEX r/m-reg, non-xmm reg, vvvv, prefer vex 247 | static int 248 | enc_evex_reg(uint8_t* buf, unsigned opcode, unsigned rm, 249 | unsigned reg, unsigned vvvv) { 250 | unsigned off; 251 | if (!((rm | reg | vvvv) & 0x10) && (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) 252 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), rm, 0, reg, vvvv); 253 | else 254 | off = enc_evex_common(buf, opcode, rm, 0, reg, vvvv); 255 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 256 | return off + 1; 257 | } 258 | 259 | // Encode AVX-512 EVEX r/m-reg, xmm reg, vvvv, prefer vex 260 | static int 261 | enc_evex_xmm(uint8_t* buf, unsigned opcode, unsigned rm, 262 | unsigned reg, unsigned vvvv) { 263 | unsigned off; 264 | if (!((rm | reg | vvvv) & 0x10) && (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) 265 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), rm, 0, reg, vvvv); 266 | else 267 | // AVX-512 XMM reg encoding uses X3 instead of B4. 268 | off = enc_evex_common(buf, opcode, rm & 0x0f, rm >> 1, reg, vvvv); 269 | buf[off] = 0xc0 | (reg << 3 & 0x38) | (rm & 7); 270 | return off + 1; 271 | } 272 | 273 | static int 274 | enc_evex_mem(uint8_t* buf, unsigned opcode, FeMem rm, uint64_t reg, 275 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 276 | unsigned off; 277 | if (!((op_reg_idx(rm.base) | op_reg_idx(rm.idx) | reg | vvvv) & 0x10) && 278 | (opcode & FE_OPC_VEX_DOWNGRADE_VEX)) { 279 | disp8scale = 0; // Only AVX-512 EVEX compresses displacement 280 | off = enc_vex_common(buf, enc_evex_to_vex(opcode), op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 281 | } else { 282 | off = enc_evex_common(buf, opcode, op_reg_idx(rm.base), op_reg_idx(rm.idx), reg, vvvv); 283 | } 284 | unsigned memoff = enc_mem(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 285 | return off && memoff ? 
off + memoff : 0; 286 | } 287 | 288 | static int 289 | enc_evex_vsib(uint8_t* buf, unsigned opcode, FeMemV rm, uint64_t reg, 290 | uint64_t vvvv, unsigned ripoff, bool forcesib, unsigned disp8scale) { 291 | (void) vvvv; 292 | // EVEX VSIB requires non-zero mask operand 293 | if (!(opcode & 0x7)) return 0; 294 | // EVEX.X4 is encoded in EVEX.V4 295 | unsigned idx = op_reg_idx(rm.idx); 296 | unsigned off = enc_evex_common(buf, opcode, op_reg_idx(rm.base), idx & 0x0f, reg, idx & 0x10); 297 | unsigned memoff = enc_mem_vsib(buf + off, ripoff + off, rm, reg, forcesib, disp8scale); 298 | return off && memoff ? off + memoff : 0; 299 | } 300 | 301 | unsigned fe64_NOP(uint8_t* buf, unsigned flags) { 302 | unsigned len = flags ? flags : 1; 303 | // Taken from Intel SDM 304 | static const uint8_t tbl[] = { 305 | 0x90, 306 | 0x66, 0x90, 307 | 0x0f, 0x1f, 0x00, 308 | 0x0f, 0x1f, 0x40, 0x00, 309 | 0x0f, 0x1f, 0x44, 0x00, 0x00, 310 | 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00, 311 | 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00, 312 | 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 313 | 0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 314 | }; 315 | unsigned remain = len; 316 | for (; remain > 9; remain -= 9) 317 | for (unsigned i = 0; i < 9; i++) 318 | *(buf++) = tbl[36 + i]; 319 | const uint8_t* src = tbl + (remain * (remain - 1)) / 2; 320 | for (unsigned i = 0; i < remain; i++) 321 | *(buf++) = src[i]; 322 | return len; 323 | } 324 | 325 | #include 326 | -------------------------------------------------------------------------------- /fadec-enc.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_ENC_H_ 3 | #define FD_FADEC_ENC_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef enum { 13 | FE_AX = 0x100, FE_CX, FE_DX, FE_BX, FE_SP, FE_BP, FE_SI, FE_DI, 14 | FE_R8, FE_R9, FE_R10, FE_R11, FE_R12, FE_R13, FE_R14, FE_R15, 15 | FE_IP = 0x120, 16 | FE_AH = 0x204, FE_CH, FE_DH, 
FE_BH, 17 | FE_ES = 0x300, FE_CS, FE_SS, FE_DS, FE_FS, FE_GS, 18 | FE_ST0 = 0x400, FE_ST1, FE_ST2, FE_ST3, FE_ST4, FE_ST5, FE_ST6, FE_ST7, 19 | FE_MM0 = 0x500, FE_MM1, FE_MM2, FE_MM3, FE_MM4, FE_MM5, FE_MM6, FE_MM7, 20 | FE_XMM0 = 0x600, FE_XMM1, FE_XMM2, FE_XMM3, FE_XMM4, FE_XMM5, FE_XMM6, FE_XMM7, 21 | FE_XMM8, FE_XMM9, FE_XMM10, FE_XMM11, FE_XMM12, FE_XMM13, FE_XMM14, FE_XMM15, 22 | FE_XMM16, FE_XMM17, FE_XMM18, FE_XMM19, FE_XMM20, FE_XMM21, FE_XMM22, FE_XMM23, 23 | FE_XMM24, FE_XMM25, FE_XMM26, FE_XMM27, FE_XMM28, FE_XMM29, FE_XMM30, FE_XMM31, 24 | FE_K0 = 0x700, FE_K1, FE_K2, FE_K3, FE_K4, FE_K5, FE_K6, FE_K7, 25 | FE_TMM0 = 0x800, FE_TMM1, FE_TMM2, FE_TMM3, FE_TMM4, FE_TMM5, FE_TMM6, FE_TMM7, 26 | } FeReg; 27 | 28 | typedef int64_t FeOp; 29 | 30 | /** Construct a memory operand. Unused parts can be set to 0 and will be 31 | * ignored. FE_IP can be used as base register, in which case the offset is 32 | * interpreted as the offset from the /current/ position -- the size of the 33 | * encoded instruction will be subtracted during encoding. scale must be 1, 2, 34 | * 4, or 8; but is ignored if idx == 0. **/ 35 | #define FE_MEM(base,sc,idx,off) (INT64_MIN | ((int64_t) ((base) & 0xfff) << 32) | ((int64_t) ((idx) & 0xfff) << 44) | ((int64_t) ((sc) & 0xf) << 56) | ((off) & 0xffffffff)) 36 | #define FE_NOREG ((FeReg) 0) 37 | 38 | /** Add segment override prefix. This may or may not generate prefixes for the 39 | * ignored prefixes ES/CS/DS/SS in 64-bit mode. **/ 40 | #define FE_SEG(seg) ((uint64_t) (((seg) & 0x7) + 1) << 29) 41 | /** Do not use. **/ 42 | #define FE_SEG_MASK 0xe0000000 43 | /** Overrides address size. **/ 44 | #define FE_ADDR32 0x10000000 45 | /** Used together with a RIP-relative (conditional) jump, this will force the 46 | * use of the encoding with the largest distance. 
Useful for reserving a jump 47 | * when the target offset is still unknown; if the jump is re-encoded later on, 48 | * FE_JMPL must be specified there, too, so that the encoding lengths match. **/ 49 | #define FE_JMPL 0x100000000 50 | #define FE_MASK(kreg) ((uint64_t) ((kreg) & 0x7) << 33) 51 | #define FE_RC_RN 0x0000000 52 | #define FE_RC_RD 0x0800000 53 | #define FE_RC_RU 0x1000000 54 | #define FE_RC_RZ 0x1800000 55 | 56 | #include 57 | 58 | /** Do not use. **/ 59 | #define fe_enc64_1(buf, mnem, op0, op1, op2, op3, ...) fe_enc64_impl(buf, mnem, op0, op1, op2, op3) 60 | /** Encode a single instruction for 64-bit mode. 61 | * \param buf Pointer to the buffer for instruction bytes, must have a size of 62 | * 15 bytes. The pointer is advanced by the number of bytes used for 63 | * encoding the specified instruction. 64 | * \param mnem Mnemonic, optionally or-ed with FE_SEG(), FE_ADDR32, or FE_JMPL. 65 | * \param operands... Instruction operands. Immediate operands are passed as 66 | * plain value; register operands using the FeReg enum; memory operands 67 | * using FE_MEM(); and offset operands for RIP-relative jumps/calls are 68 | * specified as _address in buf_, e.g. (intptr_t) jmptgt, the address of 69 | * buf and the size of the encoded instruction are subtracted internally. 70 | * \return Zero for success or a negative value in case of an error. 71 | **/ 72 | #define fe_enc64(buf, ...) fe_enc64_1(buf, __VA_ARGS__, 0, 0, 0, 0, 0) 73 | /** Do not use. 
**/ 74 | int fe_enc64_impl(uint8_t** buf, uint64_t mnem, FeOp op0, FeOp op1, FeOp op2, FeOp op3); 75 | 76 | #ifdef __cplusplus 77 | } 78 | #endif 79 | 80 | #endif 81 | -------------------------------------------------------------------------------- /fadec-enc2.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_ENC2_H_ 3 | #define FD_FADEC_ENC2_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | #ifdef __cplusplus 13 | #define FE_STRUCT(name) name 14 | #else 15 | #define FE_STRUCT(name) (name) 16 | #endif 17 | 18 | // Flags 19 | #define FE_JMPL 0x8 20 | #define FE_ADDR32 0x10 21 | #define FE_SEG_MASK 0x7 22 | #define FE_SEG(seg) (((seg).idx + 1) & FE_SEG_MASK) 23 | #define FE_RC_MASK 0x60 24 | #define FE_RC_RN 0x00 25 | #define FE_RC_RD 0x20 26 | #define FE_RC_RU 0x40 27 | #define FE_RC_RZ 0x60 28 | 29 | typedef struct FeRegGP { unsigned char idx; } FeRegGP; 30 | #define FE_GP(idx) (FE_STRUCT(FeRegGP) { idx }) 31 | #define FE_AX FE_GP(0) 32 | #define FE_CX FE_GP(1) 33 | #define FE_DX FE_GP(2) 34 | #define FE_BX FE_GP(3) 35 | #define FE_SP FE_GP(4) 36 | #define FE_BP FE_GP(5) 37 | #define FE_SI FE_GP(6) 38 | #define FE_DI FE_GP(7) 39 | #define FE_R8 FE_GP(8) 40 | #define FE_R9 FE_GP(9) 41 | #define FE_R10 FE_GP(10) 42 | #define FE_R11 FE_GP(11) 43 | #define FE_R12 FE_GP(12) 44 | #define FE_R13 FE_GP(13) 45 | #define FE_R14 FE_GP(14) 46 | #define FE_R15 FE_GP(15) 47 | #define FE_IP FE_GP(0x20) 48 | #define FE_NOREG FE_GP(0x80) 49 | typedef struct FeRegGPH { unsigned char idx; } FeRegGPH; 50 | #define FE_GPH(idx) (FE_STRUCT(FeRegGPH) { idx }) 51 | #define FE_AH FE_GPH(4) 52 | #define FE_CH FE_GPH(5) 53 | #define FE_DH FE_GPH(6) 54 | #define FE_BH FE_GPH(7) 55 | typedef struct FeRegSREG { unsigned char idx; } FeRegSREG; 56 | #define FE_SREG(idx) (FE_STRUCT(FeRegSREG) { idx }) 57 | #define FE_ES FE_SREG(0) 58 | #define FE_CS FE_SREG(1) 59 | #define FE_SS FE_SREG(2) 60 
| #define FE_DS FE_SREG(3) 61 | #define FE_FS FE_SREG(4) 62 | #define FE_GS FE_SREG(5) 63 | typedef struct FeRegST { unsigned char idx; } FeRegST; 64 | #define FE_ST(idx) (FE_STRUCT(FeRegST) { idx }) 65 | #define FE_ST0 FE_ST(0) 66 | #define FE_ST1 FE_ST(1) 67 | #define FE_ST2 FE_ST(2) 68 | #define FE_ST3 FE_ST(3) 69 | #define FE_ST4 FE_ST(4) 70 | #define FE_ST5 FE_ST(5) 71 | #define FE_ST6 FE_ST(6) 72 | #define FE_ST7 FE_ST(7) 73 | typedef struct FeRegMM { unsigned char idx; } FeRegMM; 74 | #define FE_MM(idx) (FE_STRUCT(FeRegMM) { idx }) 75 | #define FE_MM0 FE_MM(0) 76 | #define FE_MM1 FE_MM(1) 77 | #define FE_MM2 FE_MM(2) 78 | #define FE_MM3 FE_MM(3) 79 | #define FE_MM4 FE_MM(4) 80 | #define FE_MM5 FE_MM(5) 81 | #define FE_MM6 FE_MM(6) 82 | #define FE_MM7 FE_MM(7) 83 | typedef struct FeRegXMM { unsigned char idx; } FeRegXMM; 84 | #define FE_XMM(idx) (FE_STRUCT(FeRegXMM) { idx }) 85 | #define FE_XMM0 FE_XMM(0) 86 | #define FE_XMM1 FE_XMM(1) 87 | #define FE_XMM2 FE_XMM(2) 88 | #define FE_XMM3 FE_XMM(3) 89 | #define FE_XMM4 FE_XMM(4) 90 | #define FE_XMM5 FE_XMM(5) 91 | #define FE_XMM6 FE_XMM(6) 92 | #define FE_XMM7 FE_XMM(7) 93 | #define FE_XMM8 FE_XMM(8) 94 | #define FE_XMM9 FE_XMM(9) 95 | #define FE_XMM10 FE_XMM(10) 96 | #define FE_XMM11 FE_XMM(11) 97 | #define FE_XMM12 FE_XMM(12) 98 | #define FE_XMM13 FE_XMM(13) 99 | #define FE_XMM14 FE_XMM(14) 100 | #define FE_XMM15 FE_XMM(15) 101 | #define FE_XMM16 FE_XMM(16) 102 | #define FE_XMM17 FE_XMM(17) 103 | #define FE_XMM18 FE_XMM(18) 104 | #define FE_XMM19 FE_XMM(19) 105 | #define FE_XMM20 FE_XMM(20) 106 | #define FE_XMM21 FE_XMM(21) 107 | #define FE_XMM22 FE_XMM(22) 108 | #define FE_XMM23 FE_XMM(23) 109 | #define FE_XMM24 FE_XMM(24) 110 | #define FE_XMM25 FE_XMM(25) 111 | #define FE_XMM26 FE_XMM(26) 112 | #define FE_XMM27 FE_XMM(27) 113 | #define FE_XMM28 FE_XMM(28) 114 | #define FE_XMM29 FE_XMM(29) 115 | #define FE_XMM30 FE_XMM(30) 116 | #define FE_XMM31 FE_XMM(31) 117 | typedef struct FeRegMASK { unsigned char idx; 
} FeRegMASK; 118 | #define FE_K(idx) (FE_STRUCT(FeRegMASK) { idx }) 119 | #define FE_K0 FE_K(0) 120 | #define FE_K1 FE_K(1) 121 | #define FE_K2 FE_K(2) 122 | #define FE_K3 FE_K(3) 123 | #define FE_K4 FE_K(4) 124 | #define FE_K5 FE_K(5) 125 | #define FE_K6 FE_K(6) 126 | #define FE_K7 FE_K(7) 127 | typedef struct FeRegTMM { unsigned char idx; } FeRegTMM; 128 | #define FE_TMM(idx) (FE_STRUCT(FeRegTMM) { idx }) 129 | #define FE_TMM0 FE_TMM(0) 130 | #define FE_TMM1 FE_TMM(1) 131 | #define FE_TMM2 FE_TMM(2) 132 | #define FE_TMM3 FE_TMM(3) 133 | #define FE_TMM4 FE_TMM(4) 134 | #define FE_TMM5 FE_TMM(5) 135 | #define FE_TMM6 FE_TMM(6) 136 | #define FE_TMM7 FE_TMM(7) 137 | typedef struct FeRegCR { unsigned char idx; } FeRegCR; 138 | #define FE_CR(idx) (FE_STRUCT(FeRegCR) { idx }) 139 | typedef struct FeRegDR { unsigned char idx; } FeRegDR; 140 | #define FE_DR(idx) (FE_STRUCT(FeRegDR) { idx }) 141 | 142 | // Internal only 143 | // Disambiguate GP and GPH -- C++ uses conversion constructors; C uses _Generic. 
144 | #ifdef __cplusplus 145 | } 146 | namespace { 147 | struct FeRegGPLH { 148 | unsigned char idx; 149 | FeRegGPLH(FeRegGP gp) : idx(gp.idx) {} 150 | FeRegGPLH(FeRegGPH gp) : idx(gp.idx | 0x20) {} 151 | }; 152 | } 153 | extern "C" { 154 | #define FE_MAKE_GPLH(reg) reg 155 | #else 156 | typedef struct FeRegGPLH { unsigned char idx; } FeRegGPLH; 157 | #define FE_GPLH(idx) (FE_STRUCT(FeRegGPLH) { idx }) 158 | #define FE_MAKE_GPLH(reg) FE_GPLH(_Generic((reg), FeRegGPH: 0x20, FeRegGP: 0) | (reg).idx) 159 | #endif 160 | 161 | typedef struct FeMem { 162 | uint8_t flags; 163 | FeRegGP base; 164 | unsigned char scale; 165 | // union { 166 | FeRegGP idx; 167 | // FeRegXMM idx_xmm; 168 | // }; 169 | int32_t off; 170 | } FeMem; 171 | #define FE_MEM(base,sc,idx,off) (FE_STRUCT(FeMem) { 0, base, sc, idx, off }) 172 | typedef struct FeMemV { 173 | uint8_t flags; 174 | FeRegGP base; 175 | unsigned char scale; 176 | FeRegXMM idx; 177 | int32_t off; 178 | } FeMemV; 179 | #define FE_MEMV(base,sc,idx,off) (FE_STRUCT(FeMemV) { 0, base, sc, idx, off }) 180 | 181 | // NOP is special: flags is interpreted as the length in bytes, 0 = 1 byte, too. 
182 | unsigned fe64_NOP(uint8_t* buf, unsigned flags); 183 | 184 | #include 185 | 186 | #ifdef __cplusplus 187 | } 188 | #endif 189 | 190 | #endif 191 | -------------------------------------------------------------------------------- /fadec.h: -------------------------------------------------------------------------------- 1 | 2 | #ifndef FD_FADEC_H_ 3 | #define FD_FADEC_H_ 4 | 5 | #include 6 | #include 7 | 8 | #ifdef __cplusplus 9 | extern "C" { 10 | #endif 11 | 12 | typedef enum { 13 | FD_REG_R0 = 0, FD_REG_R1, FD_REG_R2, FD_REG_R3, 14 | FD_REG_R4, FD_REG_R5, FD_REG_R6, FD_REG_R7, 15 | FD_REG_R8, FD_REG_R9, FD_REG_R10, FD_REG_R11, 16 | FD_REG_R12, FD_REG_R13, FD_REG_R14, FD_REG_R15, 17 | // Alternative names for byte registers 18 | FD_REG_AL = 0, FD_REG_CL, FD_REG_DL, FD_REG_BL, 19 | FD_REG_AH, FD_REG_CH, FD_REG_DH, FD_REG_BH, 20 | // Alternative names for general purpose registers 21 | FD_REG_AX = 0, FD_REG_CX, FD_REG_DX, FD_REG_BX, 22 | FD_REG_SP, FD_REG_BP, FD_REG_SI, FD_REG_DI, 23 | // FD_REG_IP can only be accessed in long mode (64-bit) 24 | FD_REG_IP = 0x10, 25 | // Segment register values 26 | FD_REG_ES = 0, FD_REG_CS, FD_REG_SS, FD_REG_DS, FD_REG_FS, FD_REG_GS, 27 | // No register specified 28 | FD_REG_NONE = 0x3f 29 | } FdReg; 30 | 31 | typedef enum { 32 | #define FD_MNEMONIC(name,value) FDI_ ## name = value, 33 | #include 34 | #undef FD_MNEMONIC 35 | } FdInstrType; 36 | 37 | /** Internal use only. **/ 38 | enum { 39 | FD_FLAG_LOCK = 1 << 0, 40 | FD_FLAG_REP = 1 << 2, 41 | FD_FLAG_REPNZ = 1 << 1, 42 | FD_FLAG_64 = 1 << 7, 43 | }; 44 | 45 | /** Operand types. 
**/ 46 | typedef enum { 47 | FD_OT_NONE = 0, 48 | FD_OT_REG = 1, 49 | FD_OT_IMM = 2, 50 | FD_OT_MEM = 3, 51 | FD_OT_OFF = 4, 52 | FD_OT_MEMBCST = 5, 53 | } FdOpType; 54 | 55 | typedef enum { 56 | /** Vector (SSE/AVX) register XMMn/YMMn/ZMMn **/ 57 | FD_RT_VEC = 0, 58 | /** Low general purpose register **/ 59 | FD_RT_GPL = 1, 60 | /** High-byte general purpose register **/ 61 | FD_RT_GPH = 2, 62 | /** Segment register **/ 63 | FD_RT_SEG = 3, 64 | /** FPU register ST(n) **/ 65 | FD_RT_FPU = 4, 66 | /** MMX register MMn **/ 67 | FD_RT_MMX = 5, 68 | /** TMM register TMMn **/ 69 | FD_RT_TMM = 6, 70 | /** Vector mask (AVX-512) register Kn **/ 71 | FD_RT_MASK = 7, 72 | /** Bound register BNDn **/ 73 | FD_RT_BND = 8, 74 | /** Control Register CRn **/ 75 | FD_RT_CR = 9, 76 | /** Debug Register DRn **/ 77 | FD_RT_DR = 10, 78 | /** Must be a memory operand **/ 79 | FD_RT_MEM = 15, 80 | } FdRegType; 81 | 82 | /** Do not depend on the actual enum values. **/ 83 | typedef enum { 84 | /** Round to nearest (even) **/ 85 | FD_RC_RN = 1, 86 | /** Round down **/ 87 | FD_RC_RD = 3, 88 | /** Round up **/ 89 | FD_RC_RU = 5, 90 | /** Round to zero (truncate) **/ 91 | FD_RC_RZ = 7, 92 | /** Rounding mode as specified in MXCSR **/ 93 | FD_RC_MXCSR = 0, 94 | /** Rounding mode irrelevant, but SAE **/ 95 | FD_RC_SAE = 6, 96 | } FdRoundControl; 97 | 98 | /** Internal use only. **/ 99 | typedef struct { 100 | uint8_t type; 101 | uint8_t size; 102 | uint8_t reg; 103 | uint8_t misc; 104 | } FdOp; 105 | 106 | /** Never(!) access struct fields directly. Use the macros defined below. 
**/ 107 | typedef struct { 108 | uint16_t type; 109 | uint8_t flags; 110 | uint8_t segment; 111 | uint8_t addrsz; 112 | uint8_t operandsz; 113 | uint8_t size; 114 | uint8_t evex; 115 | 116 | FdOp operands[4]; 117 | 118 | int64_t disp; 119 | int64_t imm; 120 | 121 | uint64_t address; 122 | } FdInstr; 123 | 124 | typedef enum { 125 | FD_ERR_UD = -1, 126 | FD_ERR_INTERNAL = -2, 127 | FD_ERR_PARTIAL = -3, 128 | } FdErr; 129 | 130 | 131 | /** Decode an instruction. 132 | * \param buf Buffer for instruction bytes. 133 | * \param len Length of the buffer (in bytes). An instruction is not longer than 134 | * 15 bytes on all x86 architectures. 135 | * \param mode Decoding mode, either 32 for protected/compatibility mode or 64 136 | * for long mode. 16-bit mode is not supported. 137 | * \param address Virtual address where the decoded instruction. This is used 138 | * for computing jump targets. If "0" is passed, operands which require 139 | * adding EIP/RIP will be stored as FD_OT_OFF operands. 140 | * DEPRECATED: Strongly prefer passing 0 and using FD_OT_OFF operands. 141 | * \param out_instr Pointer to the instruction buffer. Note that this may get 142 | * partially written even if an error is returned. 143 | * \return The number of bytes consumed by the instruction, or a negative number 144 | * indicating an error. 145 | **/ 146 | int fd_decode(const uint8_t* buf, size_t len, int mode, uintptr_t address, 147 | FdInstr* out_instr); 148 | 149 | /** Format an instruction to a string. 150 | * \param instr The instruction. 151 | * \param buf The buffer to hold the formatted string. 152 | * \param len The length of the buffer. 153 | **/ 154 | void fd_format(const FdInstr* instr, char* buf, size_t len); 155 | 156 | /** Format an instruction to a string. 157 | * NOTE: API stability is currently not guaranteed for this function; its name 158 | * and/or signature may change in future. 159 | * 160 | * \param instr The instruction. 
161 | * \param addr The base address to use for printing FD_OT_OFF operands. 162 | * \param buf The buffer to hold the formatted string. 163 | * \param len The length of the buffer. 164 | **/ 165 | void fd_format_abs(const FdInstr* instr, uint64_t addr, char* buf, size_t len); 166 | 167 | /** Get the stringified name of an instruction type. 168 | * NOTE: API stability is currently not guaranteed for this function; changes 169 | * to the signature and/or the returned string can be expected. E.g., a future 170 | * version may take an extra parameter for the instruction operand size; or may 171 | * take a complete decoded instruction as first parameter and return the 172 | * mnemonic returned by fd_format. 173 | * 174 | * \param ty An instruction type 175 | * \return The instruction type as string, or "(invalid)". 176 | **/ 177 | const char* fdi_name(FdInstrType ty); 178 | 179 | 180 | /** Gets the type/mnemonic of the instruction. 181 | * ABI STABILITY NOTE: different versions or builds of the library may use 182 | * different values. When linking as shared library, any interpretation of this 183 | * value is meaningless; in such cases use fdi_name. 184 | * 185 | * API STABILITY NOTE: a future version of this library may decode string 186 | * instructions prefixed with REP/REPNZ and instructions prefixed with LOCK as 187 | * separate instruction types. **/ 188 | #define FD_TYPE(instr) ((FdInstrType) (instr)->type) 189 | /** DEPRECATED: This functionality is obsolete in favor of FD_OT_OFF. 190 | * Gets the address of the instruction. Invalid if decoded address == 0. **/ 191 | #define FD_ADDRESS(instr) ((instr)->address) 192 | /** Gets the size of the instruction in bytes. **/ 193 | #define FD_SIZE(instr) ((instr)->size) 194 | /** Gets the specified segment override, or FD_REG_NONE for default segment. **/ 195 | #define FD_SEGMENT(instr) ((FdReg) (instr)->segment & 0x3f) 196 | /** Gets the address size attribute of the instruction in bytes. 
**/ 197 | #define FD_ADDRSIZE(instr) (1 << (instr)->addrsz) 198 | /** Get the logarithmic address size; FD_ADDRSIZE == 1 << FD_ADDRSIZELG **/ 199 | #define FD_ADDRSIZELG(instr) ((instr)->addrsz) 200 | /** Gets the operation width in bytes of the instruction if this is not encoded 201 | * in the operands, for example for the string instruction (e.g. MOVS). **/ 202 | #define FD_OPSIZE(instr) (1 << (instr)->operandsz) 203 | /** Get the logarithmic operand size; FD_OPSIZE == 1 << FD_OPSIZELG iff 204 | * FD_OPSIZE is valid. **/ 205 | #define FD_OPSIZELG(instr) ((instr)->operandsz) 206 | /** Indicates whether the instruction was encoded with a REP prefix. Needed for: 207 | * (1) Handling the instructions MOVS, STOS, LODS, INS and OUTS properly. 208 | * (2) Handling the instructions SCAS and CMPS, for which this means REPZ. **/ 209 | #define FD_HAS_REP(instr) ((instr)->flags & FD_FLAG_REP) 210 | /** Indicates whether the instruction was encoded with a REPNZ prefix. **/ 211 | #define FD_HAS_REPNZ(instr) ((instr)->flags & FD_FLAG_REPNZ) 212 | /** Indicates whether the instruction was encoded with a LOCK prefix. **/ 213 | #define FD_HAS_LOCK(instr) ((instr)->flags & FD_FLAG_LOCK) 214 | /** Do not use. **/ 215 | #define FD_IS64(instr) ((instr)->flags & FD_FLAG_64) 216 | 217 | /** Gets the type of an operand at the given index. **/ 218 | #define FD_OP_TYPE(instr,idx) ((FdOpType) (instr)->operands[idx].type) 219 | /** Gets the size in bytes of an operand. However, there are a few exceptions: 220 | * (1) For some register types, e.g., segment registers, or x87 registers, the 221 | * size is zero. (This allows some simplifications internally.) 222 | * (2) On some vector instructions this may be only an approximation of the 223 | * actually needed operand size (that is, an instruction may/must only use 224 | * a smaller part than specified here). The real operand size is always 225 | * fully recoverable in combination with the instruction type. 
**/ 226 | #define FD_OP_SIZE(instr,idx) (1 << (instr)->operands[idx].size >> 1) 227 | /** Get the logarithmic size of an operand; see FD_OP_SIZE for special cases. 228 | * The following equality holds: FD_OP_SIZE == 1 << (FD_OP_SIZELG + 1) >> 1 229 | * Note that typically FD_OP_SIZE == 1 << FD_OP_SIZELG unless a zero-sized 230 | * memory operand, FPU register, or mask register is involved. **/ 231 | #define FD_OP_SIZELG(instr,idx) ((instr)->operands[idx].size - 1) 232 | /** Gets the accessed register index of a register operand. Note that /only/ the 233 | * index is returned, no further interpretation of the index (which depends on 234 | * the instruction type) is done. The register type can be fetched using 235 | * FD_OP_REG_TYPE, e.g. for distinguishing high-byte registers. 236 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 237 | #define FD_OP_REG(instr,idx) ((FdReg) (instr)->operands[idx].reg) 238 | /** Gets the type of the accessed register. 239 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 240 | #define FD_OP_REG_TYPE(instr,idx) ((FdRegType) (instr)->operands[idx].misc) 241 | /** DEPRECATED: use FD_OP_REG_TYPE() == FD_RT_GPH instead. 242 | * Returns whether the accessed register is a high-byte register. In that case, 243 | * the register index has to be decreased by 4. 244 | * Only valid if FD_OP_TYPE == FD_OT_REG **/ 245 | #define FD_OP_REG_HIGH(instr,idx) (FD_OP_REG_TYPE(instr,idx) == FD_RT_GPH) 246 | /** Gets the index of the base register from a memory operand, or FD_REG_NONE, 247 | * if the memory operand has no base register. This is the only case where the 248 | * 64-bit register RIP can be returned, in which case the operand also has no 249 | * scaled index register. 250 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 251 | #define FD_OP_BASE(instr,idx) ((FdReg) (instr)->operands[idx].reg) 252 | /** Gets the index of the index register from a memory operand, or FD_REG_NONE, 253 | * if the memory operand has no scaled index register. 
254 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 255 | #define FD_OP_INDEX(instr,idx) ((FdReg) (instr)->operands[idx].misc & 0x3f) 256 | /** Gets the scale of the index register from a memory operand when existent. 257 | * This does /not/ return the scale in an absolute value but returns the amount 258 | * of bits the index register is shifted to the left (i.e. the value is in the 259 | * range 0-3). The actual scale can be computed easily using 1<<FD_OP_SCALE. 260 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 261 | #define FD_OP_SCALE(instr,idx) ((instr)->operands[idx].misc >> 6) 262 | /** Gets the sign-extended displacement of a memory operand. 263 | * Only valid if FD_OP_TYPE == FD_OT_MEM/MEMBCST **/ 264 | #define FD_OP_DISP(instr,idx) ((int64_t) (instr)->disp) 265 | /** Get memory broadcast size in bytes. 266 | * Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/ 267 | #define FD_OP_BCSTSZ(instr,idx) (1 << FD_OP_BCSTSZLG(instr,idx)) 268 | /** Get logarithmic memory broadcast size (1 = 2-byte; 2=4-byte; 3=8-byte). 269 | * Only valid if FD_OP_TYPE == FD_OT_MEMBCST **/ 270 | #define FD_OP_BCSTSZLG(instr,idx) ((instr)->segment >> 6) 271 | /** Gets the (sign-extended) encoded constant for an immediate operand. 272 | * Only valid if FD_OP_TYPE == FD_OT_IMM or FD_OP_TYPE == FD_OT_OFF **/ 273 | #define FD_OP_IMM(instr,idx) ((instr)->imm) 274 | 275 | /** Get the opmask register for EVEX-encoded instructions; 0 for no mask. **/ 276 | #define FD_MASKREG(instr) ((instr)->evex & 0x07) 277 | /** Get whether zero masking shall be used. Only valid if FD_MASKREG != 0. **/ 278 | #define FD_MASKZERO(instr) ((instr)->evex & 0x80) 279 | /** Get rounding mode for EVEX-encoded instructions. See FdRoundControl. 
**/ 280 | #define FD_ROUNDCONTROL(instr) ((FdRoundControl) (((instr)->evex & 0x70) >> 4)) 281 | 282 | #ifdef __cplusplus 283 | } 284 | #endif 285 | 286 | #endif 287 | -------------------------------------------------------------------------------- /format.c: -------------------------------------------------------------------------------- 1 | 2 | #include <stdbool.h> 3 | #include <stddef.h> 4 | #include <stdint.h> 5 | #ifdef _MSC_VER 6 | #include <intrin.h> 7 | #endif 8 | 9 | #include <fadec.h> 10 | 11 | 12 | #ifdef __GNUC__ 13 | #define LIKELY(x) __builtin_expect(!!(x), 1) 14 | #define UNLIKELY(x) __builtin_expect(!!(x), 0) 15 | #define DECLARE_ARRAY_SIZE(n) static n 16 | #define DECLARE_RESTRICTED_ARRAY_SIZE(n) restrict static n 17 | #else 18 | #define LIKELY(x) (x) 19 | #define UNLIKELY(x) (x) 20 | #define DECLARE_ARRAY_SIZE(n) n 21 | #define DECLARE_RESTRICTED_ARRAY_SIZE(n) n 22 | #endif 23 | 24 | #if defined(__has_attribute) 25 | #if __has_attribute(fallthrough) 26 | #define FALLTHROUGH() __attribute__((fallthrough)) 27 | #endif 28 | #endif 29 | #if !defined(FALLTHROUGH) 30 | #define FALLTHROUGH() ((void)0) 31 | #endif 32 | 33 | struct FdStr { 34 | const char* s; 35 | unsigned sz; 36 | }; 37 | 38 | #define fd_stre(s) ((struct FdStr) { (s "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"), sizeof (s)-1 }) 39 | 40 | static char* 41 | fd_strpcat(char* restrict dst, struct FdStr src) { 42 | #ifdef __GNUC__ 43 | unsigned lim = __builtin_constant_p(src.sz) && src.sz <= 8 ? 8 : 16; 44 | #else 45 | unsigned lim = 16; 46 | #endif 47 | for (unsigned i = 0; i < lim; i++) 48 | dst[i] = src.s[i]; 49 | // __builtin_memcpy(dst, src.s, 16); 50 | return dst + src.sz; 51 | } 52 | 53 | static unsigned 54 | fd_clz64(uint64_t v) { 55 | #if defined(__GNUC__) 56 | return __builtin_clzll(v); 57 | #elif defined(_MSC_VER) 58 | unsigned long index; 59 | 60 | // 32-bit MSVC doesn't support _BitScanReverse64. This is an attempt to 61 | // identify this case. 
62 | #if INTPTR_MAX == INT64_MAX 63 | _BitScanReverse64(&index, v); 64 | #else 65 | if (_BitScanReverse(&index, v >> 32)) 66 | return 31 - index; 67 | 68 | _BitScanReverse(&index, v & 0xffffffff); 69 | #endif 70 | 71 | return 63 - index; 72 | #else 73 | #error Unsupported compiler. 74 | #endif 75 | } 76 | 77 | #if defined(__SSE2__) 78 | #include <emmintrin.h> 79 | #endif 80 | 81 | static char* 82 | fd_strpcatnum(char dst[DECLARE_ARRAY_SIZE(18)], uint64_t val) { 83 | unsigned lz = fd_clz64(val|1); 84 | unsigned numbytes = 16 - (lz / 4); 85 | #if defined(__SSE2__) 86 | __m128i mv = _mm_set_epi64x(0, val << (lz & -4)); 87 | __m128i mvp = _mm_unpacklo_epi8(mv, mv); 88 | __m128i mva = _mm_srli_epi16(mvp, 12); 89 | __m128i mvb = _mm_and_si128(mvp, _mm_set1_epi16(0x0f00u)); 90 | __m128i ml = _mm_or_si128(mva, mvb); 91 | __m128i mn = _mm_or_si128(ml, _mm_set1_epi8(0x30)); 92 | __m128i mgt = _mm_cmpgt_epi8(ml, _mm_set1_epi8(9)); 93 | __m128i mgtm = _mm_and_si128(mgt, _mm_set1_epi8(0x61 - 0x3a)); 94 | __m128i ma = _mm_add_epi8(mn, mgtm); 95 | __m128i msw = _mm_shufflehi_epi16(_mm_shufflelo_epi16(ma, 0x1b), 0x1b); 96 | __m128i ms = _mm_shuffle_epi32(msw, 0x4e); 97 | _mm_storeu_si128((__m128i_u*) (dst + 2), ms); 98 | #else 99 | unsigned idx = numbytes + 2; 100 | do { 101 | dst[--idx] = "0123456789abcdef"[val % 16]; 102 | val /= 16; 103 | } while (val); 104 | #endif 105 | dst[0] = '0'; 106 | dst[1] = 'x'; 107 | return dst + numbytes + 2; 108 | } 109 | 110 | static char* 111 | fd_strpcatreg(char* restrict dst, size_t rt, size_t ri, unsigned size) { 112 | const char* nametab = 113 | "\2al\4bnd0\2cl\4bnd1\2dl\4bnd2\2bl\4bnd3" 114 | "\3spl\0 \3bpl\0 \3sil\0 \3dil\0 " 115 | "\3r8b\0 \3r9b\0 \4r10b\0 \4r11b\0 " 116 | "\4r12b\2ah\4r13b\2ch\4r14b\2dh\4r15b\2bh\0\0 " 117 | 118 | "\2ax\4tmm0\2cx\4tmm1\2dx\4tmm2\2bx\4tmm3" 119 | "\2sp\4tmm4\2bp\4tmm5\2si\4tmm6\2di\4tmm7" 120 | "\3r8w \2es\3r9w \2cs\4r10w\2ss\4r11w\2ds" 121 | "\4r12w\2fs\4r13w\2gs\4r14w\0 \4r15w\0 \2ip\0 " 122 | 123 | 
/* nametab (cont.): length-prefixed register names. The first byte of each
 * 8-byte entry is the name length; lookup below reads nametab + idx + 8*ri.
 * Two name columns are interleaved within each size-class group. */ "\3eax\3mm0\3ecx\3mm1\3edx\3mm2\3ebx\3mm3" 124 | "\3esp\3mm4\3ebp\3mm5\3esi\3mm6\3edi\3mm7" 125 | "\3r8d \2k0\3r9d \2k1\4r10d\2k2\4r11d\2k3" 126 | "\4r12d\2k4\4r13d\2k5\4r14d\2k6\4r15d\2k7\3eip\0 " 127 | 128 | "\3rax\3cr0\3rcx\0 \3rdx\3cr2\3rbx\3cr3" 129 | "\3rsp\3cr4\3rbp\0 \3rsi\0 \3rdi\0 " 130 | "\2r8 \3cr8\2r9 \3dr0\3r10\3dr1\3r11\3dr2" 131 | "\3r12\3dr3\3r13\3dr4\3r14\3dr5\3r15\3dr6\3rip\3dr7" 132 | 133 | "\5st(0)\0 \5st(1)\0 \5st(2)\0 \5st(3)\0 " 134 | "\5st(4)\0 \5st(5)\0 \5st(6)\0 \5st(7)\0 " 135 | 136 | "\4xmm0\0 \4xmm1\0 \4xmm2\0 \4xmm3\0 " 137 | "\4xmm4\0 \4xmm5\0 \4xmm6\0 \4xmm7\0 " 138 | "\4xmm8\0 \4xmm9\0 \5xmm10\0 \5xmm11\0 " 139 | "\5xmm12\0 \5xmm13\0 \5xmm14\0 \5xmm15\0 " 140 | "\5xmm16\0 \5xmm17\0 \5xmm18\0 \5xmm19\0 " 141 | "\5xmm20\0 \5xmm21\0 \5xmm22\0 \5xmm23\0 " 142 | "\5xmm24\0 \5xmm25\0 \5xmm26\0 \5xmm27\0 " 143 | "\5xmm28\0 \5xmm29\0 \5xmm30\0 \5xmm31\0 "; 144 | 145 | /* Start offset into nametab for each FdRegType: the factors are
 * group * 17*8 (size-class group) + column * 8 (row) + byte offset into the
 * interleaved entry pair. NOTE(review): stride constants mirror the table
 * layout above -- keep both in sync when editing. */ static const uint16_t nametabidx[] = { 146 | [FD_RT_GPL] = 0 * 17*8 + 0 * 8 + 0, 147 | [FD_RT_GPH] = 0 * 17*8 + 8 * 8 + 5, 148 | [FD_RT_SEG] = 1 * 17*8 + 8 * 8 + 5, 149 | [FD_RT_FPU] = 4 * 17*8 + 0 * 8 + 0, 150 | [FD_RT_MMX] = 2 * 17*8 + 0 * 8 + 4, 151 | [FD_RT_VEC] = 4 * 17*8 + 8 * 8 + 0, 152 | [FD_RT_MASK]= 2 * 17*8 + 8 * 8 + 5, 153 | [FD_RT_BND] = 0 * 17*8 + 0 * 8 + 3, 154 | [FD_RT_CR] = 3 * 17*8 + 0 * 8 + 4, 155 | [FD_RT_DR] = 3 * 17*8 + 9 * 8 + 4, 156 | [FD_RT_TMM] = 1 * 17*8 + 0 * 8 + 3, 157 | }; 158 | 159 | /* GP-register names depend on the operand size (al/ax/eax/rax), so GPL
 * selects its size-class group directly instead of using nametabidx. */ unsigned idx = rt == FD_RT_GPL ? 
size * 17*8 : nametabidx[rt]; 160 | const char* name = nametab + idx + 8*ri; 161 | for (unsigned i = 0; i < 8; i++) 162 | dst[i] = name[i+1]; 163 | if (UNLIKELY(rt == FD_RT_VEC && size > 4)) 164 | dst[0] += size - 4; 165 | return dst + *name; 166 | } 167 | 168 | const char* 169 | fdi_name(FdInstrType ty) { 170 | (void) ty; 171 | return "(invalid)"; 172 | } 173 | 174 | static char* 175 | fd_mnemonic(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(48)], const FdInstr* instr) { 176 | #define FD_DECODE_TABLE_STRTAB1 177 | static const char* mnemonic_str = 178 | #include 179 | // 20 NULL Bytes to prevent out-of-bounds reads 180 | "\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0"; 181 | #undef FD_DECODE_TABLE_STRTAB1 182 | 183 | #define FD_DECODE_TABLE_STRTAB2 184 | static const uint16_t mnemonic_offs[] = { 185 | #include 186 | }; 187 | #undef FD_DECODE_TABLE_STRTAB2 188 | 189 | #define FD_DECODE_TABLE_STRTAB3 190 | static const uint8_t mnemonic_lens[] = { 191 | #include 192 | }; 193 | #undef FD_DECODE_TABLE_STRTAB3 194 | 195 | const char* mnem = &mnemonic_str[mnemonic_offs[FD_TYPE(instr)]]; 196 | unsigned mnemlen = mnemonic_lens[FD_TYPE(instr)]; 197 | 198 | bool prefix_xacq_xrel = false; 199 | bool prefix_segment = false; 200 | 201 | char sizesuffix[4] = {0}; 202 | unsigned sizesuffixlen = 0; 203 | 204 | if (UNLIKELY(FD_OP_TYPE(instr, 0) == FD_OT_OFF && FD_OP_SIZELG(instr, 0) == 1)) 205 | sizesuffix[0] = 'w', sizesuffixlen = 1; 206 | 207 | switch (FD_TYPE(instr)) { 208 | case FDI_C_SEP: 209 | mnem += FD_OPSIZE(instr) & 0xc; 210 | mnemlen = 3; 211 | break; 212 | case FDI_C_EX: 213 | mnem += FD_OPSIZE(instr) & 0xc; 214 | mnemlen = FD_OPSIZE(instr) < 4 ? 
3 : 4; 215 | break; 216 | case FDI_CMPXCHGD: 217 | switch (FD_OPSIZELG(instr)) { 218 | default: break; 219 | case 2: sizesuffix[0] = '8', sizesuffix[1] = 'b', sizesuffixlen = 2; break; 220 | case 3: sizesuffix[0] = '1', sizesuffix[1] = '6', sizesuffix[2] = 'b', sizesuffixlen = 3; break; 221 | } 222 | break; 223 | case FDI_JCXZ: 224 | mnemlen = FD_ADDRSIZELG(instr) == 1 ? 4 : 5; 225 | mnem += 5 * (FD_ADDRSIZELG(instr) - 1); 226 | break; 227 | case FDI_PUSH: 228 | if (FD_OP_SIZELG(instr, 0) == 1 && FD_OP_TYPE(instr, 0) == FD_OT_IMM) 229 | sizesuffix[0] = 'w', sizesuffixlen = 1; 230 | FALLTHROUGH(); 231 | case FDI_POP: 232 | if (FD_OP_SIZELG(instr, 0) == 1 && FD_OP_TYPE(instr, 0) == FD_OT_REG && 233 | FD_OP_REG_TYPE(instr, 0) == FD_RT_SEG) 234 | sizesuffix[0] = 'w', sizesuffixlen = 1; 235 | break; 236 | case FDI_XCHG: 237 | if (FD_OP_TYPE(instr, 0) == FD_OT_MEM) 238 | prefix_xacq_xrel = true; 239 | break; 240 | case FDI_MOV: 241 | // MOV C6h/C7h can have XRELEASE prefix. 242 | if (FD_HAS_REP(instr) && FD_OP_TYPE(instr, 0) == FD_OT_MEM && 243 | FD_OP_TYPE(instr, 1) == FD_OT_IMM) 244 | prefix_xacq_xrel = true; 245 | break; 246 | case FDI_FXSAVE: 247 | case FDI_FXRSTOR: 248 | case FDI_XSAVE: 249 | case FDI_XSAVEC: 250 | case FDI_XSAVEOPT: 251 | case FDI_XSAVES: 252 | case FDI_XRSTOR: 253 | case FDI_XRSTORS: 254 | if (FD_OPSIZELG(instr) == 3) 255 | sizesuffix[0] = '6', sizesuffix[1] = '4', sizesuffixlen = 2; 256 | break; 257 | case FDI_EVX_MOV_G2X: 258 | case FDI_EVX_MOV_X2G: 259 | case FDI_EVX_PEXTR: 260 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 0)]; 261 | sizesuffixlen = 1; 262 | break; 263 | case FDI_EVX_PBROADCAST: 264 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 1)]; 265 | sizesuffixlen = 1; 266 | break; 267 | case FDI_EVX_PINSR: 268 | sizesuffix[0] = "bwdq"[FD_OP_SIZELG(instr, 2)]; 269 | sizesuffixlen = 1; 270 | break; 271 | case FDI_RET: 272 | case FDI_ENTER: 273 | case FDI_LEAVE: 274 | if (FD_OPSIZELG(instr) == 1) 275 | sizesuffix[0] = 'w', sizesuffixlen = 1; 
276 | break; 277 | case FDI_LODS: 278 | case FDI_MOVS: 279 | case FDI_CMPS: 280 | case FDI_OUTS: 281 | prefix_segment = true; 282 | FALLTHROUGH(); 283 | case FDI_STOS: 284 | case FDI_SCAS: 285 | case FDI_INS: 286 | if (FD_HAS_REP(instr)) 287 | buf = fd_strpcat(buf, fd_stre("rep ")); 288 | if (FD_HAS_REPNZ(instr)) 289 | buf = fd_strpcat(buf, fd_stre("repnz ")); 290 | if (FD_IS64(instr) && FD_ADDRSIZELG(instr) == 2) 291 | buf = fd_strpcat(buf, fd_stre("addr32 ")); 292 | if (!FD_IS64(instr) && FD_ADDRSIZELG(instr) == 1) 293 | buf = fd_strpcat(buf, fd_stre("addr16 ")); 294 | FALLTHROUGH(); 295 | case FDI_IN: 296 | case FDI_OUT: 297 | if (FD_OP_TYPE(instr, 0) != FD_OT_NONE) 298 | break; 299 | FALLTHROUGH(); 300 | case FDI_PUSHA: 301 | case FDI_POPA: 302 | case FDI_PUSHF: 303 | case FDI_POPF: 304 | case FDI_RETF: 305 | case FDI_IRET: 306 | sizesuffix[0] = "bwdq"[FD_OPSIZELG(instr)]; 307 | sizesuffixlen = 1; 308 | break; 309 | default: break; 310 | } 311 | 312 | if (UNLIKELY(prefix_xacq_xrel || FD_HAS_LOCK(instr))) { 313 | if (FD_HAS_REP(instr)) 314 | buf = fd_strpcat(buf, fd_stre("xrelease ")); 315 | if (FD_HAS_REPNZ(instr)) 316 | buf = fd_strpcat(buf, fd_stre("xacquire ")); 317 | } 318 | if (UNLIKELY(FD_HAS_LOCK(instr))) 319 | buf = fd_strpcat(buf, fd_stre("lock ")); 320 | if (UNLIKELY(prefix_segment && FD_SEGMENT(instr) != FD_REG_NONE)) { 321 | *buf++ = "ecsdfg\0"[FD_SEGMENT(instr) & 7]; 322 | *buf++ = 's'; 323 | *buf++ = ' '; 324 | } 325 | 326 | for (unsigned i = 0; i < 20; i++) 327 | buf[i] = mnem[i]; 328 | buf += mnemlen; 329 | for (unsigned i = 0; i < 4; i++) 330 | buf[i] = sizesuffix[i]; 331 | buf += sizesuffixlen; 332 | 333 | return buf; 334 | } 335 | 336 | static char* 337 | fd_format_impl(char buf[DECLARE_RESTRICTED_ARRAY_SIZE(128)], const FdInstr* instr, uint64_t addr) { 338 | buf = fd_mnemonic(buf, instr); 339 | 340 | for (int i = 0; i < 4; i++) 341 | { 342 | FdOpType op_type = FD_OP_TYPE(instr, i); 343 | if (op_type == FD_OT_NONE) 344 | break; 345 | if (i > 
0) 346 | *buf++ = ','; 347 | *buf++ = ' '; 348 | 349 | int size = FD_OP_SIZELG(instr, i); 350 | 351 | if (op_type == FD_OT_REG) { 352 | unsigned type = FD_OP_REG_TYPE(instr, i); 353 | unsigned idx = FD_OP_REG(instr, i); 354 | buf = fd_strpcatreg(buf, type, idx, size); 355 | } else if (op_type == FD_OT_MEM || op_type == FD_OT_MEMBCST) { 356 | unsigned idx_rt = FD_RT_GPL; 357 | unsigned idx_sz = FD_ADDRSIZELG(instr); 358 | switch (FD_TYPE(instr)) { 359 | case FDI_CMPXCHGD: size = FD_OPSIZELG(instr) + 1; break; 360 | case FDI_BOUND: size += 1; break; 361 | case FDI_JMPF: 362 | case FDI_CALLF: 363 | case FDI_LDS: 364 | case FDI_LES: 365 | case FDI_LFS: 366 | case FDI_LGS: 367 | case FDI_LSS: 368 | size += 6; 369 | break; 370 | case FDI_FLD: 371 | case FDI_FSTP: 372 | case FDI_FBLD: 373 | case FDI_FBSTP: 374 | size = size >= 0 ? size : 9; 375 | break; 376 | case FDI_VPGATHERQD: 377 | case FDI_VGATHERQPS: 378 | case FDI_EVX_PGATHERQD: 379 | case FDI_EVX_GATHERQPS: 380 | idx_rt = FD_RT_VEC; 381 | idx_sz = FD_OP_SIZELG(instr, 0) + 1; 382 | break; 383 | case FDI_EVX_PSCATTERQD: 384 | case FDI_EVX_SCATTERQPS: 385 | idx_rt = FD_RT_VEC; 386 | idx_sz = FD_OP_SIZELG(instr, 1) + 1; 387 | break; 388 | case FDI_VPGATHERDQ: 389 | case FDI_VGATHERDPD: 390 | case FDI_EVX_PGATHERDQ: 391 | case FDI_EVX_GATHERDPD: 392 | idx_rt = FD_RT_VEC; 393 | idx_sz = FD_OP_SIZELG(instr, 0) - 1; 394 | break; 395 | case FDI_EVX_PSCATTERDQ: 396 | case FDI_EVX_SCATTERDPD: 397 | idx_rt = FD_RT_VEC; 398 | idx_sz = FD_OP_SIZELG(instr, 1) - 1; 399 | break; 400 | case FDI_VPGATHERDD: 401 | case FDI_VPGATHERQQ: 402 | case FDI_VGATHERDPS: 403 | case FDI_VGATHERQPD: 404 | case FDI_EVX_PGATHERDD: 405 | case FDI_EVX_PGATHERQQ: 406 | case FDI_EVX_GATHERDPS: 407 | case FDI_EVX_GATHERQPD: 408 | idx_rt = FD_RT_VEC; 409 | idx_sz = FD_OP_SIZELG(instr, 0); 410 | break; 411 | case FDI_EVX_PSCATTERDD: 412 | case FDI_EVX_PSCATTERQQ: 413 | case FDI_EVX_SCATTERDPS: 414 | case FDI_EVX_SCATTERQPD: 415 | idx_rt = FD_RT_VEC; 416 
| idx_sz = FD_OP_SIZELG(instr, 1); 417 | break; 418 | default: break; 419 | } 420 | 421 | if (op_type == FD_OT_MEMBCST) 422 | size = FD_OP_BCSTSZLG(instr, i); 423 | 424 | const char* ptrsizes = 425 | "\00 " 426 | "\11byte ptr " 427 | "\11word ptr " 428 | "\12dword ptr " 429 | "\12qword ptr " 430 | "\14xmmword ptr " 431 | "\14ymmword ptr " 432 | "\14zmmword ptr " 433 | "\12dword ptr " // far ptr; word + 2 434 | "\12fword ptr " // far ptr; dword + 2 435 | "\12tbyte ptr "; // far ptr/FPU; qword + 2 436 | const char* ptrsize = ptrsizes + 16 * (size + 1); 437 | buf = fd_strpcat(buf, (struct FdStr) { ptrsize+1, *ptrsize }); 438 | 439 | unsigned seg = FD_SEGMENT(instr); 440 | if (seg != FD_REG_NONE) { 441 | *buf++ = "ecsdfg\0"[seg & 7]; 442 | *buf++ = 's'; 443 | *buf++ = ':'; 444 | } 445 | *buf++ = '['; 446 | 447 | bool has_base = FD_OP_BASE(instr, i) != FD_REG_NONE; 448 | bool has_idx = FD_OP_INDEX(instr, i) != FD_REG_NONE; 449 | if (has_base) 450 | buf = fd_strpcatreg(buf, FD_RT_GPL, FD_OP_BASE(instr, i), FD_ADDRSIZELG(instr)); 451 | if (has_idx) { 452 | if (has_base) 453 | *buf++ = '+'; 454 | *buf++ = '0' + (1 << FD_OP_SCALE(instr, i)); 455 | *buf++ = '*'; 456 | buf = fd_strpcatreg(buf, idx_rt, FD_OP_INDEX(instr, i), idx_sz); 457 | } 458 | uint64_t disp = FD_OP_DISP(instr, i); 459 | if (disp && (has_base || has_idx)) { 460 | *buf++ = (int64_t) disp < 0 ? 
'-' : '+'; 461 | if ((int64_t) disp < 0) 462 | disp = -disp; 463 | } 464 | if (FD_ADDRSIZELG(instr) == 1) 465 | disp &= 0xffff; 466 | else if (FD_ADDRSIZELG(instr) == 2) 467 | disp &= 0xffffffff; 468 | if (disp || (!has_base && !has_idx)) 469 | buf = fd_strpcatnum(buf, disp); 470 | *buf++ = ']'; 471 | 472 | if (UNLIKELY(op_type == FD_OT_MEMBCST)) { 473 | // {1toX}, X = FD_OP_SIZE(instr, i) / BCSTSZ (=> 2/4/8/16/32) 474 | unsigned bcstszidx = FD_OP_SIZELG(instr, i) - FD_OP_BCSTSZLG(instr, i) - 1; 475 | const char* bcstsizes = "\6{1to2} \6{1to4} \6{1to8} \7{1to16}\7{1to32} "; 476 | const char* bcstsize = bcstsizes + bcstszidx * 8; 477 | buf = fd_strpcat(buf, (struct FdStr) { bcstsize+1, *bcstsize }); 478 | } 479 | } else if (op_type == FD_OT_IMM || op_type == FD_OT_OFF) { 480 | uint64_t immediate = FD_OP_IMM(instr, i); 481 | // Some instructions have actually two immediate operands which are 482 | // decoded as a single operand. Split them here appropriately. 483 | switch (FD_TYPE(instr)) { 484 | default: 485 | goto nosplitimm; 486 | case FDI_SSE_EXTRQ: 487 | case FDI_SSE_INSERTQ: 488 | buf = fd_strpcatnum(buf, immediate & 0xff); 489 | buf = fd_strpcat(buf, fd_stre(", ")); 490 | immediate = (immediate >> 8) & 0xff; 491 | break; 492 | case FDI_ENTER: 493 | buf = fd_strpcatnum(buf, immediate & 0xffff); 494 | buf = fd_strpcat(buf, fd_stre(", ")); 495 | immediate = (immediate >> 16) & 0xff; 496 | break; 497 | case FDI_JMPF: 498 | case FDI_CALLF: 499 | buf = fd_strpcatnum(buf, (immediate >> (8 << size)) & 0xffff); 500 | *buf++ = ':'; 501 | // immediate is masked below. 
502 | break; 503 | } 504 | 505 | nosplitimm: 506 | if (op_type == FD_OT_OFF) 507 | immediate += addr + FD_SIZE(instr); 508 | if (size == 0) 509 | immediate &= 0xff; 510 | else if (size == 1) 511 | immediate &= 0xffff; 512 | else if (size == 2) 513 | immediate &= 0xffffffff; 514 | buf = fd_strpcatnum(buf, immediate); 515 | } 516 | 517 | if (i == 0 && FD_MASKREG(instr)) { 518 | *buf++ = '{'; 519 | buf = fd_strpcatreg(buf, FD_RT_MASK, FD_MASKREG(instr), 0); 520 | *buf++ = '}'; 521 | if (FD_MASKZERO(instr)) 522 | buf = fd_strpcat(buf, fd_stre("{z}")); 523 | } 524 | } 525 | if (UNLIKELY(FD_ROUNDCONTROL(instr) != FD_RC_MXCSR)) { 526 | switch (FD_ROUNDCONTROL(instr)) { 527 | case FD_RC_RN: buf = fd_strpcat(buf, fd_stre(", {rn-sae}")); break; 528 | case FD_RC_RD: buf = fd_strpcat(buf, fd_stre(", {rd-sae}")); break; 529 | case FD_RC_RU: buf = fd_strpcat(buf, fd_stre(", {ru-sae}")); break; 530 | case FD_RC_RZ: buf = fd_strpcat(buf, fd_stre(", {rz-sae}")); break; 531 | case FD_RC_SAE: buf = fd_strpcat(buf, fd_stre(", {sae}")); break; 532 | default: break; // should not happen 533 | } 534 | } 535 | *buf++ = '\0'; 536 | return buf; 537 | } 538 | 539 | void 540 | fd_format(const FdInstr* instr, char* buffer, size_t len) 541 | { 542 | fd_format_abs(instr, 0, buffer, len); 543 | } 544 | 545 | void 546 | fd_format_abs(const FdInstr* instr, uint64_t addr, char* restrict buffer, size_t len) { 547 | char tmp[128]; 548 | char* buf = buffer; 549 | if (UNLIKELY(len < 128)) { 550 | if (!len) 551 | return; 552 | buf = tmp; 553 | } 554 | 555 | char* end = fd_format_impl(buf, instr, addr); 556 | 557 | if (buf != buffer) { 558 | unsigned i; 559 | for (i = 0; i < (end - tmp) && i < len-1; i++) 560 | buffer[i] = tmp[i]; 561 | buffer[i] = '\0'; 562 | } 563 | } 564 | -------------------------------------------------------------------------------- /meson.build: -------------------------------------------------------------------------------- 1 | project('fadec', ['c'], default_options: 
['warning_level=3', 'c_std=c11'], 2 | meson_version: '>=0.49') 3 | 4 | python3 = find_program('python3') 5 | 6 | # Check Python version 7 | py_version_res = run_command(python3, ['--version'], check: true) 8 | py_version = py_version_res.stdout().split(' ')[1] 9 | if not py_version.version_compare('>=3.6') 10 | error('Python 3.6 required, got @0@'.format(py_version)) 11 | endif 12 | 13 | has_cpp = add_languages('cpp', required: false) 14 | 15 | cc = meson.get_compiler('c') 16 | if cc.has_argument('-fstrict-aliasing') 17 | add_project_arguments('-fstrict-aliasing', language: 'c') 18 | endif 19 | if get_option('warning_level').to_int() >= 3 20 | extra_warnings = [ 21 | '-Wmissing-prototypes', '-Wshadow', '-Wwrite-strings', '-Wswitch-default', 22 | '-Winline', '-Wstrict-prototypes', '-Wundef', 23 | # We have strings longer than 4095 characters 24 | '-Wno-overlength-strings', 25 | # GCC 8 requires an extra option for strict cast alignment checks, Clang 26 | # always warns, even on architectures without alignment requirements. 
27 | '-Wcast-align', '-Wcast-align=strict', 28 | ] 29 | add_project_arguments(cc.get_supported_arguments(extra_warnings), language: 'c') 30 | endif 31 | if cc.get_argument_syntax() == 'msvc' 32 | # Disable some warnings to align warnings with GCC and Clang: 33 | add_project_arguments('-D_CRT_SECURE_NO_WARNINGS', 34 | '/wd4018', # - Signed/unsigned comparison 35 | '/wd4146', # - Unary minus operator applied to unsigned 36 | # type, result still unsigned 37 | '/wd4244', # - Possible loss of data in conversion 38 | # from integer type to smaller integer type 39 | '/wd4245', # - Signed/unsigned assignment 40 | '/wd4267', # - Possible loss of data in conversion 41 | # from size_t to smaller type 42 | '/wd4310', # - Possible loss of data in conversion 43 | # of constant value to smaller type 44 | language: 'c') 45 | endif 46 | if cc.get_id() == 'msvc' and has_cpp 47 | cxx = meson.get_compiler('cpp') 48 | if cxx.get_id() == 'msvc' 49 | # Enable standard conformant preprocessor 50 | add_project_arguments(cxx.get_supported_arguments(['-Zc:preprocessor']), language: 'cpp') 51 | endif 52 | endif 53 | 54 | sources = [] 55 | headers = [] 56 | components = [] 57 | 58 | if get_option('with_decode') 59 | components += 'decode' 60 | headers += files('fadec.h') 61 | sources += files('decode.c', 'format.c') 62 | endif 63 | if get_option('with_encode') 64 | components += 'encode' 65 | headers += files('fadec-enc.h') 66 | sources += files('encode.c') 67 | endif 68 | if get_option('with_encode2') 69 | components += 'encode2' 70 | headers += files('fadec-enc2.h') 71 | sources += files('encode2.c') 72 | endif 73 | 74 | generate_args = [] 75 | if get_option('archmode') != 'only64' 76 | generate_args += ['--32'] 77 | endif 78 | if get_option('archmode') != 'only32' 79 | generate_args += ['--64'] 80 | endif 81 | if get_option('with_undoc') 82 | generate_args += ['--with-undoc'] 83 | endif 84 | if not meson.is_subproject() 85 | generate_args += ['--stats'] 86 | endif 87 | 88 | tables = [] 89 
| foreach component : components 90 | tables += custom_target('@0@_table'.format(component), 91 | command: [python3, '@INPUT0@', component, 92 | '@INPUT1@', '@OUTPUT@'] + generate_args, 93 | input: files('parseinstrs.py', 'instrs.txt'), 94 | output: ['fadec-@0@-public.inc'.format(component), 95 | 'fadec-@0@-private.inc'.format(component)], 96 | install: true, 97 | install_dir: [get_option('includedir'), false]) 98 | endforeach 99 | 100 | libfadec = static_library('fadec', sources, tables, install: true) 101 | fadec = declare_dependency(link_with: libfadec, 102 | include_directories: include_directories('.'), 103 | sources: tables) 104 | install_headers(headers) 105 | 106 | foreach component : components 107 | test(component, executable('@0@-test'.format(component), 108 | '@0@-test.c'.format(component), 109 | dependencies: fadec)) 110 | if component == 'encode2' and has_cpp 111 | test(component + '-cpp', executable('@0@-test-cpp'.format(component), 112 | '@0@-test.cc'.format(component), 113 | dependencies: fadec)) 114 | endif 115 | endforeach 116 | 117 | if meson.version().version_compare('>=0.54.0') 118 | meson.override_dependency('fadec', fadec) 119 | endif 120 | 121 | pkg = import('pkgconfig') 122 | pkg.generate(libraries: libfadec, 123 | version: '0.1', 124 | name: 'fadec', 125 | filebase: 'fadec', 126 | description: 'Fast Decoder for x86-32 and x86-64') 127 | -------------------------------------------------------------------------------- /meson_options.txt: -------------------------------------------------------------------------------- 1 | option('archmode', type: 'combo', choices: ['both', 'only32', 'only64']) 2 | option('with_undoc', type: 'boolean', value: false) 3 | option('with_decode', type: 'boolean', value: true) 4 | option('with_encode', type: 'boolean', value: true) 5 | # encode2 is off-by-default to reduce size and compile-time 6 | option('with_encode2', type: 'boolean', value: false) 7 | 
-------------------------------------------------------------------------------- /parseinstrs.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | 3 | import argparse 4 | import bisect 5 | from collections import OrderedDict, defaultdict, namedtuple, Counter 6 | from enum import Enum 7 | from itertools import product 8 | import re 9 | import struct 10 | from typing import NamedTuple, FrozenSet, List, Tuple, Union, Optional, ByteString 11 | 12 | INSTR_FLAGS_FIELDS, INSTR_FLAGS_SIZES = zip(*[ 13 | ("modrm_idx", 2), 14 | ("modreg_idx", 2), 15 | ("vexreg_idx", 2), # note: vexreg w/o vex prefix is zeroreg_val 16 | ("imm_idx", 2), 17 | ("evex_bcst", 1), 18 | ("evex_mask", 1), 19 | ("zeroreg_val", 1), 20 | ("lock", 1), 21 | ("imm_control", 3), 22 | ("vsib", 1), 23 | ("modrm_size", 2), 24 | ("modreg_size", 2), 25 | ("vexreg_size", 2), 26 | ("imm_size", 2), 27 | ("legacy", 1), 28 | ("unused2", 1), 29 | ("size_fix1", 3), 30 | ("size_fix2", 2), 31 | ("instr_width", 1), 32 | ("modrm_ty", 3), 33 | ("modreg_ty", 3), 34 | ("vexreg_ty", 2), 35 | ("imm_ty", 0), 36 | ("evex_rc", 2), 37 | ("evex_bcst16", 1), 38 | ("opsize", 3), 39 | ("modrm", 1), 40 | ("ign66", 1), 41 | ][::-1]) 42 | class InstrFlags(namedtuple("InstrFlags", INSTR_FLAGS_FIELDS)): 43 | def __new__(cls, **kwargs): 44 | init = {**{f: 0 for f in cls._fields}, **kwargs} 45 | return super(InstrFlags, cls).__new__(cls, **init) 46 | def _encode(self): 47 | enc = 0 48 | for value, size in zip(self, INSTR_FLAGS_SIZES): 49 | enc = enc << size | (value & ((1 << size) - 1)) 50 | return enc 51 | 52 | ENCODINGS = { 53 | "NP": InstrFlags(), 54 | "M": InstrFlags(modrm=1, modrm_idx=0^3), 55 | "R": InstrFlags(modrm=1, modreg_idx=0^3), # AMX TILEZERO 56 | "M1": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=1), 57 | "MI": InstrFlags(modrm=1, modrm_idx=0^3, imm_idx=1^3, imm_control=4), 58 | "IM": InstrFlags(modrm=1, modrm_idx=1^3, imm_idx=0^3, imm_control=4), 59 | 
"MC": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3, zeroreg_val=1), 60 | "MR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3), 61 | "RM": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3), 62 | "RMA": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3), 63 | "MRI": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, imm_idx=2^3, imm_control=4), 64 | "RMI": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, imm_idx=2^3, imm_control=4), 65 | "MRC": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3, zeroreg_val=1), 66 | "AM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3), 67 | "MA": InstrFlags(modrm=1, modrm_idx=0^3, vexreg_idx=1^3), 68 | "I": InstrFlags(imm_idx=0^3, imm_control=4), 69 | "IA": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=4), 70 | "O": InstrFlags(modrm_idx=0^3), 71 | "OI": InstrFlags(modrm_idx=0^3, imm_idx=1^3, imm_control=4), 72 | "OA": InstrFlags(modrm_idx=0^3, vexreg_idx=1^3), 73 | "S": InstrFlags(modreg_idx=0^3), # segment register in bits 3,4,5 74 | "A": InstrFlags(vexreg_idx=0^3), 75 | "D": InstrFlags(imm_idx=0^3, imm_control=6), 76 | "FD": InstrFlags(vexreg_idx=0^3, imm_idx=1^3, imm_control=2), 77 | "TD": InstrFlags(vexreg_idx=1^3, imm_idx=0^3, imm_control=2), 78 | 79 | "RVM": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3), 80 | "RVMI": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=4), 81 | "RVMR": InstrFlags(modrm=1, modrm_idx=2^3, modreg_idx=0^3, vexreg_idx=1^3, imm_idx=3^3, imm_control=3), 82 | "RMV": InstrFlags(modrm=1, modrm_idx=1^3, modreg_idx=0^3, vexreg_idx=2^3), 83 | "VM": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3), 84 | "VMI": InstrFlags(modrm=1, modrm_idx=1^3, vexreg_idx=0^3, imm_idx=2^3, imm_control=4), 85 | "MVR": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=2^3, vexreg_idx=1^3), 86 | "MRV": InstrFlags(modrm=1, modrm_idx=0^3, modreg_idx=1^3, vexreg_idx=2^3), 87 | } 88 | ENCODING_OPTYS = ["modrm", "modreg", 
"vexreg", "imm"] 89 | ENCODING_OPORDER = { enc: sorted(ENCODING_OPTYS, key=lambda ty: getattr(ENCODINGS[enc], ty+"_idx")^3) for enc in ENCODINGS} 90 | 91 | OPKIND_CANONICALIZE = { 92 | "I": "IMM", # immediate 93 | "A": "IMM", # Direct address, far jmp 94 | "J": "IMM", # RIP-relative address 95 | "M": "MEM", # ModRM.r/m selects memory only 96 | "O": "MEM", # Direct address, FD/TD encoding 97 | "R": "GP", # ModRM.r/m selects GP 98 | "B": "GP", # VEX.vvvv selects GP 99 | "E": "GP", # ModRM.r/m selects GP or memory 100 | "G": "GP", # ModRM.reg selects GP 101 | "P": "MMX", # ModRM.reg selects MMX 102 | "N": "MMX", # ModRM.r/m selects MMX 103 | "Q": "MMX", # ModRM.r/m selects MMX or memory 104 | "V": "XMM", # ModRM.reg selects XMM 105 | "H": "XMM", # VEX.vvvv selects XMM 106 | "L": "XMM", # bits7:4 of imm8 select XMM 107 | "U": "XMM", # ModRM.r/m selects XMM 108 | "W": "XMM", # ModRM.r/m selects XMM or memory 109 | "S": "SEG", # ModRM.reg selects SEG 110 | "C": "CR", # ModRM.reg selects CR 111 | "D": "DR", # ModRM.reg selects DR 112 | 113 | # Custom names 114 | "F": "FPU", # F is used for RFLAGS by Intel 115 | "K": "MASK", 116 | "T": "TMM", 117 | "Z": "BND", 118 | } 119 | OPKIND_SIZES = { 120 | "b": 1, 121 | "w": 2, 122 | "d": 4, 123 | "ss": 4, # Scalar single of XMM (d) 124 | "q": 8, 125 | "sd": 8, # Scalar double of XMM (q) 126 | "t": 10, # FPU/ten-byte 127 | "dq": 16, 128 | "qq": 32, 129 | "oq": 64, # oct-quadword 130 | "": 0, # for MEMZ 131 | "v": -1, # operand size (w/d/q) 132 | "y": -1, # operand size (d/q) 133 | "z": -1, # w/d (immediates, min(operand size, 4)) 134 | "a": -1, # z:z 135 | "p": -1, # w:z 136 | "x": -2, # vector size 137 | "h": -3, # half x 138 | "f": -4, # fourth x 139 | "e": -5, # eighth x 140 | "pd": -2, # packed double (x) 141 | "ps": -2, # packed single (x) 142 | 143 | # Custom names 144 | "bs": -1, # sign-extended immediate 145 | "zd": 4, # z-immediate, but always 4-byte operand 146 | "zq": 8, # z-immediate, but always 8-byte operand 147 | } 
148 | class OpKind(NamedTuple): 149 | regkind: str 150 | sizestr: str 151 | 152 | SZ_OP = -1 153 | SZ_VEC = -2 154 | SZ_VEC_HALF = -3 155 | SZ_VEC_QUARTER = -4 156 | SZ_VEC_EIGHTH = -5 157 | 158 | def abssize(self, opsz=None, vecsz=None): 159 | res = opsz if self.size == self.SZ_OP else \ 160 | vecsz if self.size == self.SZ_VEC else \ 161 | vecsz >> 1 if self.size == self.SZ_VEC_HALF else \ 162 | vecsz >> 2 if self.size == self.SZ_VEC_QUARTER else \ 163 | vecsz >> 3 if self.size == self.SZ_VEC_EIGHTH else self.size 164 | if res is None: 165 | raise Exception("unspecified operand size") 166 | return res 167 | def immsize(self, opsz): 168 | maxsz = 1 if self.sizestr == "bs" else 4 if self.sizestr[0] == "z" else 8 169 | return min(maxsz, self.abssize(opsz)) 170 | @property 171 | def kind(self): 172 | return OPKIND_CANONICALIZE[self.regkind] 173 | @property 174 | def size(self): 175 | return OPKIND_SIZES[self.sizestr] 176 | @classmethod 177 | def parse(cls, op): 178 | return cls(op[0], op[1:]) 179 | 180 | def __eq__(self, other): 181 | # Custom equality for canonicalization of kind/size. 182 | return isinstance(other, OpKind) and self.kind == other.kind and self.size == other.size 183 | 184 | class InstrDesc(NamedTuple): 185 | mnemonic: str 186 | encoding: str 187 | operands: Tuple[str, ...] 
188 | flags: FrozenSet[str] 189 | 190 | OPKIND_REGTYS = { 191 | ("modrm", "GP"): 1, ("modreg", "GP"): 1, ("vexreg", "GP"): 1, 192 | ("modrm", "XMM"): 0, ("modreg", "XMM"): 0, ("vexreg", "XMM"): 0, 193 | ("modrm", "MMX"): 5, ("modreg", "MMX"): 5, 194 | ("modrm", "FPU"): 4, ("vexreg", "FPU"): 3, 195 | ("modrm", "TMM"): 6, ("modreg", "TMM"): 6, ("vexreg", "TMM"): 3, 196 | ("modrm", "MASK"): 7, ("modreg", "MASK"): 7, ("vexreg", "MASK"): 2, 197 | ("modreg", "SEG"): 3, 198 | ("modreg", "DR"): 0, # handled in code 199 | ("modreg", "CR"): 0, # handled in code 200 | ("modrm", "MEM"): 0, 201 | ("imm", "MEM"): 0, ("imm", "IMM"): 0, ("imm", "XMM"): 0, 202 | } 203 | OPKIND_SIZES = { 204 | 0: 0, 1: 1, 2: 2, 4: 3, 8: 4, 16: 5, 32: 6, 64: 7, 10: 0, 205 | # OpKind.SZ_OP: -2, OpKind.SZ_VEC: -3, OpKind.SZ_HALFVEC: -4, 206 | } 207 | 208 | @classmethod 209 | def parse(cls, desc): 210 | desc = desc.split() 211 | mnem, _, compactDesc = desc[5].partition("+") 212 | flags = frozenset(desc[6:] + [{ 213 | "w": "INSTR_WIDTH", 214 | "a": "U67", 215 | "s": "USEG", 216 | "k": "MASK", 217 | "b": "BCST", 218 | "e": "SAE", 219 | "r": "ER", 220 | }[c] for c in compactDesc]) 221 | operands = tuple(OpKind.parse(op) for op in desc[1:5] if op != "-") 222 | return cls(mnem, desc[0], operands, flags) 223 | 224 | def imm_size(self, opsz): 225 | flags = ENCODINGS[self.encoding] 226 | if flags.imm_control < 3: 227 | return 0 228 | if flags.imm_control == 3: 229 | return 1 230 | if self.mnemonic == "ENTER": 231 | return 3 232 | return self.operands[flags.imm_idx^3].immsize(opsz) 233 | 234 | def dynsizes(self): 235 | dynopsz = set(op.size for op in self.operands if op.size < 0) 236 | if {"INSTR_WIDTH", "SZ8"} & self.flags: dynopsz.add(OpKind.SZ_OP) 237 | if OpKind.SZ_OP in dynopsz and len(dynopsz) > 1: 238 | raise Exception(f"conflicting dynamic operand sizes in {self}") 239 | return dynopsz 240 | 241 | def encode(self, mnem, ign66, modrm): 242 | flags = ENCODINGS[self.encoding] 243 | extraflags = {} 244 | 245 
| dynopsz = self.dynsizes() 246 | # Operand size either refers to vectors or GP, but not both 247 | if dynopsz and OpKind.SZ_OP not in dynopsz: # Vector operand size 248 | if self.flags & {"SZ8", "D64", "F64", "INSTR_WIDTH", "LOCK", "U66"}: 249 | raise Exception(f"incompatible flags in {self}") 250 | # Allow at most the vector size together with one alternative 251 | dynsizes = [OpKind.SZ_VEC] + list(dynopsz - {OpKind.SZ_VEC}) 252 | extraflags["opsize"] = 4 | (OpKind.SZ_VEC - dynsizes[-1]) 253 | if len(dynsizes) > 2: 254 | raise Exception(f"conflicting vector operand sizes in {self}") 255 | else: # either empty or GP operand size 256 | dynsizes = [OpKind.SZ_OP] 257 | if "SZ8" in self.flags: 258 | dynsizes = [] 259 | if "D64" in self.flags: extraflags["opsize"] = 2 260 | if "F64" in self.flags: extraflags["opsize"] = 3 261 | extraflags["lock"] = "LOCK" in self.flags 262 | 263 | if (self.flags & {"SZ8", "INSTR_WIDTH"} or 264 | mnem in ("MOVSX", "MOVZX", "XCHG_NOP", "3DNOW")): 265 | extraflags["legacy"] = 1 266 | # INSTR_WIDTH defaults to zero, so only enable when SZ8 is unset 267 | if "INSTR_WIDTH" in self.flags and "SZ8" not in self.flags: 268 | extraflags["instr_width"] = 1 269 | 270 | imm_byte = self.imm_size(4) == 1 271 | extraflags["imm_control"] = flags.imm_control | imm_byte 272 | 273 | # Sort fixed sizes encodable in size_fix2 as second element. 274 | # But: byte-sized immediates are handled specially and don't cost space. 275 | fixed = set(self.OPKIND_SIZES[op.size] for op in self.operands if 276 | op.size >= 0 and not (imm_byte and op.kind == "IMM")) 277 | fixed = sorted(fixed, key=lambda x: 1 <= x <= 4) 278 | if len(fixed) > 2 or (len(fixed) == 2 and not (1 <= fixed[1] <= 4)): 279 | raise Exception(f"invalid fixed sizes {fixed} in {self}") 280 | sizes = (fixed + [1, 1])[:2] + dynsizes # See operand_sizes in decode.c. 
281 | extraflags["size_fix1"] = sizes[0] 282 | extraflags["size_fix2"] = sizes[1] - 1 283 | 284 | for i, opkind in enumerate(self.operands): 285 | sz = self.OPKIND_SIZES[opkind.size] if opkind.size >= 0 else opkind.size 286 | if opkind.kind == "IMM": 287 | if imm_byte and sz not in [1] + dynsizes[:1]: 288 | raise Exception(f"imm_byte with opsize {sz} in {self}") 289 | extraflags[f"imm_size"] = sz == 1 if imm_byte else sizes.index(sz) 290 | else: 291 | opname = ENCODING_OPORDER[self.encoding][i] 292 | extraflags[f"{opname}_size"] = sizes.index(sz) 293 | extraflags[f"{opname}_ty"] = self.OPKIND_REGTYS[opname, opkind.kind] 294 | 295 | # Miscellaneous Flags 296 | if "VSIB" in self.flags: extraflags["vsib"] = 1 297 | if "BCST" in self.flags: extraflags["evex_bcst"] = 1 298 | if "BCST16" in self.flags: extraflags["evex_bcst16"] = 1 299 | if "MASK" in self.flags: extraflags["evex_mask"] = 1 300 | if "SAE" in self.flags: extraflags["evex_rc"] = 1 301 | if "ER" in self.flags: extraflags["evex_rc"] = 3 302 | if modrm: extraflags["modrm"] = 1 303 | 304 | if "U66" not in self.flags and (ign66 or "I66" in self.flags): 305 | extraflags["ign66"] = 1 306 | 307 | enc = flags._replace(**extraflags)._encode() 308 | enc = tuple((enc >> i) & 0xffff for i in range(0, 48, 16)) 309 | # First 2 bytes are the mnemonic, last 6 bytes are the encoding. 310 | return f"{{FDI_{mnem}, {enc[0]}, {enc[1]}, {enc[2]}}}" 311 | 312 | class EntryKind(Enum): 313 | NONE = 0x00 314 | PREFIX = 0x10 315 | INSTR = 0x20 316 | WEAKINSTR = 0x30 317 | TABLE16 = 0x01 318 | TABLE8E = 0x11 319 | ESCAPE = 0x02 320 | TABLE256 = 0x12 321 | TABLE_VEX = 0x22 322 | TABLE_PREFIX = 0x03 323 | TABLE_ROOT = -1 324 | @property 325 | def is_table(self): 326 | return self != EntryKind.INSTR and self != EntryKind.WEAKINSTR and self != EntryKind.PREFIX 327 | 328 | opcode_regex = re.compile( 329 | r"^(?:(?P(?PE?VEX\.)?(?PNP|66|F2|F3|NFx)\." + 330 | r"(?:W(?P[01])\.)?(?:L(?P0|1|12|2|IG)\.)?))?" 
+ 331 | r"(?P0f38|0f3a|0f|M[567]\.|)" + 332 | r"(?P[0-9a-f]{2})" + 333 | r"(?:/(?P[0-7]|[rm][0-7]?|[0-7][rm])|(?P[c-f][0-9a-f]))?(?P\+)?$") 334 | 335 | class Opcode(NamedTuple): 336 | prefix: Union[None, str] # None/NP/66/F2/F3/NFx 337 | escape: int # [0, 0f, 0f38, 0f3a] 338 | opc: int 339 | extended: bool # Extend opc or opcext in ModRM.rm, if present 340 | # Fixed ModRM.mod ("r"/"m"), ModRM.reg, ModRM.rm (opcext + AMX) 341 | modrm: Tuple[Union[None, str], Union[None, int], Union[None, int]] 342 | vex: int # 0 = legacy, 1 = VEX, 2 = EVEX 343 | vexl: Union[str, None] # 0, 1, 12, 2, IG, None = used, both 344 | rexw: Union[str, None] # 0, 1, None = both (or ignored) 345 | 346 | @classmethod 347 | def parse(cls, opcode_string): 348 | match = opcode_regex.match(opcode_string) 349 | if match is None: 350 | raise Exception(opcode_string) 351 | return None 352 | 353 | opcext = int(match.group("opcext") or "0", 16) 354 | modreg = match.group("modreg") 355 | if opcext: 356 | modrm = "r", (opcext >> 3) & 7, opcext & 7 357 | elif modreg: 358 | if modreg[0] in "rm": 359 | modrm = modreg[0], None, int(modreg[1:]) if modreg[1:] else None 360 | else: 361 | modrm = modreg[1:] or None, int(modreg[0]), None 362 | else: 363 | modrm = None, None, None 364 | 365 | return cls( 366 | prefix=match.group("legacy"), 367 | escape=["", "0f", "0f38", "0f3a", "M4.", "M5.", "M6.", "M7."].index(match.group("escape")), 368 | opc=int(match.group("opcode"), 16), 369 | extended=match.group("extended") is not None, 370 | modrm=modrm, 371 | vex=[None, "VEX.", "EVEX."].index(match.group("vex")), 372 | vexl=match.group("vexl"), 373 | rexw=match.group("rexw"), 374 | ) 375 | 376 | def verifyOpcodeDesc(opcode, desc): 377 | flags = ENCODINGS[desc.encoding] 378 | oporder = ENCODING_OPORDER[desc.encoding] 379 | expected_immkinds = ["", "I", "O", "L", "IA", "", "J"][flags.imm_control] 380 | fixed_mod = opcode.modrm[0] 381 | if opcode.extended or desc.mnemonic in ("MOV_CR2G", "MOV_DR2G", "MOV_G2CR", "MOV_G2DR"): 
        fixed_mod = "r"
    # Which addressing letters are acceptable for each slot given ModRM.mod.
    expected_modrmkinds = {None: "EQWFKT", "r": "RNUFKT", "m": "M"}[fixed_mod]
    # allow F and R for zeroreg, which we overlap with vexreg
    expected_vexkinds = "BHKT" if opcode.vex else "BHRF"
    for i, opkind in enumerate(desc.operands):
        if oporder[i] == "modrm" and opkind.regkind not in expected_modrmkinds:
            raise Exception(f"modrm operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "modreg" and opkind.regkind not in "GPVSCDFKT":
            raise Exception(f"modreg operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "vexreg" and opkind.regkind not in expected_vexkinds:
            raise Exception(f"vexreg operand-regkind mismatch {opcode}, {desc}")
        if oporder[i] == "imm" and opkind.regkind not in expected_immkinds:
            raise Exception(f"imm operand-regkind mismatch {opcode}, {desc}")
    if "INSTR_WIDTH" in desc.flags and len(desc.operands) > 3:
        raise Exception(f"+w with four operands {opcode}, {desc}")
    # Structural constraints on escape bytes, prefixes and VEX/EVEX usage.
    if opcode.escape == 2 and flags.imm_control != 0:
        raise Exception(f"0f38 has no immediate operand {opcode}, {desc}")
    if opcode.escape == 3 and desc.imm_size(4) != 1:
        raise Exception(f"0f3a must have immediate byte {opcode}, {desc}")
    if opcode.escape == 0 and opcode.prefix is not None:
        raise Exception(f"unescaped opcode has prefix {opcode}, {desc}")
    if opcode.escape == 0 and opcode.vexl is not None:
        raise Exception(f"unescaped opcode has L specifier {opcode}, {desc}")
    if opcode.escape == 0 and opcode.rexw is not None:
        raise Exception(f"unescaped opcode has W specifier {opcode}, {desc}")
    if opcode.escape == 0 and opcode.vex:
        raise Exception(f"VEX opcode without escape {opcode}, {desc}")
    if opcode.vex and opcode.extended:
        raise Exception(f"VEX/EVEX must not be extended {opcode}, {desc}")
    if opcode.vex and opcode.prefix not in ("NP", "66", "F2", "F3"):
        raise Exception(f"VEX/EVEX must have mandatory prefix {opcode}, {desc}")
    if opcode.vexl == "IG" and desc.dynsizes() - {OpKind.SZ_OP}:
        raise Exception(f"(E)VEX.LIG with dynamic vector size {opcode}, {desc}")
    if "VSIB" in desc.flags and opcode.modrm[0] != "m":
        raise Exception(f"VSIB for non-memory opcode {opcode}, {desc}")
    if opcode.vex == 2 and flags.vexreg_idx:
        # Checking this here allows to omit check for V' in decoder.
        if desc.operands[flags.vexreg_idx ^ 3].kind != "XMM":
            raise Exception(f"EVEX.vvvv must refer to XMM {opcode}, {desc}")
    if opcode.vex == 2 and flags.modreg_idx and flags.modreg_idx ^ 3 != 0:
        # EVEX.z=0 is only checked for mask operands in ModReg
        if desc.operands[flags.modreg_idx ^ 3].kind == "MASK":
            raise Exception(f"ModRM.reg mask not first operand {opcode}, {desc}")
    # Verify tuple type
    if opcode.vex == 2 and opcode.modrm[0] != "r":
        tts = [s for s in desc.flags if s.startswith("TUPLE")]
        if len(tts) != 1:
            raise Exception(f"missing tuple type in {opcode}, {desc}")
        if flags.modrm_idx == 3 ^ 3:
            raise Exception(f"missing memory operand {opcode}, {desc}")
        # From Intel SDM
        # (broadcast element size, required EVEX.W, memory size per L=128/256/512)
        bcst, evexw, vszs = {
            "TUPLE_FULL_16": (2, "0", ( 16, 32, 64)),
            "TUPLE_FULL_32": (4, "0", ( 16, 32, 64)),
            "TUPLE_FULL_64": (8, "1", ( 16, 32, 64)),
            "TUPLE_HALF_16": (2, "0", ( 8, 16, 32)),
            "TUPLE_HALF_32": (4, "0", ( 8, 16, 32)),
            "TUPLE_HALF_64": (8, "1", ( 8, 16, 32)),
            "TUPLE_QUARTER_16": (2, "0", ( 4, 8, 16)),
            "TUPLE_FULL_MEM": (None, None, ( 16, 32, 64)),
            "TUPLE_HALF_MEM": (None, None, ( 8, 16, 32)),
            "TUPLE_QUARTER_MEM": (None, None, ( 4, 8, 16)),
            "TUPLE_EIGHTH_MEM": (None, None, ( 2, 4, 8)),
            "TUPLE1_SCALAR_8": (None, None, ( 1, 1, 1)),
            "TUPLE1_SCALAR_16": (None, None, ( 2, 2, 2)),
            "TUPLE1_SCALAR_32": (None, "0", ( 4, 4, 4)),
            "TUPLE1_SCALAR_64": (None, "1", ( 8, 8, 8)),
            "TUPLE1_SCALAR_OPSZ": (None, None, ( 0, 0, 0)),
            "TUPLE1_FIXED_32": (None, None, ( 4, 4, 4)),
            "TUPLE1_FIXED_64": (None, None, ( 8, 8, 8)),
            "TUPLE2_32": (None, "0", ( 8, 8, 8)),
            "TUPLE2_64": (None, "1", (None, 16, 16)),
            "TUPLE4_32": (None, "0", (None, 16, 16)),
            "TUPLE4_64": (None, "1", (None, None, 32)),
            "TUPLE8_32": (None, "0", (None, None, 32)),
            "TUPLE_MEM128": (None, None, ( 16, 16, 16)),
            # TODO: Fix MOVDDUP tuple size :(
            "TUPLE_MOVDDUP": (None, None, ( 16, 32, 64)),
        }[tts[0]]
        if "BCST" in desc.flags:
            if bcst is None:
                raise Exception(f"broadcast on incompatible type {opcode}, {desc}")
            if ("BCST16" in desc.flags) != (bcst == 2):
                raise Exception(f"bcst16 mismatch, should be {bcst} {opcode}, {desc}")
        # EVEX.W is used to distinguish 4/8-byte broadcast size
        if evexw and opcode.rexw != evexw:
            raise Exception(f"incompatible EVEX.W {opcode}, {desc}")
        for l, tupsz in enumerate(vszs):
            opsz = desc.operands[flags.modrm_idx ^ 3].abssize(0, 16 << l)
            if tupsz is not None and opsz != tupsz:
                raise Exception(f"memory size {opsz} != {tupsz} {opcode}, {desc}")

class Trie:
    """Multi-level decode table: opcode byte(s) -> escape -> prefix -> ... .

    Levels (KIND_ORDER) must match the traversal order in decode.c.
    """
    KIND_ORDER = (EntryKind.TABLE_ROOT, EntryKind.ESCAPE, EntryKind.TABLE256,
                  EntryKind.TABLE_PREFIX, EntryKind.TABLE16,
                  EntryKind.TABLE8E, EntryKind.TABLE_VEX)
    # Number of slots per table of each kind.
    TABLE_LENGTH = {
        EntryKind.TABLE_ROOT: 256,
        EntryKind.ESCAPE: 8,
        EntryKind.TABLE256: 256,
        EntryKind.TABLE_PREFIX: 4,
        EntryKind.TABLE16: 16,
        EntryKind.TABLE8E: 8,
        EntryKind.TABLE_VEX: 8,
    }

    def __init__(self, root_count):
        # trie[0] holds one root slot per decoding mode (e.g. 32/64 bit).
        self.trie = []
        self.trie.append([None] * root_count)
        self.kindmap = defaultdict(list)

    def _add_table(self, kind):
        """Append a fresh empty table of the given kind; return its index."""
        self.trie.append([None] * self.TABLE_LENGTH[kind])
        self.kindmap[kind].append(len(self.trie) - 1)
        return len(self.trie) - 1

    def _clone(self, elem):
        """Deep-copy a (kind, table-index) entry; leaves are shared as-is."""
        if not elem or not
elem[0].is_table:
            return elem
        new_num = self._add_table(elem[0])
        self.trie[new_num] = [self._clone(e) for e in self.trie[elem[1]]]
        return elem[0], new_num

    def _transform_opcode(self, opc):
        """Map an Opcode to per-level index lists (None = level unused).

        Returns one list per KIND_ORDER level; each list holds the slot
        indices the opcode occupies at that level.
        """
        realopcext = opc.extended and opc.modrm[2] is None
        topc = [opc.opc + i for i in range(8 if realopcext else 1)]
        if opc.escape == 0 and opc.opc in (0xc4, 0xc5, 0x62):
            # VEX/EVEX prefix bytes themselves: only a root entry.
            assert opc.prefix is None
            assert opc.modrm == ("m", None, None)
            assert opc.rexw is None
            assert opc.vexl is None
            # We do NOT encode /m, this is handled by prefix code.
            # Order must match KIND_ORDER.
            return topc, [0], None, None, None, None, None
        elif opc.escape == 0:
            troot, tescape, topc = topc, None, None
        else:
            troot = [[0x0f], [0xc4, 0xc5], [0x62]][opc.vex]
            tescape = [opc.escape]

        tprefix, t16, t8e, tvex = None, None, None, None
        if opc.prefix == "NFx":
            tprefix = [0, 1]
        elif opc.prefix:
            tprefix = [["NP", "66", "F3", "F2"].index(opc.prefix)]
        if opc.modrm != (None, None, None):
            # TODO: optimize for /r and /m specifiers to reduce size
            mod = {"m": [0], "r": [1], None: [0, 1]}[opc.modrm[0]]
            reg = [opc.modrm[1]] if opc.modrm[1] is not None else list(range(8))
            t16 = [x + (y << 1) for x in mod for y in reg]
            if opc.modrm[2] is not None and not opc.extended:
                t8e = [opc.modrm[2]]
        if opc.rexw is not None or (opc.vexl or "IG") != "IG":
            # TABLE_VEX index: bit 0 = REX.W, bits 1+ = VEX.L / EVEX.L'L.
            rexw = {"0": [0], "1": [1<<0], None: [0, 1<<0]}[opc.rexw]
            if opc.vex < 2:
                vexl = {"0": [0], "1": [1<<1], "IG": [0, 1<<1]}[opc.vexl or "IG"]
            else:
                vexl = {"0": [0], "12": [1<<1, 2<<1], "2": [2<<1], "IG": [0, 1<<1, 2<<1, 3<<1]}[opc.vexl or "IG"]
            tvex = list(map(sum, product(rexw, vexl)))
        # Order must match KIND_ORDER.
        return troot, tescape, topc, tprefix, t16, t8e, tvex

    def add_opcode(self, opcode, descidx, root_idx, weak=False):
        """Insert descidx for an opcode under root root_idx.

        Weak entries may be overwritten by later non-weak ones; a second
        non-weak entry for the same slot is an error.
        """
        opcode = self._transform_opcode(opcode)
        frontier = [(0, root_idx)]
        for elem_kind, elem in zip(self.KIND_ORDER, opcode):
            new_frontier = []
            for entry_num, entry_idx in frontier:
                entry = self.trie[entry_num]
                if elem is None:
                    # Level unused: stay put unless a table of this kind
                    # already exists here, then cover all its slots.
                    if entry[entry_idx] is None or entry[entry_idx][0] != elem_kind:
                        new_frontier.append((entry_num, entry_idx))
                        continue
                    elem = list(range(self.TABLE_LENGTH[elem_kind]))
                if entry[entry_idx] is None:
                    new_num = self._add_table(elem_kind)
                    entry[entry_idx] = elem_kind, new_num
                elif entry[entry_idx][0] != elem_kind:
                    # Need to add a new node here and copy entry one level below
                    new_num = self._add_table(elem_kind)
                    # Keep original entry, but clone others recursively
                    self.trie[new_num][0] = entry[entry_idx]
                    for i in range(1, len(self.trie[new_num])):
                        self.trie[new_num][i] = self._clone(entry[entry_idx])
                    entry[entry_idx] = elem_kind, new_num
                for elem_idx in elem:
                    new_frontier.append((entry[entry_idx][1], elem_idx))
            frontier = new_frontier
        for entry_num, entry_idx in frontier:
            entry = self.trie[entry_num]
            if not entry[entry_idx] or entry[entry_idx][0] == EntryKind.WEAKINSTR:
                kind = EntryKind.INSTR if not weak else EntryKind.WEAKINSTR
                # descidx << 2: low 2 bits carry the entry kind in the table.
                entry[entry_idx] = kind, descidx << 2
            elif not weak:
                raise Exception(f"redundant non-weak {opcode}")

    def add_prefix(self, byte, prefix, root_idx):
        """Register a prefix byte (magic value) in the root table of a mode."""
        if self.trie[0][root_idx] is None:
            self.trie[0][root_idx] = EntryKind.TABLE_ROOT, self._add_table(EntryKind.TABLE_ROOT)
        self.trie[self.trie[0][root_idx][1]][byte] = EntryKind.PREFIX, prefix

    def deduplicate(self):
        """Merge identical tables and collapse single-child tables, bottom-up."""
        synonyms = {}
        for kind in self.KIND_ORDER[::-1]:
            entries = {}
            for num in self.kindmap[kind]:
                # Replace previous synonyms
                entry = self.trie[num]
                for i, elem in enumerate(entry):
                    if elem and elem[0].is_table and elem[1] in synonyms:
                        entry[i] = synonyms[elem[1]]

                unique_entry = tuple(entry)
                if len(set(unique_entry)) == 1:
                    # Omit kind if all entries point to the same child
                    synonyms[num] = entry[0]
                    self.trie[num] = None
                elif unique_entry in entries:
                    # Deduplicate entries of this kind
                    synonyms[num] = kind, entries[unique_entry]
                    self.trie[num] = None
                else:
                    entries[unique_entry] = num

    def compile(self):
        """Flatten the trie into (table_data, per-root offsets) for decode.c."""
        offsets = [None] * len(self.trie)
        last_off = 0
        for num, entry in enumerate(self.trie[1:], start=1):
            if not entry:
                continue
            offsets[num] = last_off
            # Align each table to 4 entries; offsets must fit in 15 bits.
            last_off += (len(entry) + 3) & ~3
            if last_off >= 0x8000:
                raise Exception(f"maximum table size exceeded: {last_off:#x}")

        data = [0] * last_off
        for off, entry in zip(offsets, self.trie):
            if off is None:
                continue
            for i, elem in enumerate(entry, start=off):
                if elem is not None:
                    # Low 2 bits of each 16-bit word encode the entry kind.
                    value = offsets[elem[1]] if elem[0].is_table else elem[1]
                    data[i] = value | (elem[0].value & 3)
        return tuple(data), [offsets[v] for _, v in self.trie[0]]

    @property
    def stats(self):
        # Count of live (non-deduplicated) tables per kind.
        return {k.name: sum(self.trie[e] is not None for e in v)
                for k, v in self.kindmap.items()}


def superstring(strs):
    # This faces the "shortest superstring" problem, which is NP-hard.
    # Preprocessing: remove any strings which are already completely covered
    realstrs = []
    for s in sorted(strs, key=len, reverse=True):
        for s2 in realstrs:
            if s in s2:
                break
        else:
            realstrs.append(s)

    # Greedy heuristic generally yields acceptable results, though it depends on
    # the order of the mnemonics.
    # More compact results are possible, but the
    # expectable gains of an optimal result (probably with O(n!)) are small.
    # First sort strings and later do a binary search for each possible prefix.
    realstrs.sort()
    merged = ""
    while realstrs:
        # Try the longest overlap (up to 16 chars) between the current tail
        # and any remaining string; otherwise append an arbitrary string.
        for i in range(min(16, len(merged)), 0, -1):
            idx = bisect.bisect_left(realstrs, merged[-i:])
            if idx < len(realstrs) and realstrs[idx][:i] == merged[-i:]:
                merged += realstrs.pop(idx)[i:]
                break
        else:
            merged += realstrs.pop()
    return merged

def decode_table(entries, args):
    """Build the decode tables; returns (mnemonic list text, C table text)."""
    modes = args.modes

    trie = Trie(root_count=len(modes))
    for i, mode in enumerate(modes):
        # Magic values must match PF_* enum in decode.c.
        trie.add_prefix(0x66, 0xfffa, i)
        trie.add_prefix(0x67, 0xfffb, i)
        trie.add_prefix(0xf0, 0xfffc, i)
        # NOTE(review): 0xf2 and 0xf3 map to the same magic value here --
        # verify against the PF_* enum in decode.c.
        trie.add_prefix(0xf2, 0xfffd, i)
        trie.add_prefix(0xf3, 0xfffd, i)
        trie.add_prefix(0x64, 0xfff9, i)
        trie.add_prefix(0x65, 0xfff9, i)
        for seg in (0x26, 0x2e, 0x36, 0x3e):
            # Legacy segment prefixes are ignored in 64-bit mode.
            trie.add_prefix(seg, 0xfff8 + (mode <= 32), i)
        if mode > 32:
            for rex in range(0x40, 0x50):
                trie.add_prefix(rex, 0xfffe, i)

    # pause is hardcoded together with XCHG_NOP.
    mnems, descs, desc_map = {"PAUSE"}, [], {}
    descs.append("{0}") # desc index zero is "invalid"
    for weak, opcode, desc in entries:
        ign66 = opcode.prefix in ("NP", "66", "F2", "F3")
        modrm = opcode.modrm != (None, None, None)
        # Collapse direction/register-class variants into one decode mnemonic.
        mnem = {
            "PUSH_SEG": "PUSH", "POP_SEG": "POP",
            "MOV_CR2G": "MOV_CR", "MOV_G2CR": "MOV_CR",
            "MOV_DR2G": "MOV_DR", "MOV_G2DR": "MOV_DR",
            "MMX_MOVD_M2G": "MMX_MOVD", "MMX_MOVD_G2M": "MMX_MOVD",
            "MMX_MOVQ_M2G": "MMX_MOVQ", "MMX_MOVQ_G2M": "MMX_MOVQ",
            "SSE_MOVD_X2G": "SSE_MOVD", "SSE_MOVD_G2X": "SSE_MOVD",
            "SSE_MOVQ_X2G": "SSE_MOVQ", "SSE_MOVQ_G2X": "SSE_MOVQ",
            "VMOVD_X2G": "VMOVD", "VMOVD_G2X": "VMOVD",
            "VMOVQ_X2G": "VMOVQ", "VMOVQ_G2X": "VMOVQ",
        }.get(desc.mnemonic, desc.mnemonic)
        mnems.add(mnem)
        # Deduplicate identical descriptor encodings.
        descenc = desc.encode(mnem, ign66, modrm)
        desc_idx = desc_map.get(descenc)
        if desc_idx is None:
            desc_idx = desc_map[descenc] = len(descs)
            descs.append(descenc)
        for i, mode in enumerate(modes):
            # Skip I64 entries in 64-bit mode and O64 entries in 32-bit mode.
            if "IO"[mode <= 32]+"64" not in desc.flags:
                trie.add_opcode(opcode, desc_idx, i, weak)

    trie.deduplicate()
    table_data, root_offsets = trie.compile()

    mnems = sorted(mnems)
    decode_mnems_lines = [f"FD_MNEMONIC({m},{i})\n" for i, m in enumerate(mnems)]

    # Derive Intel-style display names, then pack them into one superstring
    # indexed by STRTAB2 (offsets) and STRTAB3 (lengths).
    mnemonics_intel = [m.replace("SSE_", "").replace("MMX_", "")
                        .replace("EVX_", "V")
                        .replace("MOVABS", "MOV").replace("RESERVED_", "")
                        .replace("JMPF", "JMP FAR").replace("CALLF", "CALL FAR")
                        .replace("_S2G", "").replace("_G2S", "")
                        .replace("_X2G", "").replace("_G2X", "")
                        .replace("_CR", "").replace("_DR", "")
                        .replace("REP_", "REP ").replace("CMPXCHGD", "CMPXCHG")
                        .replace("JCXZ", "JCXZ JECXZJRCXZ")
                        .replace("C_SEP", "CWD CDQ CQO")
                        .replace("C_EX", "CBW CWDECDQE").replace("XCHG_NOP", "")
                        .lower() for m in mnems]
    mnemonics_str = superstring(mnemonics_intel)

    if args.stats:
        print(f"Decode stats: Descs -- {len(descs)} ({8*len(descs)} bytes); ",
              f"Trie -- {2*len(table_data)} bytes, {trie.stats}; "
              f"Mnems -- {len(mnemonics_str)} + {3*len(mnemonics_intel)} bytes")

    defines = ["FD_TABLE_OFFSET_%d %d\n"%k for k in zip(modes, root_offsets)]

    return "".join(decode_mnems_lines), f"""// Auto-generated file -- do not modify!
#if defined(FD_DECODE_TABLE_DATA)
{"".join(f"{e:#06x}," for e in table_data)}
#elif defined(FD_DECODE_TABLE_DESCS)
{",".join(descs)}
#elif defined(FD_DECODE_TABLE_STRTAB1)
"{mnemonics_str}"
#elif defined(FD_DECODE_TABLE_STRTAB2)
{",".join(str(mnemonics_str.index(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_STRTAB3)
{",".join(str(len(mnem)) for mnem in mnemonics_intel)}
#elif defined(FD_DECODE_TABLE_DEFINES)
{"".join("#define " + line for line in defines)}
#else
#error "unspecified decode table"
#endif
"""

class EncodeVariant(NamedTuple):
    """One concrete encoding option for an encoder mnemonic."""
    opcode: Opcode
    desc: InstrDesc
    evexbcst: bool = False
    evexmask: int = 0 # 0 = none, 1 = must have mask, 2 = mask + EVEX.z
    evexsae: int = 0 # 0 = no EVEX.b, 1 = EVEX.b, 2 = EVEX.b + L'L is rounding mode
    evexdisp8scale: int = 0 # EVEX disp8 shift
    downgrade: int = 0 # 0 = none, 1 = to VEX, 2 = to VEX flipping REXW

def encode_mnems(entries):
    """Expand instrs.txt entries into encoder mnemonic variants.

    Returns a dict (mnemonic name, opsize, operand-type string) -> [EncodeVariant].
    """
    # mapping from (mnem, opsize, ots) -> (opcode, desc)
    mnemonics = defaultdict(list)
    # Cannot have PAUSE in instrs.txt, because opcodes in without escape must
    # not have mandatory prefixes. For decode, this is hardcoded.
765 | mnemonics["PAUSE", 0, ""] = [EncodeVariant(Opcode.parse("F3.90"), InstrDesc.parse("NP - - - - NOP"))] 766 | for weak, opcode, desc in entries: 767 | if "I64" in desc.flags or desc.mnemonic[:9] == "RESERVED_": 768 | continue 769 | mnem_name = {"MOVABS": "MOV", "XCHG_NOP": "XCHG"}.get(desc.mnemonic, desc.mnemonic) 770 | mnem_name = mnem_name.replace("EVX_", "V") 771 | 772 | opsizes, vecsizes = {0}, {0} 773 | prepend_opsize, prepend_vecsize = False, False 774 | # Where to put the operand size in the mnemonic 775 | separate_opsize = "ENC_SEPSZ" in desc.flags 776 | 777 | if "ENC_NOSZ" in desc.flags or not desc.dynsizes(): 778 | pass 779 | elif OpKind.SZ_OP in desc.dynsizes(): 780 | if opcode.rexw is not None: 781 | raise Exception(f"unexpected REXW specifier {desc}") 782 | opsizes = {8} if "SZ8" in desc.flags else {16, 32, 64} 783 | if opcode.prefix in ("NP", "66", "F2", "F3") and "U66" not in desc.flags: 784 | opsizes -= {16} 785 | if "I66" in desc.flags: 786 | opsizes -= {16} 787 | if "D64" in desc.flags: 788 | opsizes -= {32} 789 | prepend_opsize = not separate_opsize 790 | if "F64" in desc.flags: 791 | opsizes = {64} 792 | prepend_opsize = False 793 | elif opcode.vex and opcode.vexl != "IG": # vectors; don't care for SSE 794 | vecsizes = {128, 256, 512} if opcode.vex == 2 else {128, 256} 795 | if opcode.vexl: 796 | vecsizes = {128 << int(c) for c in opcode.vexl} 797 | prepend_vecsize = not separate_opsize 798 | 799 | # All encoding types; reg is r/k (mask); modrm is r/m/b (broadcast) 800 | optypes_base = [] 801 | for i, opkind in enumerate(desc.operands): 802 | reg = "k" if opkind.kind == "MASK" else "r" 803 | opname = ENCODING_OPORDER[desc.encoding][i] 804 | if opname == "modrm": 805 | modrm_type = (opcode.modrm[0] or "rm").replace("r", reg) 806 | if opcode.extended or desc.mnemonic in ("MOV_CR2G", "MOV_DR2G", "MOV_G2CR", "MOV_G2DR"): 807 | modrm_type = reg 808 | if "BCST" in desc.flags: 809 | modrm_type += "b" 810 | optypes_base.append(modrm_type) 811 | elif 
opname == "modreg" or opname == "vexreg": 812 | optypes_base.append(reg) 813 | else: 814 | optypes_base.append(" iariioo"[ENCODINGS[desc.encoding].imm_control]) 815 | optypes = ["".join(x) for x in product(*optypes_base)] 816 | 817 | prefixes = [("", "")] 818 | if "LOCK" in desc.flags: 819 | prefixes.append(("LOCK_", "LOCK")) 820 | if "ENC_REP" in desc.flags: 821 | prefixes.append(("REP_", "F3")) 822 | if "ENC_REPCC" in desc.flags: 823 | prefixes.append(("REPNZ_", "F2")) 824 | prefixes.append(("REPZ_", "F3")) 825 | 826 | evexmasks = [0] 827 | if "MASK" in desc.flags: 828 | if "VSIB" in desc.flags: 829 | evexmasks = [1] 830 | else: 831 | evexmasks.append(1) 832 | if desc.operands[0].kind != "MASK": 833 | evexmasks.append(2) # maskz only for non-mask destinations 834 | evexsaes = [0] 835 | if "SAE" in desc.flags: 836 | evexsaes.append(1) 837 | elif "ER" in desc.flags: 838 | evexsaes.append(2) 839 | 840 | keys = (opsizes, vecsizes, prefixes, optypes, evexmasks, evexsaes) 841 | for opsize, vecsize, prefix, ots, evexmask, evexsae in product(*keys): 842 | has_memory = "m" in ots or "b" in ots 843 | if prefix[1] == "LOCK" and ots[0] != "m": 844 | continue 845 | if evexmask == 2 and ots[0] != "r": 846 | continue # EVEX.z must be zero for memory destination 847 | if evexsae and (vecsize not in (0, 512) or has_memory): 848 | continue # SAE/ER only works with 512 bit width and no memory 849 | 850 | spec_opcode = opcode 851 | if prefix[1]: 852 | spec_opcode = spec_opcode._replace(prefix=prefix[1]) 853 | if opsize == 64 and "D64" not in desc.flags and "F64" not in desc.flags: 854 | spec_opcode = spec_opcode._replace(rexw="1") 855 | if vecsize == 512: 856 | spec_opcode = spec_opcode._replace(vexl="2") 857 | if vecsize == 256: 858 | spec_opcode = spec_opcode._replace(vexl="1") 859 | if vecsize == 128: 860 | spec_opcode = spec_opcode._replace(vexl="0") 861 | if spec_opcode.vexl == "IG": 862 | spec_opcode = spec_opcode._replace(vexl="0") 863 | if ENCODINGS[desc.encoding].modrm_idx: 
864 | modrm = ("m" if has_memory else "r",) + spec_opcode.modrm[1:] 865 | spec_opcode = spec_opcode._replace(modrm=modrm) 866 | if ENCODINGS[desc.encoding].modrm or None not in opcode.modrm: 867 | assert spec_opcode.modrm[0] in ("r", "m") 868 | 869 | evexbcst = "b" in ots 870 | evexdisp8scale = 0 871 | if spec_opcode.vex == 2 and has_memory: 872 | if not evexbcst: 873 | op = desc.operands[ENCODINGS[desc.encoding].modrm_idx^3] 874 | size = op.abssize(opsize//8, vecsize//8) 875 | evexdisp8scale = size.bit_length() - 1 876 | elif "BCST16" in desc.flags: 877 | evexdisp8scale = 1 878 | else: 879 | evexdisp8scale = 2 if spec_opcode.rexw != "1" else 3 880 | 881 | # Construct mnemonic name 882 | name = prefix[0] + mnem_name 883 | 884 | # Transform MOV_G2X/X2G into MOVD/MOVQ_G2X/X2G. This isn't done for 885 | # VEX for historical reasons and there's no reason to break 886 | # backwards compatibility. This enables EVEX->VEX fallback. 887 | if desc.mnemonic in ("EVX_MOV_G2X", "EVX_MOV_X2G"): 888 | name = name[:-4] + "DQ"[opsize == 64] + name[-4:] 889 | prepend_opsize, opsize = False, 0 890 | # For VMOVD with memory operand, there's no need to be explicit 891 | # about G2X/X2G, as there's no alternative. For VMOVQ, another 892 | # opcode exists, so keep G2X/X2G there for distinguishing. 893 | if name in ("VMOVD_G2X", "VMOVD_X2G") and has_memory: 894 | name = name.replace("_G2X", "").replace("_X2G", "") 895 | # PEXTR/PBROADCAST/PINSR are stored without size suffix in the table 896 | # to avoid having different tables for 32/64 bit mode due to EVEX.W 897 | # being ignored in 32-bit mode. Add suffix here. 
898 | if desc.mnemonic == "EVX_PEXTR": 899 | name += " BW D Q"[desc.operands[0].abssize(opsize//8, vecsize//8)] 900 | prepend_opsize, opsize = False, 0 901 | if desc.mnemonic == "EVX_PBROADCAST": 902 | name += " BW D Q"[desc.operands[1].abssize(opsize//8, vecsize//8)] 903 | name += "_GP" 904 | prepend_opsize, opsize = False, 0 905 | if desc.mnemonic == "EVX_PINSR": 906 | name += " BW D Q"[desc.operands[2].abssize(opsize//8, vecsize//8)] 907 | prepend_opsize, opsize = False, 0 908 | 909 | if prepend_opsize and not ("D64" in desc.flags and opsize == 64): 910 | name += f"_{opsize}"[name[-1] not in "0123456789":] 911 | if prepend_vecsize: 912 | name += f"_{vecsize}"[name[-1] not in "0123456789":] 913 | for ot, op in zip(ots, desc.operands): 914 | name += ot.replace("o", "") 915 | if separate_opsize: 916 | name += f"{op.abssize(opsize//8, vecsize//8)*8}" 917 | if "VSIB" not in desc.flags: 918 | # VSIB implies non-zero mask register, so suffix is not required 919 | name += ["", "_mask", "_maskz"][evexmask] 920 | name += ["", "_sae", "_er"][evexsae] 921 | variant = EncodeVariant(spec_opcode, desc, evexbcst, evexmask, evexsae, evexdisp8scale) 922 | mnemonics[name, opsize, ots].append(variant) 923 | altname = { 924 | "C_EX16": "CBW", "C_EX32": "CWDE", "C_EX64": "CDQE", 925 | "C_SEP16": "CWD", "C_SEP32": "CDQ", "C_SEP64": "CQO", 926 | "CMPXCHGD32m": "CMPXCHG8Bm", "CMPXCHGD64m": "CMPXCHG16Bm", 927 | }.get(name) 928 | if altname: 929 | mnemonics[altname, opsize, ots].append(variant) 930 | 931 | for (mnem, opsize, ots), all_variants in mnemonics.items(): 932 | dedup = OrderedDict() 933 | for i, variant in enumerate(all_variants): 934 | PRIO = ["O", "OA", "AO", "AM", "MA", "IA", "OI"] 935 | enc_prio = PRIO.index(variant.desc.encoding) if variant.desc.encoding in PRIO else len(PRIO) 936 | unique = 0 if variant.desc.encoding != "S" else i 937 | # Prefer VEX over EVEX for shorter encoding 938 | key = variant.desc.imm_size(opsize//8), variant.opcode.vex, enc_prio, unique 939 | if 
key not in dedup: 940 | dedup[key] = variant 941 | variants = [dedup[k] for k in sorted(dedup.keys())] 942 | if len(variants) > 1 and any(v.opcode.vex for v in variants): 943 | # Case 1: VEX -> EVEX promotion (AVX-512, APX) 944 | # Case 2: legacy -> EVEX promotion (APX) 945 | # In any case, there should be exactly one EVEX opcode. 946 | if len(variants) != 2: 947 | raise Exception(f"VEX/EVEX mnemonic with more than two encodings {mnem} {opcode}") 948 | if variants[0].opcode.vex == 2 or variants[1].opcode.vex != 2: 949 | raise Exception(f"EVEX mnemonic not with non-EVEX pair {mnem} {opcode} {variants}") 950 | no_evex, evex = variants[0], variants[1] 951 | 952 | # Make sure that for promotions, only minor things vary. 953 | # REX.W is special, EVEX might mandate W1 while VEX mandates W0/WIG. 954 | # Technically ok: IG -> IG/IG -> 0/0 -> IG/0 -> 0/1 -> IG/1 -> 1 955 | # rexwdowngrade = (no_evex.opcode.rexw is None or 956 | # no_evex.opcode.rexw == evex.opcode.rexw) 957 | # 958 | # However, other encoders always use W0 in case of WIG for VEX, and 959 | # that's probably most beneficial... so: 960 | # Possible downgrades: IG -> IG/IG -> 0/0 -> IG/0 -> 0/1 -> 1 961 | # This affects quite a few instructions, so we use an extra bit to 962 | # flip EVEX.W to VEX.W. 963 | 964 | if (no_evex.opcode.prefix != evex.opcode.prefix or 965 | no_evex.opcode.escape != evex.opcode.escape or 966 | no_evex.opcode.opc != evex.opcode.opc or 967 | # reg/mem doesn't matter, it's already fixed in the mnemonic 968 | no_evex.opcode.modrm[1:] != evex.opcode.modrm[1:] or 969 | no_evex.opcode.vexl != evex.opcode.vexl or 970 | # we don't check rexw_flip here, we can always handle it 971 | no_evex.desc.encoding != evex.desc.encoding or 972 | no_evex.desc.operands != evex.desc.operands): 973 | print(mnem, no_evex) 974 | print(mnem, evex) 975 | # Should not happen. 
976 | raise Exception("cannot downgrade EVEX?") 977 | else: 978 | rexw_flip = (no_evex.opcode.rexw == "1") != (evex.opcode.rexw == "1") 979 | variants = [evex._replace(downgrade=1 if not rexw_flip else 2)] 980 | mnemonics[mnem, opsize, ots] = variants 981 | 982 | return dict(mnemonics) 983 | 984 | def encode_table(entries, args): 985 | mnemonics = encode_mnems(entries) 986 | mnemonics["NOP", 0, ""] = [EncodeVariant(Opcode.parse("90"), InstrDesc.parse("NP - - - - NOP"))] 987 | mnem_map = {} 988 | alt_table = [0] # first entry is unused 989 | for (mnem, opsize, ots), variants in mnemonics.items(): 990 | supports_high_regs = [] 991 | if variants[0][1].mnemonic in ("MOVSX", "MOVZX") or opsize == 8: 992 | # Should be the same for all variants 993 | desc = variants[0][1] 994 | for i, (ot, op) in enumerate(zip(ots, desc.operands)): 995 | if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1: 996 | supports_high_regs.append(i) 997 | 998 | alt_indices = [i + len(alt_table) for i in range(len(variants) - 1)] + [0] 999 | enc_opcs = [] 1000 | for alt, variant in zip(alt_indices, variants): 1001 | opcode, desc = variant.opcode, variant.desc 1002 | encoding = ENCODINGS[desc.encoding] 1003 | opc_i = opcode.opc 1004 | if None not in opcode.modrm: 1005 | opc_i |= 0xc000 | opcode.modrm[1] << 11 | opcode.modrm[2] << 8 1006 | elif opcode.modrm[1] is not None: 1007 | opc_i |= opcode.modrm[1] << 8 1008 | if opcode.modrm == ("m", None, 4): 1009 | opc_i |= 0x2000000000 # FORCE_SIB 1010 | if not opcode.vex: 1011 | assert opcode.escape < 4 1012 | opc_i |= opcode.escape * 0x10000 1013 | opc_i |= 0x80000 if opcode.prefix == "66" or opsize == 16 else 0 1014 | opc_i |= 0x100000 if opcode.prefix == "F2" else 0 1015 | opc_i |= 0x200000 if opcode.prefix == "F3" else 0 1016 | else: 1017 | assert opcode.escape < 8 1018 | opc_i |= opcode.escape * 0x10000 1019 | if opcode.prefix == "66" or opsize == 16: 1020 | assert opcode.prefix not in ("F2", "F3") 1021 | opc_i |= 0x100000 1022 | if 
opcode.prefix == "F3": 1023 | opc_i |= 0x200000 1024 | elif opcode.prefix == "F2": 1025 | opc_i |= 0x300000 1026 | opc_i |= 0x400000 if opcode.rexw == "1" else 0 1027 | if opcode.prefix == "LOCK": 1028 | opc_i |= 0x800000 1029 | elif opcode.vex == 1: 1030 | opc_i |= 0x1000000 + 0x800000 * int(opcode.vexl or 0) 1031 | elif opcode.vex == 2: 1032 | opc_i |= 0x2000000 1033 | # L'L encodes SAE rounding mode otherwise 1034 | if not variant.evexsae: 1035 | opc_i |= 0x800000 * int(opcode.vexl or 0) 1036 | assert not (variant.evexsae and variant.evexbcst) 1037 | opc_i |= 0x4000000 if variant.evexsae or variant.evexbcst else 0 1038 | opc_i |= 0x8000000 if "VSIB" in desc.flags else 0 1039 | opc_i |= 0x1000000000 if variant.evexmask == 2 else 0 1040 | opc_i |= 0x4000000000 if variant.downgrade in (1, 2) else 0 1041 | opc_i |= 0x40000000000 if variant.downgrade == 2 else 0 1042 | opc_i |= 0x8000000000 * variant.evexdisp8scale 1043 | if alt >= 0x100: 1044 | raise Exception("encode alternate bits exhausted") 1045 | opc_i |= sum(1 << i for i in supports_high_regs) << 45 1046 | if encoding.imm_control >= 3: 1047 | opc_i |= desc.imm_size(opsize//8) << 47 1048 | elif encoding.imm_control in (1, 2): 1049 | # Must be an arbitrary non-zero value, replaced by address size 1050 | # for imm_ctl=2 and zero for imm_ctl=1 (constant 1). 
1051 | opc_i |= 1 << 47 1052 | 1053 | enc_encoding = desc.encoding 1054 | if desc.encoding != "I" and desc.encoding.endswith("I"): 1055 | enc_encoding = desc.encoding[:-1] 1056 | elif desc.encoding == "IA": 1057 | enc_encoding = "A" 1058 | opc_i |= ["NP", "M", "R", "M1", "MC", "MR", "RM", "RMA", "MRC", 1059 | "AM", "MA", "I", "O", "OA", "S", "A", "D", "FD", "TD", "IM", 1060 | "RVM", "RVMR", "RMV", "VM", "MVR", "MRV", 1061 | ].index(enc_encoding) << 51 1062 | opc_i |= alt << 56 1063 | enc_opcs.append(opc_i) 1064 | mnem_map[f"FE_{mnem}"] = enc_opcs[0] 1065 | alt_table += enc_opcs[1:] 1066 | 1067 | mnem_tab = "".join(f"#define {m} {v:#x}\n" for m, v in mnem_map.items()) 1068 | alt_tab = "".join(f"[{i}] = {v:#x},\n" for i, v in enumerate(alt_table)) 1069 | return mnem_tab, alt_tab 1070 | 1071 | def unique(it): 1072 | vals = set(it) 1073 | if len(vals) != 1: 1074 | raise Exception(f"multiple values: {vals}") 1075 | return next(iter(vals)) 1076 | 1077 | def encode2_gen_legacy(variant: EncodeVariant, opsize: int, supports_high_regs: list[int], imm_expr: str, imm_size_expr: str, has_idx: bool) -> str: 1078 | opcode = variant.opcode 1079 | desc = variant.desc 1080 | flags = ENCODINGS[variant.desc.encoding] 1081 | code = "" 1082 | 1083 | rex_expr = "0" if opcode.rexw != "1" else "0x48" 1084 | for i in supports_high_regs: 1085 | rex_expr += f"|(op_reg_idx(op{i}) >= 4 && op_reg_idx(op{i}) <= 15?0x40:0)" 1086 | if flags.modrm_idx: 1087 | if opcode.modrm[0] == "m": 1088 | rex_expr += f"|(op_mem_base(op{flags.modrm_idx^3})&8?0x41:0)" 1089 | rex_expr += f"|(op_mem_idx(op{flags.modrm_idx^3})&8?0x42:0)" 1090 | elif desc.operands[flags.modrm_idx^3].kind in ("GP", "XMM"): 1091 | rex_expr += f"|(op_reg_idx(op{flags.modrm_idx^3})&8?0x41:0)" 1092 | if flags.modreg_idx: 1093 | if desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM", "CR", "DR"): 1094 | rex_expr += f"|(op_reg_idx(op{flags.modreg_idx^3})&8?0x44:0)" 1095 | elif flags.modreg_idx: # O encoding 1096 | if 
desc.operands[flags.modreg_idx^3].kind in ("GP", "XMM"): 1097 | rex_expr += f"|(op_reg_idx(op{flags.modreg_idx^3})&8?0x41:0)" 1098 | 1099 | if rex_expr != "0": 1100 | code += f" unsigned rex = {rex_expr};\n" 1101 | for i in supports_high_regs: 1102 | code += f" if (rex && op_reg_gph(op{i})) return 0;\n" 1103 | 1104 | if not has_idx: 1105 | code += " unsigned idx = 0;\n" 1106 | if opcode.prefix == "LOCK": 1107 | code += f" buf[idx++] = 0xF0;\n" 1108 | if opsize == 16 or opcode.prefix == "66": 1109 | code += " buf[idx++] = 0x66;\n" 1110 | if opcode.prefix in ("F2", "F3"): 1111 | code += f" buf[idx++] = 0x{opcode.prefix};\n" 1112 | if opcode.rexw == "1": 1113 | code += f" buf[idx++] = rex;\n" 1114 | elif rex_expr != "0": 1115 | code += f" if (rex) buf[idx++] = rex;\n" 1116 | if opcode.escape: 1117 | code += f" buf[idx++] = 0x0F;\n" 1118 | if opcode.escape == 2: 1119 | code += f" buf[idx++] = 0x38;\n" 1120 | elif opcode.escape == 3: 1121 | code += f" buf[idx++] = 0x3A;\n" 1122 | code += f" buf[idx++] = {opcode.opc:#x};\n" 1123 | if None not in opcode.modrm: 1124 | opcext = 0xc0 | opcode.modrm[1] << 3 | opcode.modrm[2] 1125 | code += f" buf[idx++] = {opcext:#x};\n" 1126 | 1127 | if flags.modrm: 1128 | if flags.modreg_idx: 1129 | modreg = f"op_reg_idx(op{flags.modreg_idx^3})" 1130 | else: 1131 | modreg = opcode.modrm[1] or 0 1132 | if opcode.modrm[0] == "m": 1133 | assert "VSIB" not in desc.flags 1134 | assert opcode.modrm[2] is None 1135 | modrm = f"op{flags.modrm_idx^3}" 1136 | code += f" unsigned memoff = enc_mem(buf+idx, idx+{imm_size_expr}, {modrm}, {modreg}, 0, 0);\n" 1137 | code += f" if (!memoff) return 0;\n idx += memoff;\n" 1138 | else: 1139 | if flags.modrm_idx: 1140 | modrm = f"op_reg_idx(op{flags.modrm_idx^3})" 1141 | else: 1142 | modrm = f"{opcode.modrm[2] or 0}" 1143 | code += f" buf[idx++] = 0xC0|({modreg}<<3)|({modrm}&7);\n" 1144 | elif flags.modrm_idx: 1145 | code += f" buf[idx-1] |= op_reg_idx(op{flags.modrm_idx^3}) & 7;\n" 1146 | if flags.imm_control 
>= 2: 1147 | if flags.imm_control == 6: 1148 | imm_expr += " - idx" 1149 | code += f" enc_imm(buf+idx, {imm_expr}, {imm_size_expr});\n" 1150 | code += f" return idx + {imm_size_expr};\n" 1151 | else: 1152 | code += f" return idx;\n" 1153 | return code 1154 | 1155 | def encode2_gen_vex(variant: EncodeVariant, imm_expr: str, imm_size_expr: str, has_idx: bool) -> str: 1156 | opcode = variant.opcode 1157 | flags = ENCODINGS[variant.desc.encoding] 1158 | code = "" 1159 | 1160 | helperopc = opcode.opc << 16 1161 | helperopc |= ["NP", "66", "F3", "F2"].index(opcode.prefix) << 8 1162 | helperopc |= 0x8000 if opcode.rexw == "1" else 0 1163 | if not variant.evexsae: 1164 | # ER: L'L encodes rounding mode for SAE 1165 | helperopc |= 0x0020 * int(opcode.vexl or 0) # EVEX.L'L 1166 | helperopc |= opcode.escape << 10 1167 | helperopc |= 0x10 if variant.evexsae or variant.evexbcst else 0 # EVEX.b 1168 | helperopc |= 0x80 if variant.evexmask == 2 else 0 # EVEX.z 1169 | helperopc |= 0x1000000 if variant.downgrade in (1, 2) else 0 1170 | helperopc |= 0x2000000 if variant.downgrade == 2 else 0 1171 | helperopc = f"{helperopc:#x}" 1172 | if variant.evexsae == 2: 1173 | helperopc += "|(flags&FE_RC_MASK)" 1174 | if variant.evexmask: 1175 | code += " if (!op_reg_idx(opmask)) return 0;\n" 1176 | helperopc += "|(op_reg_idx(opmask)&7)" 1177 | 1178 | if flags.modreg_idx: 1179 | modreg = f"op_reg_idx(op{flags.modreg_idx^3})" 1180 | else: 1181 | modreg = opcode.modrm[1] or 0 1182 | vexop = f"op_reg_idx(op{flags.vexreg_idx^3})" if flags.vexreg_idx else 0 1183 | if not flags.modrm and opcode.modrm == (None, None, None): 1184 | # No ModRM, prefix only (VZEROUPPER/VZEROALL) 1185 | assert opcode.vex == 1 1186 | helperfn, helperargs = "enc_vex_common", f"0, 0, 0, 0" 1187 | elif opcode.modrm[0] == "m": 1188 | vsib = "VSIB" in variant.desc.flags 1189 | helperfn = "enc" + ["", "_vex", "_evex"][opcode.vex] + ["_mem", "_vsib"][vsib] 1190 | assert opcode.modrm[2] in (None, 4) 1191 | forcesib = 1 if 
opcode.modrm[2] == 4 else 0 # AMX 1192 | modrm = f"op{flags.modrm_idx^3}" 1193 | ripoff = imm_size_expr + ("" if not has_idx else "+idx") 1194 | helperargs = (f"{modrm}, {modreg}, {vexop}, {ripoff}, " + 1195 | f"{forcesib}, {variant.evexdisp8scale}") 1196 | else: 1197 | if flags.modrm_idx: 1198 | modrm = f"op_reg_idx(op{flags.modrm_idx^3})" 1199 | else: 1200 | modrm = f"{opcode.modrm[2] or 0}" 1201 | suffix = "_reg" 1202 | if (opcode.vex == 2 and flags.modrm_idx and 1203 | variant.desc.operands[flags.modrm_idx^3].kind == "XMM"): 1204 | suffix = "_xmm" 1205 | helperfn = "enc" + ["", "_vex", "_evex"][opcode.vex] + suffix 1206 | helperargs = f"{modrm}, {modreg}, {vexop}" 1207 | bufidx = "buf" if not has_idx else "buf+idx" 1208 | helpercall = f"{helperfn}({bufidx}, {helperopc}, {helperargs})" 1209 | if flags.imm_control >= 2: 1210 | assert flags.imm_control < 6, "jmp with VEX/EVEX?" 1211 | code += f" unsigned vexoff = {helpercall};\n" 1212 | code += f" enc_imm({bufidx}+vexoff, {imm_expr}, {imm_size_expr});\n" 1213 | code += f" return vexoff ? vexoff+{imm_size_expr}{'+idx' if has_idx else ''} : 0;\n" 1214 | elif has_idx: 1215 | code += f" unsigned vexoff = {helpercall};\n" 1216 | code += f" return vexoff ? 
vexoff+idx : 0;\n" 1217 | else: 1218 | code += f" return {helpercall};\n" 1219 | return code 1220 | 1221 | def encode2_table(entries, args): 1222 | mnemonics = encode_mnems(entries) 1223 | 1224 | enc_decls, enc_code = "", "" 1225 | for (mnem, opsize, ots), variants in mnemonics.items(): 1226 | max_imm_size = max(v.desc.imm_size(opsize//8) for v in variants) 1227 | 1228 | supports_high_regs = [] 1229 | if variants[0].desc.mnemonic in ("MOVSX", "MOVZX") or opsize == 8: 1230 | # Should be the same for all variants 1231 | for i, (ot, op) in enumerate(zip(ots, variants[0].desc.operands)): 1232 | if ot == "r" and op.kind == "GP" and op.abssize(opsize//8) == 1: 1233 | supports_high_regs.append(i) 1234 | supports_vsib = unique("VSIB" in v.desc.flags for v in variants) 1235 | opkinds = unique(tuple(op.kind for op in v.desc.operands) for v in variants) 1236 | evexmask = unique(v.evexmask for v in variants) 1237 | evexsae = unique(v.evexsae for v in variants) 1238 | 1239 | OPKIND_LUT = {"FPU": "ST", "SEG": "SREG", "MMX": "MM"} 1240 | reg_tys = [OPKIND_LUT.get(opkind, opkind) for opkind in opkinds] 1241 | 1242 | fnname = f"fe64_{mnem}" 1243 | op_tys = [{ 1244 | "i": f"int{max_imm_size*8 if max_imm_size != 3 else 32}_t", 1245 | "a": "uintptr_t", 1246 | "r": f"FeReg{reg_ty if i not in supports_high_regs else 'GPLH'}", 1247 | "k": "FeRegMASK", 1248 | "m": "FeMem" if not supports_vsib else "FeMemV", 1249 | "b": "FeMem", 1250 | "o": "const void*", 1251 | }[ot] for i, (ot, reg_ty) in enumerate(zip(ots, reg_tys))] 1252 | fn_opargs = ", FeRegMASK opmask" if evexmask else "" 1253 | fn_opargs += "".join(f", {ty} op{i}" for i, ty in enumerate(op_tys)) 1254 | fn_sig = f"unsigned ({fnname})(uint8_t* buf, int flags{fn_opargs})" 1255 | enc_decls += f"{fn_sig};\n" 1256 | if supports_high_regs: 1257 | enc_decls += f"#define fe64_{mnem}(buf, flags" 1258 | enc_decls += "".join(f", op{i}" for i in range(len(op_tys))) 1259 | enc_decls += f") {fnname}(buf, flags" 1260 | enc_decls += "".join(f", 
FE_MAKE_GPLH(op{i})" if i in supports_high_regs else f", op{i}" for i in range(len(op_tys))) 1261 | enc_decls += f")\n" 1262 | 1263 | code = f"{fn_sig} {{\n" 1264 | 1265 | has_memory = unique(v.opcode.modrm[0] == "m" for v in variants) 1266 | has_useg = unique("USEG" in v.desc.flags for v in variants) 1267 | has_u67 = unique("U67" in v.desc.flags for v in variants) 1268 | if has_memory or has_useg: 1269 | # segment override without addrsize override shouldn't happen 1270 | assert has_memory or has_u67 1271 | code += f" unsigned idx = UNLIKELY(flags & (FE_SEG_MASK|FE_ADDR32)) ? enc_seg67(buf, flags) : 0;\n" 1272 | elif has_u67: 1273 | # STOS, SCAS, JCXZ, LOOP, LOOPcc 1274 | code += f" unsigned idx = UNLIKELY(flags & FE_ADDR32) ? (*buf=0x67, 1) : 0;\n" 1275 | else: 1276 | code += " (void) flags;\n" 1277 | 1278 | # indicate whether an idx variable exists 1279 | has_idx = has_memory or has_useg or has_u67 1280 | 1281 | for i, variant in enumerate(variants): 1282 | opcode, desc = variant.opcode, variant.desc 1283 | flags = ENCODINGS[desc.encoding] 1284 | 1285 | conds = [] 1286 | # Select usable encoding. 1287 | if desc.encoding == "S": 1288 | # Segment encoding is weird. 1289 | conds.append(f"op_reg_idx(op0)=={(opcode.opc>>3)&0x7:#x}") 1290 | if desc.mnemonic == "XCHG_NOP" and opsize == 32: 1291 | # XCHG eax, eax must not be encoded as 90 -- that'd be NOP. 1292 | conds.append(f"!(op_reg_idx(op0)==0&&op_reg_idx(op1)==0)") 1293 | if flags.vexreg_idx and not opcode.vex: # vexreg w/o vex is zeroreg 1294 | conds.append(f"op_reg_idx(op{flags.vexreg_idx^3})=={flags.zeroreg_val}") 1295 | 1296 | imm_size = desc.imm_size(opsize//8) 1297 | imm_size_expr = f"{imm_size}" 1298 | imm_expr = f"(int64_t) op{flags.imm_idx^3}" 1299 | if flags.imm_control == 1: 1300 | conds.append(f"op{flags.imm_idx^3} == 1") 1301 | elif flags.imm_control == 2: 1302 | imm_size_expr = "(flags & FE_ADDR32 ? 4 : 8)" 1303 | imm_expr = f"(int64_t) (flags & FE_ADDR32 ? 
(int32_t) {imm_expr} : {imm_expr})" 1304 | elif flags.imm_control == 3: 1305 | imm_expr = f"op_reg_idx(op{flags.imm_idx^3}) << 4" 1306 | code += f" if (op_reg_idx(op{flags.imm_idx^3}) >= 16) return 0;\n" 1307 | elif flags.imm_control == 4 and imm_size == 3: # ENTER 1308 | code += f" if ((uint32_t) op{flags.imm_idx^3} >= 0x1000000) return 0;\n" 1309 | elif flags.imm_control == 4 and imm_size < max_imm_size: 1310 | conds.append(f"op_imm_n({imm_expr}, {imm_size})") 1311 | elif flags.imm_control == 6: 1312 | imm_expr = f"{imm_expr} - (int64_t) buf - {imm_size}" 1313 | if i != len(variants) - 1: # only Jcc+JMP 1314 | conds.append(f"!(flags & FE_JMPL)") 1315 | # assume one-byte opcode without escape/prefixes 1316 | conds.append(f"op_imm_n({imm_expr}-1, {imm_size})") 1317 | 1318 | if conds: 1319 | code += f" if ({'&&'.join(conds)}) {{\n" 1320 | 1321 | if opcode.vex: 1322 | code += encode2_gen_vex(variant, imm_expr, imm_size_expr, has_idx) 1323 | else: 1324 | code += encode2_gen_legacy(variant, opsize, supports_high_regs, imm_expr, imm_size_expr, has_idx) 1325 | 1326 | if conds: 1327 | code += " }\n" 1328 | else: 1329 | break 1330 | else: 1331 | code += " return 0;\n" 1332 | 1333 | enc_code += code + "}\n" 1334 | 1335 | return enc_decls, enc_code 1336 | 1337 | 1338 | if __name__ == "__main__": 1339 | generators = { 1340 | "decode": decode_table, 1341 | "encode": encode_table, 1342 | "encode2": encode2_table, 1343 | } 1344 | 1345 | parser = argparse.ArgumentParser() 1346 | parser.add_argument("--32", dest="modes", action="append_const", const=32) 1347 | parser.add_argument("--64", dest="modes", action="append_const", const=64) 1348 | parser.add_argument("--with-undoc", action="store_true") 1349 | parser.add_argument("--stats", action="store_true") 1350 | parser.add_argument("mode", choices=generators.keys()) 1351 | parser.add_argument("table", type=argparse.FileType('r')) 1352 | parser.add_argument("out_public", type=argparse.FileType('w')) 1353 | 
parser.add_argument("out_private", type=argparse.FileType('w')) 1354 | args = parser.parse_args() 1355 | 1356 | entries = [] 1357 | for line in args.table.read().splitlines(): 1358 | if not line or line[0] == "#": continue 1359 | line, weak = (line, False) if line[0] != "*" else (line[1:], True) 1360 | opcode_string, desc_string = tuple(line.split(maxsplit=1)) 1361 | opcode, desc = Opcode.parse(opcode_string), InstrDesc.parse(desc_string) 1362 | verifyOpcodeDesc(opcode, desc) 1363 | if "UNDOC" not in desc.flags or args.with_undoc: 1364 | entries.append((weak, opcode, desc)) 1365 | 1366 | res_public, res_private = generators[args.mode](entries, args) 1367 | args.out_public.write(res_public) 1368 | args.out_private.write(res_private) 1369 | --------------------------------------------------------------------------------