├── .gitignore ├── CMakeLists.txt ├── LICENSE ├── README.md ├── src ├── main.c ├── queue.c ├── queue.h ├── utils │ ├── function_length.c │ ├── function_length.h │ ├── test_functions.c │ └── test_functions.h ├── vector.c ├── vector.h ├── x64id.c └── x64id.h └── test ├── CMakeLists.txt ├── asm_sources.tar.xz ├── main_test.c ├── x64_op1.bin ├── x64_op2.bin ├── x64_op38.bin ├── x64_op3a.bin ├── x86_op1.bin ├── x86_op2.bin ├── x86_op2_vex.bin ├── x86_op38.bin └── x86_op3a.bin /.gitignore: -------------------------------------------------------------------------------- 1 | cmake-build-debug/ 2 | cmake-build-release/ 3 | cmake-build-release-visual-studio 4 | .idea/ 5 | bin/ 6 | Makefile 7 | cmake_install.cmake 8 | CMakeFiles/ 9 | CMakeCache.txt 10 | *.obj 11 | *.exe 12 | *.json 13 | .fleet/ 14 | .vs/ 15 | out/ 16 | .cache/ 17 | x64id 18 | x64id_test -------------------------------------------------------------------------------- /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.16) 2 | project(x64id C) 3 | 4 | set(CMAKE_C_STANDARD 11) 5 | string(APPEND CMAKE_C_FLAGS " -O2") # append: keeps flags supplied via CFLAGS / -DCMAKE_C_FLAGS instead of overwriting them 6 | 7 | set(COMMONS src/x64id.c src/utils/function_length.c src/vector.c src/queue.c src/utils/test_functions.c) 8 | set(SOURCES src/main.c) 9 | 10 | add_executable(x64id ${COMMONS} ${SOURCES}) 11 | 12 | include(test/CMakeLists.txt) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 
11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [2020] [Marco 'DispatchCode' Crivellari] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) 2 | 3 | # x64ID ~ x64 Instruction Decoder 4 | 5 | An x86/x64 machine code decoder. It is useful for getting an instruction's length and identifying each of its fields. 6 | 7 | Here are some scenarios where x64ID can be used: 8 | 9 | - write your disassembler from scratch 10 | - as a base for a VM protection [[1]](#user-content-res1). 11 | - reverse engineering scenarios 12 | - swapping instructions with others (e.g. substituting `MOV EAX, 0` with `XOR EAX, EAX`) 13 | - get mnemonic representation (*currently not implemented*) 14 | - others (as ideas will come to mind...) 
15 | 16 | ___ 17 | 18 | - [x64ID ~ x64 Instruction Decoder](#x64id--machine-code-analyzer) 19 | * [Supported architectures and features](#supported-architectures-and-features) 20 | + [Features on development](#features-on-development) 21 | * [API](#api) 22 | + [Instruction struct](#instruction-struct) 23 | - [`REX` union](#rex-union) 24 | - [`ModRm` union](#modrm-union) 25 | - [`SIB` union](#sib-union) 26 | - [`vex_info` struct](#vex_info-struct) 27 | * [Examples](#examples) 28 | - [A practical example: sum of two vectors using SIMD instruction](#a-practical-example-sum-of-two-vectors-using-simd-instruction) 29 | - [Another example: architecture x64, VEX prefix with YMM register](#another-example-architecture-x64-vex-prefix-with-ymm-register) 30 | * [Enabling / Disabling features](#enabling--disabling-features) 31 | * [Function Length detection](#find-function-length) 🌟*New❗*🌟 32 | * [Tests](#tests) 33 | * [Useful resources](#useful-resources) 34 | * [Notes](#notes) 35 | 36 | ## Supported architectures and features 37 | 38 | **Architectures**: 39 | 40 | ✅ x86
41 | ✅ x64
42 | 43 | **Opcodes**: 44 | 45 | ✅ 1-byte OPs
46 | ✅ 2-byte OPs
47 | ✅ 3-byte OPs, 0x38 and 0x3A
48 | 49 | **Fields**: 50 | 51 | ✅ prefixes
52 | ✅ VEX prefix (0xC4, 0xC5)
53 | ✅ ModRm
54 | ✅ REX prefix
55 | ✅ SIB
56 | ✅ Imm
57 | ✅ Disp
58 | ❌ XOP prefix
59 | 60 | **Instruction Set**: 61 | 62 | ✅ x86 & x64
63 | ✅ SIMD extension
64 | ✅ AVX extension
65 | ❌ AVX-512 (EVEX prefix)
66 | ❌ 3DNow!
67 | 68 | ### Features on development 69 | 70 | 🎯 XOP support
71 | 🎯 AVX-512 (EVEX prefix)
72 | 🎯 Machine code to assembly mnemonics
73 | 🎯 Others (as ideas will come to mind...) 74 | 75 | ## API 76 | 77 | x64ID exposes only one function and some structs to complete its goal: 78 | 79 | ```C 80 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data_src, int offset); 81 | ``` 82 | 83 | | Parameter | Type | Explanation | Required | 84 | |----------------|:--------:|-------------|:--------:| 85 | | `instr` | `struct instruction` | A reference to the struct that will contain the analysis result. See [below](#instruction-struct) for more information. | YES | 86 | | `arch` | `enum supported_architecture` | The architecture type. Use `1` for `x86` and `2` for `x64` | YES | 87 | | `data_src` | `char*` | A data buffer with the data to be analyzed | YES | 88 | | `offset` | `int` | An offset to be added to the starting address of `data_src` | NO | 89 | 90 | **Return**: 91 | 92 | `x64id_decode` returns the length of the decoded instruction. Its value can also be accessed from `instr.length`. 93 | 94 | > :information_source: **Notes** 95 | > Internally, x64ID does not use dynamic allocation to avoid overhead. 96 | 97 | ___ 98 | 99 | ### Instruction struct 100 | 101 | The fields of the struct are described below. More information, along with the other structs, can be found in the [header file](https://github.com/DispatchCode/Machine-Code-Analyzer/blob/master/src/x64id.h#L355). 102 | 103 | | Field Name | Type | Description | 104 | |-------------------|:-----------------:|-------------| 105 | | `prefixes` | `uint8_t[4]` | Stores the prefixes of the instruction, like Segment Override, Address Size and 2 and 3-byte escape opcodes (0x0f, 0x38, 0x3A) | 106 | | `rex` | `union` | Union of five fields: `value`, `rex_b`, `rex_r`, `rex_x`, `rex_w` (see [below](#rex-union)). | 107 | | `op` | `uint8_t` | The opcode of the instruction | 108 | | `modrm` | `union` | Union of four fields: `value`, `rm`, `reg` and `mod` (see [below](#modrm-union)). 
| 109 | | `disp` | `uint64_t` | Displacement field | 110 | | `imm` | `uint64_t` | Immediate value (a number) | 111 | | `label` | `uint32_t` | Address of Jcc/JMP, if present | 112 | | `_vex` | `struct vex_info` | Available only if [`_ENABLE_VEX_INFO`](#enabling--disabling-features) is defined. Described [below](#vex_info-struct). | 113 | | `instr` | `uint8_t[15]` | Available only if [`_ENABLE_RAW_BYTES`](#enabling--disabling-features) is defined. | 114 | | `sib` | `union` | Union of four fields: `value`, `base`, `index` and `scaled` (see [below](#sib-union)). | 115 | | `vex` | `uint8_t[3]` | 0xC4 or 0xC5 followed by 1 or 2 bytes | 116 | | `length` | `int` | The instruction length (in bytes) | 117 | | `disp_len` | `int` | The displacement size (in bytes) | 118 | | `imm_len` | `int` | The immediate size (in bytes) | 119 | | `vex_cnt` | `int8_t` | Number of VEX prefix bytes present | 120 | | `prefix_cnt` | `int8_t` | Number of prefixes present | 121 | | `set_prefix` | `uint16_t` | A bit mask that can be tested to check whether a given prefix (from the `prefixes` enum) is present. | 122 | | `set_field` | `uint16_t` | A bit mask that can be tested to check whether a given feature (from the `instruction_feature` enum) is available (e.g. FPU, SIB, DISP,...) | 123 | | `jcc_type` | `uint8_t` | The type of jump: Jcc or JMP with 1 or 2-bytes (refer to the `jmp_type` enum) | 124 | 125 | ___ 126 | 127 | #### `REX` union 128 | 129 | | Field Name | Type | Description | 130 | |-------------------|:----------:|-------------| 131 | | `rex.value` | `uint8_t` | The `rex` prefix if present (x64 only) | 132 | | `rex.bits.rex_b` | `uint8_t` | `rex_b` field | 133 | | `rex.bits.rex_x` | `uint8_t` | `rex_x` field | 134 | | `rex.bits.rex_r` | `uint8_t` | `rex_r` field | 135 | | `rex.bits.rex_w` | `uint8_t` | `rex_w` field | 136 | 137 | For more information on the REX prefix, refer to section *2.2.1 REX Prefixes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2). 
138 | 139 | ___ 140 | 141 | #### `ModRm` union 142 | 143 | | Field Name | Type | Description | 144 | |-------------------|:----------:|-------------| 145 | | `modrm.value` | `uint8_t` | The ModRm value | 146 | | `modrm.bits.rm` | `uint8_t` | The `rm` part of ModRm | 147 | | `modrm.bits.reg` | `uint8_t` | The `reg` part of ModRm | 148 | | `modrm.bits.mod` | `uint8_t` | The `mod` part of ModRm. When mod=11b source and destination are registers, otherwise one of the operands involves memory access (displacement field) | 149 | 150 | More information on ModRm field can be found at the section *2.1.3 ModR/M and SIB Bytes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2). 151 | 152 | ___ 153 | 154 | #### `SIB` union 155 | 156 | | Field Name | Type | Description | 157 | |-------------------|:----------:|-------------| 158 | | `sib.value` | `uint8_t` | If present, is the Scaled Index Base | 159 | | `sib.bits.base` | `uint8_t` | `base` field | 160 | | `sib.bits.index` | `uint8_t` | `index` field | 161 | | `sib.bits.scaled` | `uint8_t` | `scaled` field | 162 | 163 | For more information refer to section *2.1.5 Addressing-Mode Encoding of ModR/M and SIB Bytes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2). 
164 | 165 | ___ 166 | 167 | #### `vex_info` struct 168 | 169 | | Field Name | Type | Description | 170 | |----------------------|:-----------------:|-------------| 171 | | `type` | `uint8_t` | `0xC4` used when 3-byte prefix is present or `0xC5` used when 2-byte prefix is present | 172 | | `vexc5b` | `struct` | | 173 | | `_vex.val5` | `uint8_t` | The byte after `0xC5` with its fields described below | 174 | | `_vex.vexc5b.vex_pp` | `uint8_t` | Equivalent to a SIMD prefix: `00`: none, `01`: 0x66, `02`: 0xF3, `03`: 0xF2 | 175 | | `_vex.vexc5b.vex_l` | `uint8_t` | 0 for 128-bit vector or 1 for 256-bit vector | 176 | | `_vex.vexc5b.vex_v` | `uint8_t` | An additional operand for the instruction | 177 | | `_vex.vexc5b.vex_r` | `uint8_t` | | 178 | | `_vex.val4` | `uint16_t` | | 179 | | `_vex.vexc4b.vex_pp` | `uint8_t` | | 180 | | `_vex.vexc4b.vex_l` | `uint8_t` | | 181 | | `_vex.vexc4b.vex_v` | `uint8_t` | | 182 | | `_vex.vexc4b.vex_r` | `uint8_t` | | 183 | | `_vex.vexc4b.vex_m` | `uint8_t` | Values: 00001: implied 0F leading opcode byte, 00010: implied 0F 38 leading opcode bytes, 00011: implied 0F 3A leading opcode bytes. Other values raise #UD. | 184 | | `_vex.vexc4b.vex_b` | `uint8_t` | | 185 | | `_vex.vexc4b.vex_x` | `uint8_t` | | 186 | | `_vex.vexc4b.vex_r` | `uint8_t` | | 187 | 188 | For all the details about the VEX prefix, see section **2.3.5 The VEX Prefix** of the Intel Developer Manual Vol.2 [[2]](#user-content-res2). 189 | ___ 190 | 191 | ## Examples 192 | 193 | #### A practical example: sum of two vectors using SIMD instruction 194 | 195 | Let's look at a practical example, the sum of two vectors (using inline assembly): 196 | 197 | ```C 198 | // ... omitted code ... 
199 | int vect1[LEN] = {1,2,3,4,5,6,7,8,9,10,11,12}; 200 | int vect2[LEN] = {1,2,3,4,5,6,7,8,9,10,11,12}; 201 | int res_vect1[LEN]; 202 | 203 | __asm 204 | { 205 | lea eax, vect1 206 | lea ebx, vect2 207 | xor ecx, ecx 208 | 209 | _while: 210 | cmp ecx, LEN * 4 211 | jge _end 212 | 213 | movups xmm0, [eax + ecx] 214 | movups xmm1, [ebx + ecx] 215 | addps xmm0, xmm1 216 | movups [res_vect1 + ecx], xmm0 217 | add ecx, 4 218 | jmp _while 219 | 220 | _end: 221 | } 222 | // ... omitted code ... 223 | ``` 224 | 225 | Compiling through MS Compiler (with `/Ot` flag), the result will be what follows: 226 | 227 | ```Assembly 228 | CPU Disasm 229 | Address Hex dump Command Comments 230 | 008910BC |. C785 68FFFFFF 00000000 MOV DWORD PTR SS:[LOCAL.38],0 231 | 008910C6 |. 8D45 CC LEA EAX,[LOCAL.13] 232 | 008910C9 |. 8D5D 9C LEA EBX,[LOCAL.25] 233 | 008910CC |. 33C9 XOR ECX,ECX 234 | 008910CE |> 83F9 30 /CMP ECX,30 235 | 008910D1 |. 7D 18 |JGE SHORT 008910EB 236 | 008910D3 |. 0F100408 |MOVUPS XMM0,DQWORD PTR DS:[ECX+EAX] 237 | 008910D7 |. 0F100C0B |MOVUPS XMM1,DQWORD PTR DS:[ECX+EBX] 238 | 008910DB |. 0F58C1 |ADDPS XMM0,XMM1 239 | 008910DE |. 0F11840D 6CFFFFFF |MOVUPS DQWORD PTR SS:[ECX+EBP-94],XMM0 240 | 008910E6 |. 83C1 04 |ADD ECX,4 241 | 008910E9 |.^ EB E3 \JMP SHORT 008910CE 242 | 243 | ``` 244 | 245 | We can write a sample code that uses x64ID to read and print the instructions. 246 | 247 | ```C 248 | int offset = 0x4bc; 249 | int parse_bytes = 0x2a; 250 | int byte_reads = 0; 251 | 252 | while(byte_reads <= parse_bytes) { 253 | struct instruction instr; 254 | x64id_decode(&instr, arch, (char*)data_buffer, offset); 255 | 256 | for(int i=0; i :information_source: **Notes** 386 | > Jump Table are not handled; be careful when you use switch case and compiling with MSVC (GCC/MinGw seems use other techniques). 387 | > Handling jmp table require heuristics (eg. as IDA do and other tools) and more info on the target. 
388 | ## Tests 389 | 390 | After searching for a better solution, I came back to one of the first ideas I had: assembly. 391 | 392 | Tests have been written using NASM and must be compiled using the "bin" flag: 393 | 394 | ```sh 395 | nasm -f bin 396 | ``` 397 | 398 | The tested instructions are the following: 399 | 400 | * x86: `1-byte OP`, `2-byte OP`, `3-byte OP` and `2-byte OP with VEX prefix` 401 | * x64: `1-byte OP`, `2-byte OP`, `3-byte OP`; some of which have `VEX prefix` 402 | 403 | Tests have been written by hand using the Intel Developer Manual book [[2]](#user-content-res2). 404 | I can't guarantee 100% coverage; however, all the opcodes have been tested. 405 | 406 | ## Useful resources 407 | 408 | - [X86-64 Instruction Encoding](https://wiki.osdev.org/X86-64_Instruction_Encoding) 409 | - [x86_64 Instruction Table](https://c9x.me/x86/) 410 | - [Compiler Explorer](https://godbolt.org/) 411 | 412 | ## Notes 413 | 414 | [1] "VM protection" here means a code obfuscator that converts x86/x64 machine code into "virtual opcodes" that are understandable by a VM. Two commercial examples are [VMProtect](https://vmpsoft.com/) and [CodeVirtualizer](https://www.oreans.com/codevirtualizer.php) 415 | 416 | [2] [Intel Developer Manual (2nd book)](https://software.intel.com/content/dam/develop/public/us/en/documents/334569-sdm-vol-2d.pdf) 417 | 418 | ___ 419 | 420 | _Crafted with ❤ by DispatchCode. 
Documentation created along with [Alexander Cerutti](https://github.com/alexandercerutti)_ 421 | -------------------------------------------------------------------------------- /src/main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "utils/function_length.h" 5 | #include "utils/test_functions.h" 6 | 7 | void instruction_info(struct instruction instr) 8 | { 9 | #ifdef _ENABLE_RAW_BYTES 10 | printf("RAW bytes (hex): "); 11 | for(int i=0; ilength, func_info->pVisited->tos); 126 | printf("\nAddresses of instructions that has been decoded:\n"); 127 | 128 | for(int i=0; ipVisited->tos; i++) { 129 | if(i % 8 == 0) 130 | printf("\n"); 131 | printf("%X, ",func_info->pVisited->vect[i]); 132 | 133 | } 134 | printf("\n"); 135 | 136 | int offset = 0; 137 | uint32_t start = func_info->pVisited->vect[0]; 138 | uint32_t end = func_info->pVisited->vect[func_info->pVisited->tos-1]; 139 | 140 | printf("\nStart disassembly the addresses range: [0x%X, 0x%X]\n\n", start, end); 141 | 142 | while(offset <= func_info->length) { 143 | struct instruction instr; 144 | x64id_decode(&instr, arch, (char*)example4, offset); 145 | printf("Instr. VA: 0x%X\n",(uint32_t)((uint32_t)example4+offset)); 146 | instruction_info(instr); 147 | 148 | offset += instr.length; 149 | } 150 | 151 | vector_free(func_info->pVisited); 152 | free(func_info); 153 | } 154 | 155 | int main(int argc, char *argv[]) 156 | { 157 | if(argc == 2) 158 | { 159 | in_memory(atoi(argv[1])); 160 | } 161 | else if(argc == 3) 162 | { 163 | binary_file(argv[1], atoi(argv[2])); 164 | } 165 | else 166 | { 167 | printf("How to launch the application?\n"); 168 | printf("1. binary file, 2 parameters required\n"); 169 | printf("\tmain \n"); 170 | printf("2. 
in-memory analysis, 1 parameter required\n"); 171 | printf("\tmain \n"); 172 | printf("\n\nArchitecture must be 1 (x86) or 2 (x64)."); 173 | exit(-1); 174 | } 175 | } -------------------------------------------------------------------------------- /src/queue.c: -------------------------------------------------------------------------------- 1 | /* 2 | * Queue (FIFO) 3 | * 4 | */ 5 | 6 | #include "queue.h" 7 | 8 | queue* queue_init() 9 | { 10 | queue *q = calloc(1, sizeof(queue)); 11 | q->queue = calloc(QUEUE_INIT, sizeof(uint64_t)); 12 | q->size = QUEUE_INIT; 13 | q->tos = 0; 14 | 15 | return q; 16 | } 17 | 18 | void queue_enqueue(queue *q, uint64_t value) 19 | { 20 | if(q->tos == q->size) 21 | { 22 | q->size <<= 1; 23 | q->queue = realloc(q->queue, sizeof(uint64_t)*q->size); 24 | } 25 | 26 | q->queue[q->tos++] = value; 27 | } 28 | 29 | uint64_t queue_dequeue(queue *q) 30 | { 31 | uint64_t value = q->queue[0]; 32 | q->tos--; 33 | memcpy(q->queue, q->queue+1, sizeof(uint64_t)*q->tos); 34 | return value; 35 | } 36 | 37 | int queue_empty(queue *q) 38 | { 39 | return q->tos == 0; 40 | } 41 | 42 | int queue_size(queue *q) { 43 | return q->tos; 44 | } 45 | 46 | int queue_find(queue *q, uint64_t value) 47 | { 48 | for(int i=0; itos; i++) 49 | if(q->queue[i] == value) 50 | return 1; 51 | return 0; 52 | } 53 | 54 | void queue_free(queue *q) 55 | { 56 | if(q) 57 | { 58 | if (q->queue) 59 | free(q->queue); 60 | free(q); 61 | } 62 | } -------------------------------------------------------------------------------- /src/queue.h: -------------------------------------------------------------------------------- 1 | #ifndef x64ID_QUEUE_H 2 | #define x64ID_QUEUE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define QUEUE_INIT 20 9 | 10 | typedef struct { 11 | uint64_t*queue; 12 | int size; 13 | int tos; 14 | } queue; 15 | 16 | queue* queue_init(); 17 | void queue_enqueue(queue *q, uint64_t value); 18 | uint64_t queue_dequeue(queue *q); 19 | void queue_free(queue *q); 20 | int 
queue_empty(queue *q); 21 | int queue_find(queue *q, uint64_t value); 22 | int queue_size(queue *q); 23 | 24 | #endif //x64ID_QUEUE_H 25 | -------------------------------------------------------------------------------- /src/utils/function_length.c: -------------------------------------------------------------------------------- 1 | #include "function_length.h" 2 | 3 | 4 | vector* instrFlowLength(char *pMemory, const enum supported_architecture arch) 5 | { 6 | int bytes_len = 0; 7 | uint64_t addr = (uint64_t)pMemory; 8 | 9 | queue *future_paths = queue_init(); 10 | vector *visited = vector_init(); 11 | 12 | char *tmp_addr = pMemory; 13 | while(true) 14 | { 15 | struct instruction instr; 16 | x64id_decode(&instr, arch, tmp_addr, 0); 17 | /* 18 | for (int i = 0; i < instr.length; i++) { 19 | printf("%X ", instr.instr[i]); 20 | } 21 | printf("\n"); 22 | */ 23 | if(instr.op == 0xc3 || instr.op == 0xCC) // RET or INT3 24 | { 25 | vector_push_back(visited, addr); 26 | if(queue_empty(future_paths)) 27 | { 28 | queue_free(future_paths); 29 | return visited; 30 | } 31 | 32 | uint64_t next_addr = queue_dequeue(future_paths); 33 | tmp_addr = (char *)next_addr; 34 | addr = next_addr; 35 | continue; 36 | } 37 | 38 | if(vector_find(visited, addr)) 39 | { 40 | if(queue_empty(future_paths)) 41 | { 42 | queue_free(future_paths); 43 | return visited; 44 | } 45 | 46 | uint64_t next_addr = queue_dequeue(future_paths); 47 | tmp_addr = (char *)next_addr; 48 | addr = next_addr; 49 | continue; 50 | } 51 | 52 | vector_push_back(visited, addr); 53 | bytes_len += instr.length; 54 | addr += instr.length; 55 | tmp_addr += instr.length; 56 | 57 | if (instr.jcc_type == JCC_FAR || instr.jcc_type == JCC_SHORT) 58 | { 59 | if (!queue_find(future_paths, instr.label)) 60 | { 61 | queue_enqueue(future_paths, instr.label); 62 | } 63 | } 64 | 65 | if (instr.jcc_type == JMP_FAR || instr.jcc_type == JMP_SHORT) 66 | { 67 | addr = instr.label; 68 | tmp_addr = (char *)addr; 69 | } 70 | } 71 | } 72 | 73 | int 
compare(const void * n1, const void * n2) 74 | { 75 | const uint64_t a = *(const uint64_t*)n1, b = *(const uint64_t*)n2; return (a > b) - (a < b); /* three-way compare: a uint64_t difference truncated to int can report the wrong sign or a false "equal" */ 76 | } 77 | 78 | pFunctionInfo getFunctionLength(char *buffer, enum supported_architecture arch) 79 | { 80 | pFunctionInfo f_info = calloc(1,sizeof(functionInfo)); 81 | vector *visited = instrFlowLength(buffer, arch); 82 | 83 | qsort(visited->vect, visited->tos, sizeof(uint64_t), compare); 84 | 85 | uint64_t min = visited->vect[0]; 86 | uint64_t max = visited->vect[visited->tos-1]; 87 | 88 | f_info->pVisited = visited; 89 | f_info->length = (int)(max-min); 90 | 91 | return f_info; 92 | } -------------------------------------------------------------------------------- /src/utils/function_length.h: -------------------------------------------------------------------------------- 1 | #ifndef x64ID_FUNCTION_LENGTH_H 2 | #define x64ID_FUNCTION_LENGTH_H 3 | 4 | #include 5 | 6 | #include "../vector.h" 7 | #include "../queue.h" 8 | #include "../x64id.h" 9 | 10 | typedef struct { 11 | vector *pVisited; 12 | int length; 13 | } functionInfo,*pFunctionInfo; 14 | 15 | pFunctionInfo getFunctionLength(char *buffer, enum supported_architecture arch); 16 | 17 | #endif //x64ID_FUNCTION_LENGTH_H 18 | -------------------------------------------------------------------------------- /src/utils/test_functions.c: -------------------------------------------------------------------------------- 1 | // 2 | // TEST FUNCTIONS, just random code 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | void bubble_sort(int *array) { 10 | printf("Bubble sort"); 11 | for(int i=0; i<10; i++) { 12 | for(int j=i+1; j<10; j++) { 13 | if(array[i] > array[j]) { 14 | int tmp = array[i]; 15 | array[i] = array[j]; 16 | array[j] = tmp; 17 | } 18 | } 19 | } 20 | } 21 | 22 | int example(int n) { 23 | printf("example"); 24 | int c = 0; 25 | for(int i=0; i<10; i++) { 26 | if(i*2 == n) 27 | return 1; 28 | c++; 29 | } 30 | return -1; 31 | } 32 | 33 | int example1(int n) { 34 | printf("example 1"); 35 | int c = 0; 36 | 
for(int i=1; i<10; i++) { 37 | for(int j=i; jvect = calloc(VECTOR_INIT, sizeof(uint64_t)); 12 | v->size = VECTOR_INIT; 13 | v->tos = 0; 14 | 15 | return v; 16 | } 17 | 18 | void vector_push_back(vector *v, uint64_t value) 19 | { 20 | if(v->tos == v->size) 21 | { 22 | v->size <<= 1; 23 | v->vect = realloc(v->vect, sizeof(uint64_t)*v->size); 24 | } 25 | v->vect[v->tos++] = value; 26 | } 27 | 28 | int vector_find(vector *v, uint64_t value) 29 | { 30 | for(int i=0; i < v->tos; i++) 31 | if(v->vect[i] == value) 32 | return 1; 33 | return 0; 34 | } 35 | 36 | void vector_free(vector *v) 37 | { 38 | if(v) 39 | { 40 | if (v->vect) 41 | free(v->vect); 42 | free(v); 43 | } 44 | } -------------------------------------------------------------------------------- /src/vector.h: -------------------------------------------------------------------------------- 1 | #ifndef x64ID_VECTOR_H 2 | #define x64ID_VECTOR_H 3 | 4 | #include 5 | #include 6 | 7 | #define VECTOR_INIT 20 8 | 9 | typedef struct { 10 | uint64_t*vect; 11 | int size; 12 | int tos; 13 | } vector; 14 | 15 | vector* vector_init(); 16 | void vector_push_back(vector *v, uint64_t value); 17 | void vector_free(vector *v); 18 | int vector_find(vector *v, uint64_t value); 19 | 20 | #endif //x64ID_VECTOR_H 21 | -------------------------------------------------------------------------------- /src/x64id.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "x64id.h" 3 | 4 | static size_t *imm_table[4] = {0, imm_byte_2b,imm_byte_3b_38,imm_byte_3b_3A }; 5 | static size_t *modrm_table[4] = {0, modrm_2b,modreg_3b_38,modreg_3b_3A }; 6 | 7 | static inline void x64id_vex_decode(struct instruction *instr, enum supported_architecture arch, const char *data, uint8_t vex_size) { 8 | memcpy(instr->vex, (data+instr->length), vex_size); 9 | instr->vex_cnt += vex_size; 10 | instr->length += vex_size; 11 | 12 | instr->op = *(data + instr->length); 13 | instr->length++; 14 | 15 | 
instr->set_prefix |= VEX; 16 | 17 | if(instr->vex[0] == 0xC5) { 18 | #ifdef _ENABLE_VEX_INFO 19 | instr->_vex.type = instr->vex[0]; 20 | instr->_vex.val5 = instr->vex[1]; 21 | #endif 22 | 23 | x64id_decode_modrm(instr, arch, data, modrm_2b, imm_byte_2b, NULL); 24 | } 25 | else if(instr->vex[0] == 0xC4) { 26 | 27 | #ifdef _ENABLE_VEX_INFO 28 | instr->_vex.type = instr->vex[0]; 29 | memcpy(&instr->_vex.val4, &instr->vex[1],2); 30 | #endif 31 | 32 | int8_t index = instr->vex[1] & 0x3; 33 | x64id_decode_modrm(instr, arch, data, modrm_table[index], imm_table[index], NULL); 34 | } 35 | // TODO XOP, 0x8F 36 | 37 | } 38 | 39 | static inline int x64id_vex_size(struct instruction *instr, enum supported_architecture arch, const char *data) { 40 | uint8_t curr_byte = (uint8_t) *(data + instr->length); 41 | uint8_t next_byte = (uint8_t) *(data + instr->length + 1); 42 | 43 | // 3-byte VEX prefix 44 | if ((arch == X86 && curr_byte == 0xC4 && (next_byte >> 6) == 3) || (arch == X64 && curr_byte == 0xC4)) 45 | return 3; 46 | // 2-byte VEX prefix 47 | else if ((arch == X86 && curr_byte == 0xC5 && (next_byte & 0x80)) || (arch == X64 && curr_byte == 0xC5)) 48 | return 2; 49 | 50 | return 0; 51 | } 52 | 53 | static inline bool x64id_check_sib(uint8_t mod, uint8_t rm) { 54 | return mod < 3 && rm == 4; 55 | } 56 | 57 | static inline int x64id_displacement_size(uint8_t mod, uint8_t rm) { 58 | if((mod == 0x02) || (rm == 0x05 && !mod)) 59 | return 4; 60 | else if(mod == 0x01) 61 | return 1; 62 | return 0; 63 | } 64 | 65 | static inline int x64id_imm_size(struct instruction *instr, size_t val, enum supported_architecture arch) { 66 | switch (val) { 67 | case b: 68 | return 1; 69 | case v: 70 | if(arch == X64 && instr->set_prefix & OP64) 71 | return 8; 72 | if(instr->set_prefix & OS) 73 | return 2; 74 | return 4; 75 | case z: 76 | case z1: 77 | if(instr->set_prefix & OS) 78 | return 2; 79 | return 4; 80 | case p: 81 | if(instr->set_prefix & OS) { 82 | if (arch == X86) 83 | return 4; 84 | 
return 8; 85 | } 86 | return 6; 87 | case w: 88 | return 2; 89 | case wb: 90 | return 3; // TODO ENTER iw, ib 91 | case gr3b: 92 | if(!instr->modrm.bits.reg) 93 | return 1; 94 | return 0; 95 | case gr3z: 96 | if(!instr->modrm.bits.reg) 97 | { 98 | if(instr->set_prefix & OS) 99 | return 2; 100 | return 4; 101 | } 102 | return 0; 103 | 104 | default: 105 | return 0; 106 | } 107 | } 108 | 109 | static void x64id_decode_modrm(struct instruction *instr, enum supported_architecture arch, const char *start_data, const size_t *modrm_table, const size_t *imm_table, const size_t *jcc_table) { 110 | size_t val; 111 | if((val = modrm_table[instr->op])) { 112 | instr->set_field |= MODRM; 113 | 114 | if(val == X87_FPU) 115 | instr->set_field |= FPU; 116 | 117 | uint8_t curr = *(start_data + instr->length); 118 | 119 | instr->modrm.value = curr; 120 | instr->length++; 121 | 122 | uint8_t mod_val = instr->modrm.bits.mod, rm_val = instr->modrm.bits.rm; 123 | 124 | if(x64id_check_sib(instr->modrm.bits.mod,instr->modrm.bits.rm)) { 125 | instr->set_field |= SIB; 126 | 127 | instr->sib.value = (uint8_t) *(start_data + instr->length); 128 | instr->length++; 129 | 130 | if(instr->sib.bits.base == 0x05) { 131 | instr->set_field |= DISP; 132 | mod_val = instr->modrm.bits.mod; 133 | rm_val = instr->sib.bits.base; 134 | } 135 | } 136 | 137 | instr->disp_len = x64id_displacement_size(mod_val, rm_val); 138 | if(instr->disp_len || instr->set_field & DISP) { 139 | memcpy(&instr->disp, (start_data + instr->length), instr->disp_len); 140 | instr->length += instr->disp_len; 141 | instr->set_field |= DISP; 142 | } 143 | } 144 | 145 | instr->imm_len = x64id_imm_size(instr, imm_table[instr->op], arch); 146 | if(instr->imm_len) { 147 | instr->set_field |= IMM; 148 | memcpy(&instr->imm, (start_data + instr->length), instr->imm_len); 149 | instr->length += instr->imm_len; 150 | } 151 | 152 | uint16_t value = 0; 153 | if(jcc_table != NULL && ((value = jcc_table[instr->op]))) { 154 | switch(value) { 155 | 
case j1: 156 | instr->jcc_type = JMP_SHORT; 157 | break; 158 | case j2: 159 | instr->jcc_type = JMP_FAR; 160 | break; 161 | case jc1: 162 | instr->jcc_type = JCC_SHORT; 163 | break; 164 | case jc2: 165 | instr->jcc_type = JCC_FAR; 166 | default: 167 | break; // avoid compiler warnings 168 | } 169 | 170 | // 1-byte 171 | if(value & 0x10) 172 | instr->label = (uint64_t)start_data + ((int8_t)instr->imm) + instr->length; 173 | // 4-byte 174 | else 175 | instr->label = (uint64_t)start_data + ((int64_t)instr->imm) + instr->length; 176 | } 177 | } 178 | 179 | static int x64id_decode_2b(struct instruction *instr, enum supported_architecture arch, const char *data_src) 180 | { 181 | instr->set_prefix |= ESCAPE; 182 | uint8_t curr = *(data_src + instr->length); 183 | 184 | if(curr == 0x3A || curr == 0x38) 185 | { 186 | instr->set_prefix |= OP3B; 187 | 188 | instr->prefixes[instr->prefix_cnt++] = curr; 189 | instr->length++; 190 | instr->op = *(data_src + instr->length); 191 | instr->length++; 192 | 193 | if(curr == 0x3A) 194 | x64id_decode_modrm(instr, arch, data_src, modreg_3b_3A, imm_byte_3b_3A, NULL); 195 | else 196 | x64id_decode_modrm(instr, arch, data_src, modreg_3b_38, imm_byte_3b_38, NULL); 197 | 198 | return instr->length; 199 | } 200 | 201 | instr->op = curr; 202 | instr->length++; 203 | 204 | x64id_decode_modrm(instr, arch, data_src, modrm_2b, imm_byte_2b, op2b_labels); 205 | 206 | return instr->length; 207 | } 208 | 209 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data, int offset) { 210 | memset(instr, 0, sizeof(struct instruction)); 211 | 212 | char *start_data = (data + offset); 213 | uint8_t curr = *start_data; 214 | 215 | while(x86_64_prefix[curr] & arch) 216 | { 217 | switch(curr) { 218 | case 0x26: 219 | instr->set_prefix |= ES; 220 | break; 221 | case 0x2E: 222 | instr->set_prefix |= CS; 223 | break; 224 | case 0x36: 225 | instr->set_prefix |= SS; 226 | break; 227 | case 0x3E: 228 | instr->set_prefix |= DS; 229 | 
break; 230 | case 0x48: 231 | case 0x49: 232 | if(arch == X64) 233 | instr->set_prefix |= OP64; 234 | break; 235 | case 0x64: 236 | instr->set_prefix |= FS; 237 | break; 238 | case 0x65: 239 | instr->set_prefix |= GS; 240 | break; 241 | case 0x66: 242 | instr->set_prefix |= OS; 243 | break; 244 | case 0x67: 245 | instr->set_prefix |= AS; 246 | break; 247 | } 248 | 249 | instr->set_field |= PREFIX; 250 | instr->prefixes[instr->prefix_cnt] = curr; 251 | instr->prefix_cnt++; 252 | instr->length++; 253 | 254 | // Rex prefix 255 | // TODO 64-bit mode: IF OP == 90h and REX.B == 1, 256 | // then the instruction is XCHG r8, rAX 257 | if(arch == X64 && (curr >= 0x40 && curr <= 0x4F)) 258 | { 259 | instr->rex.value = curr; 260 | instr->set_field |= REX; 261 | } 262 | else if(curr == 0x0F) 263 | { 264 | x64id_decode_2b(instr, arch, start_data); 265 | #ifdef _ENABLE_RAW_BYTES 266 | memcpy(instr->instr, start_data, instr->length); 267 | #endif 268 | return instr->length; 269 | } 270 | 271 | curr = (uint8_t) *(start_data + instr->length); 272 | } 273 | 274 | size_t vex_size = x64id_vex_size(instr, arch, start_data); 275 | if(vex_size) 276 | x64id_vex_decode(instr, arch, start_data, vex_size); 277 | else 278 | { 279 | instr->length++; 280 | instr->op = curr; 281 | x64id_decode_modrm(instr, arch, start_data, modrm_1b, imm_byte_1b, op1b_labels); 282 | } 283 | 284 | #ifdef _ENABLE_RAW_BYTES 285 | memcpy(instr->instr, start_data, instr->length); 286 | #endif 287 | 288 | return instr->length; 289 | } -------------------------------------------------------------------------------- /src/x64id.h: -------------------------------------------------------------------------------- 1 | #ifndef x64id_H 2 | #define x64id_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | #define _ENABLE_RAW_BYTES 9 | #define _ENABLE_VEX_INFO 10 | 11 | enum supported_architecture { 12 | X86 = 1, 13 | X64 = 2 14 | }; 15 | 16 | enum decode_status { 17 | x64id_ERROR = 0, 18 | }; 19 | 20 | #define ALL (X86 | X64) 
21 | 22 | // 23 | // instruction prefix look-up table 24 | static size_t x86_64_prefix[256] = { 25 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 26 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ALL, 27 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 28 | /* 20 */ 0, 0, 0, 0, 0, 0, ALL,0, 0, 0, 0, 0, 0, 0, ALL,0, 29 | /* 30 */ 0, 0, 0, 0, 0, 0, ALL,0, 0, 0, 0, 0, 0, 0, ALL,0, 30 | /* 40 */ X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64, // REX prefixes 31 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 32 | /* 60 */ 0, 0, 0, 0, ALL,ALL,ALL,ALL,0, 0, 0, 0, 0, 0, 0, 0, 33 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 34 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 35 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 36 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 37 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 38 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 39 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 41 | /* F0 */ ALL,0,ALL, ALL,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 42 | }; 43 | 44 | // 45 | // 1-byte lookup table 46 | // 47 | #define X87_FPU 2 48 | 49 | static size_t modrm_1b[256] = { 50 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 51 | /* 00 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 52 | /* 10 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 53 | /* 20 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 54 | /* 30 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 55 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 56 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 57 | /* 60 */ 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0, 58 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 59 | /* 80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 60 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 61 | /* A0 
*/ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 62 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 63 | /* C0 */ 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 64 | /* D0 */ 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, // 2 = Coprocessor Escape 65 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 66 | /* F0 */ 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1 67 | }; 68 | 69 | #define b 1 // byte 70 | #define v 2 // word, dword or qword (64bit mode), depending on OS attribute 71 | #define z 3 // word for 16bit OS or dword for 32/64-bit OS 72 | #define p 4 // 32-bit, 48-bit, or 80-bit pointer, depending on operand-size attribute 73 | #define z1 6 // word for 16bit OS or dword for 32/64-bit OS 74 | #define w 7 // word 75 | #define wb 8 // word, byte 76 | #define gr3b 9 // byte (imm exists only if mod.reg == 0) 77 | #define gr3z 10 // word, dword depending on OS (imm exists only if mod.reg == 0) 78 | 79 | static size_t imm_byte_1b[256] = { 80 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 81 | /* 00 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0, 82 | /* 10 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0, 83 | /* 20 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0, 84 | /* 30 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0, 85 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 86 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 87 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, z, z, b, b, 0, 0, 0, 0, 88 | /* 70 */ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, 89 | /* 80 */ b, z, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 90 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p, 0, 0, 0, 0, 0, 91 | /* A0 */ z1,z1,z1,z1, 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, 92 | /* B0 */ b, b, b, b, b, b, b, b, v, v, v, v, v, v, v, v, 93 | /* C0 */ b, b, w, 0, 0, 0, b, z, wb, 0, w, 0, 0, b, 0, 0, 94 | /* D0 */ 0, 0, 0, 0, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 95 | /* E0 */ b, b, b, b, b, b, b, b, z, z, p, b, 0, 0, 0, 0, 96 | /* F0 */ 0, 0, 0, 0, 0, 0, 
gr3b, gr3z, 0, 0, 0, 0, 0, 0, 0, 0 97 | }; 98 | 99 | /* 100 | * first byte: 101 | * - 1: 1-byte 102 | * - 2: 4-byte 103 | * 104 | * second byte (LSB): 105 | * - 1: Jcc 106 | * - 2: JMP 107 | * 108 | */ 109 | #define j1 0x12 110 | #define j2 0x22 111 | #define jc1 0x11 112 | #define jc2 0x21 113 | 114 | // check if the OP is Jcc or JMP 115 | static size_t op1b_labels[256] = { 116 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 117 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 118 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 119 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 121 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 122 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 123 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 124 | /* 70 */ jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, 125 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 126 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 128 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 129 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 130 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 131 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, j2, 0, j1, 0, 0, 0, 0, 132 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 133 | }; 134 | 135 | // 136 | // 2-byte OP look-up table 137 | 138 | // 0x0f 139 | #define OE 0x01 140 | // 0x66 0x0f 141 | #define O66 0x02 142 | // 0xf2 0x0f 143 | #define OF2 0x04 144 | // 0xf3 0x0f 145 | #define OF3 0x08 146 | 147 | #define P1 (OE) 148 | #define P2 (O66 | OE) 149 | #define P4 (OF3 | OE) 150 | #define P5 (O66 | OF2) 151 | #define P6 (OE | O66 | OF3) 152 | #define P7 (OE | O66 | OF2 | OF3) 153 | #define P8 (O66 | OF2 | OF3) 154 | 155 | static size_t modrm_2b[256] = { 156 | // 00 01 02 03 04 05 06 07 
08 09 0A 0B 0C 0D 0E 0F 157 | /* 00 */ P1,P1,P1,P1,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 158 | /* 10 */ P7,P7,P7,P2,P2,P2,P6,P2,P1,0, 0, 0, 0, 0, 0, P1, 159 | /* 20 */ P1,P1,P1,P1,0, 0, 0, 0, P2,P2,P7,P2,P7,P7,P2,P2, 160 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 161 | /* 40 */ P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1, 162 | /* 50 */ P2,P7,P4,P4,P2,P2,P2,P2,P7,P7,P7,P6,P7,P7,P7,P7, 163 | /* 60 */ P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,O66,O66,P2,P2, 164 | /* 70 */ P7,P1,P1,P1,P2,P2,P2,P1,P1,P1, 0, 0,P5,P5,P6,P6, 165 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 166 | /* 90 */ P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1, 167 | /* A0 */ 0, 0, 0, P1,P1,P1, 0, 0, 0, 0, 0,P1,P1,P1,P1,P1, 168 | /* B0 */ P1,P1,P1,P1,P1,P1,P1,P1,OF3,P1,P1,P1,P4,P4,P1,P1, 169 | /* C0 */ P1,P1,P7,P1,P2,P2,P2,P1, 0, 0, 0, 0, 0, 0, 0, 0, 170 | /* D0 */ P5,P2,P2,P2,P2,P2,P8,P2,P2,P2,P2,P2,P2,P2,P2,P2, 171 | /* E0 */ P2,P2,P2,P2,P2,P2,P8,P2,P2,P2,P2,P2,P2,P2,P2,P2, 172 | /* F0 */ OF2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2, 0 173 | }; 174 | 175 | static size_t imm_byte_2b[256] = { 176 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 177 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 178 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 179 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 180 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 181 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 182 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 183 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 184 | /* 70 */ b, b, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 185 | /* 80 */ z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, 186 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 187 | /* A0 */ 0, 0, 0, 0, b, 0, 0, 0, 0, 0, 0, 0, b, 0, 0, 0, 188 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, b, 0, 0, 0, 0, 0, 189 | /* C0 */ 0, 0, b, 0, b, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 190 | /* D0 */ 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 191 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 192 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 193 | }; 194 | 195 | // check if the OP is Jcc or JMP 196 | static size_t op2b_labels[256] = { 197 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 198 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 199 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 200 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 201 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 202 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 203 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 204 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 205 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 206 | /* 80 */ jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, 207 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 208 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 209 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 210 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 211 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 212 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 213 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 214 | }; 215 | 216 | // 217 | // 3-byte OP look-up table 218 | 219 | #define OP3 (OE | OF2 | O66) 220 | #define OP2 (OE | OF2 | OF3) 221 | #define OP4 (O66 | OF2 | OF3) 222 | 223 | // 224 | // 3-byte OP look-up table (0x38) 225 | static size_t modreg_3b_38[256] = { 226 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 227 | /* 00 */ P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,O66,O66,O66,O66, 228 | /* 10 */ O66,0,0,O66,O66,O66,O66,O66,0,0,0,0,O66,O66,O66,0, 229 | /* 20 */ O66,O66,O66,O66,O66,O66,0,0,O66,O66,O66,O66,O66,O66,O66,O66, 230 | /* 30 */ O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66, 231 | /* 40 */ O66,O66, 
0, 0, 0, O66,O66,O66, 0, 0, 0, 0, 0, 0, 0, 0, 232 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 233 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 234 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 235 | /* 80 */ O66,O66,O66,0, 0, 0, 0, 0, 0, 0, 0, 0,O66,0,O66,0, 236 | /* 90 */ O66,O66,O66,O66, 0, 0,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66, 237 | /* A0 */ 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66,O66,O66, 238 | /* B0 */ 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66,O66,O66, 239 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66, 240 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, O66, O66, O66, 241 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, O66, O66, O66, 242 | /* F0 */ OP3, OP3, OE, 0, 0, OP2, OP4, P7, 0, 0, 0, 0, 0, 0, 0, 0 243 | }; 244 | 245 | static size_t imm_byte_3b_38[256] = { 246 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 247 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 248 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 249 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 250 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 251 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 252 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 253 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 254 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 255 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 256 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 257 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 258 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 259 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 260 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 261 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 262 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 263 | }; 264 | 265 | #define OP5 (OE | O66) 266 | 267 | // 
268 | // 3-byte OP look-up table (0x3A) 269 | 270 | static size_t modreg_3b_3A[256] = { 271 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 272 | /* 00 */ O66, O66, O66, 0, O66, O66, O66, 0, O66, O66, O66, O66,O66 , O66, O66, OP5, 273 | /* 10 */ 0, 0, 0, 0, O66, O66, O66, O66, O66, O66, 0, 0, 0, O66, 0, 0, 274 | /* 20 */ O66, O66, O66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 275 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, 0, 0, 0, 0, 0, 0, 276 | /* 40 */ O66, O66, O66, 0, O66, 0, O66, 0, 0, 0, O66, O66, O66, 0, 0, 0, 277 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 278 | /* 60 */ O66, O66, O66, O66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 279 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 280 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 281 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 282 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 283 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 284 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, 0, 0, 0, 285 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, 286 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 287 | /* F0 */ OF2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 288 | }; 289 | 290 | static size_t imm_byte_3b_3A[256] = { 291 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F 292 | /* 00 */ 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 293 | /* 10 */ 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0, 294 | /* 20 */ 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 295 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 296 | /* 40 */ 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0, 297 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 298 | /* 60 */ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 299 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 300 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 301 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 302 | /* A0 */ 0, 
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 303 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 304 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 305 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 306 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 307 | /* F0 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 308 | }; 309 | 310 | enum jmp_type { 311 | JCC_SHORT = 1, // 1-byte JCC 312 | JCC_FAR = 2, // 2-byte JCC, 4bytes imm 313 | JMP_SHORT = 4, // 1-byte JMP 314 | JMP_FAR = 8, // 4-byte JMP 315 | }; 316 | 317 | enum prefixes { 318 | ES = 1, // 0x26 319 | CS = 2, // 0x2E 320 | SS = 4, // 0x36 321 | DS = 8, // 0x3E 322 | FS = 16, // 0x64 323 | GS = 32, // 0x65 324 | OS = 64, // 0x66 325 | AS = 128,// 0x67 326 | REPNE = 256, 327 | REPE = 512, 328 | OP64 = 1024, 329 | VEX = 2048 330 | }; 331 | 332 | enum instruction_feature { 333 | PREFIX = 1, 334 | ESCAPE = 2, // 0x0F 335 | OP = 4, 336 | OP3B = 8, 337 | MODRM = 16, 338 | SIB = 32, 339 | REX = 64, 340 | DISP = 128, 341 | IMM = 512, 342 | FPU = 1024, 343 | }; 344 | 345 | /* 346 | * VEX FORMAT 347 | * 348 | * 3-byte VEX bit 7 0 765 4 0 7 6 3 2 0 349 | * 11000100 RXB m-mmmm W vvvv L pp 350 | * 351 | * 2-byte VEX bit 7 0 7 6 3 2 0 352 | * 11000101 R vvvv L pp 353 | * 354 | * pp: equivalent to a SIMD prefix 355 | * 00 : None 356 | * 01 : 0x66 357 | * 02 : 0xF3 358 | * 03 : 0xF2 359 | * 360 | * m-mmmm: 361 | * 00000: Reserved for future use (will #UD) 362 | * 00001: implied 0F leading opcode byte 363 | * 00010: implied 0F 38 leading opcode bytes 364 | * 00011: implied 0F 3A leading opcode bytes 365 | * 00100-11111: Reserved for future use (will #UD) 366 | * 367 | */ 368 | 369 | #ifdef _ENABLE_VEX_INFO 370 | struct vex_info { 371 | struct { 372 | uint8_t type; 373 | union { 374 | struct byte2 { 375 | uint8_t vex_pp: 2; 376 | uint8_t vex_l: 1; 377 | uint8_t vex_v: 4; 378 | uint8_t vex_r: 1; 379 | } vexc5b; 380 | uint8_t val5; 381 | }; 382 | union { 383 | struct byte3 { 384 | 
uint8_t vex_m : 5; 385 | uint8_t vex_b : 1; 386 | uint8_t vex_x : 1; 387 | uint8_t vex_r : 1; 388 | 389 | uint8_t vex_pp : 2; 390 | uint8_t vex_l : 1; 391 | uint8_t vex_v : 4; 392 | uint8_t vex_w : 1; 393 | } vexc4b; 394 | uint16_t val4; 395 | }; 396 | }; 397 | }; 398 | #endif 399 | 400 | struct instruction { 401 | uint64_t disp; 402 | uint64_t imm; 403 | uint64_t label; 404 | 405 | #ifdef _ENABLE_VEX_INFO 406 | struct vex_info _vex; 407 | #endif 408 | 409 | #ifdef _ENABLE_RAW_BYTES 410 | uint8_t instr[15]; 411 | #endif 412 | 413 | uint8_t prefixes[4]; 414 | uint8_t op; 415 | 416 | union 417 | { 418 | struct 419 | { 420 | uint8_t rm : 3; 421 | uint8_t reg : 3; 422 | uint8_t mod : 2; 423 | } bits; 424 | uint8_t value; 425 | } modrm; 426 | 427 | union 428 | { 429 | struct 430 | { 431 | uint8_t rex_b : 1; 432 | uint8_t rex_x : 1; 433 | uint8_t rex_r : 1; 434 | uint8_t rex_w : 1; 435 | } bits; 436 | uint8_t value; 437 | } rex; 438 | 439 | union 440 | { 441 | struct 442 | { 443 | uint8_t base : 3; 444 | uint8_t index : 3; 445 | uint8_t scaled : 2; 446 | } bits; 447 | uint8_t value; 448 | } sib; 449 | 450 | uint8_t vex[3]; 451 | 452 | int length; 453 | int disp_len; 454 | int imm_len; 455 | 456 | uint16_t set_prefix; // bit mask 457 | uint16_t set_field; 458 | uint8_t jcc_type; 459 | 460 | int8_t vex_cnt; 461 | int8_t prefix_cnt; 462 | }; 463 | 464 | 465 | // 466 | // Functions 467 | // 468 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data_src, int offset); 469 | static void x64id_decode_modrm(struct instruction *instr, enum supported_architecture arch, const char *data_src, const size_t *modrm_table, const size_t *imm_table, const size_t *jcc_table); 470 | static inline bool x64id_check_sib(uint8_t mod, uint8_t rm); 471 | static inline int x64id_displacement_size(uint8_t mod, uint8_t rm); 472 | static inline int x64id_imm_size(struct instruction *instr, size_t val, enum supported_architecture arch); 473 | static int 
x64id_decode_2b(struct instruction *instr, enum supported_architecture arch, const char *data_src);
static inline int x64id_vex_size(struct instruction *instr, enum supported_architecture arch, const char *data);
static inline void x64id_vex_decode(struct instruction *instr, enum supported_architecture arch, const char *data, uint8_t vex_size);

#endif //x64id_H

--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.16)
project(x64id C)

# The top-level CMakeLists.txt pulls this file in with include(), so
# CMAKE_CURRENT_SOURCE_DIR is the directory of the including file, not test/.
# Anchoring every path on CMAKE_CURRENT_LIST_DIR (the directory of this file)
# keeps the source list valid regardless of where it is included from.
set(COMMONS
    "${CMAKE_CURRENT_LIST_DIR}/../src/x64id.c"
    "${CMAKE_CURRENT_LIST_DIR}/../src/utils/function_length.c"
    "${CMAKE_CURRENT_LIST_DIR}/../src/vector.c"
    "${CMAKE_CURRENT_LIST_DIR}/../src/queue.c"
)

set(TEST_SOURCES "${CMAKE_CURRENT_LIST_DIR}/main_test.c")

# Decoder test driver (sources: the decoder plus test/main_test.c).
add_executable(x64id_test ${COMMONS} ${TEST_SOURCES})

--------------------------------------------------------------------------------
/test/asm_sources.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DispatchCode/x64-Instruction-Decoder/44900ff87246e608d994377393cc990ff5cf07d9/test/asm_sources.tar.xz

--------------------------------------------------------------------------------
/test/main_test.c:
--------------------------------------------------------------------------------
1 | #include 2 | #include 3 | #include "../src/x64id.h" 4 | 5 | /* 6 | * Please, take a look at README before edit this file or tests (.asm) files 7 | */ 8 | 9 | #define BUFFER_SIZE 16 10 | 11 | void instruction_info(struct instruction instr) 12 | { 13 | #ifdef _ENABLE_RAW_BYTES 14 | printf("RAW bytes (hex): "); 15 | for(int i=0; i "); 94 | printf("\n\nArchitecture must be 1 (x86) or 2 (x64)."); 95 | exit(-1); 96 | } 97 | 98 | FILE *hfile = fopen(argv[1], "rb"); 99 | int arch = atoi(argv[2]); 100 | 101 | if (hfile == NULL) { 102 |
printf("ERROR: cannot open file!\n"); 103 | exit(-1); 104 | } 105 | 106 | printf("\n\tSELECTED_ARCHITECTURE: %d\n", arch); 107 | 108 | struct instruction instr = {0}; 109 | printf("Structure size: %d-bytes\n", sizeof(struct instruction)); 110 | int reads; 111 | int offset = 0; 112 | 113 | uint8_t buf[BUFFER_SIZE]; 114 | 115 | int instr_count = 0; 116 | while(!feof(hfile)) { 117 | reads = fread(buf, sizeof(char), BUFFER_SIZE, hfile); 118 | int cc_bytes = count_bytes(buf); 119 | if(cc_bytes == 0) 120 | continue; // skip, it's just an invalid/not encoded OP 121 | 122 | printf("\nBytes from file: "); 123 | for(int i=0; i