├── .gitignore
├── CMakeLists.txt
├── LICENSE
├── README.md
├── src
├── main.c
├── queue.c
├── queue.h
├── utils
│ ├── function_length.c
│ ├── function_length.h
│ ├── test_functions.c
│ └── test_functions.h
├── vector.c
├── vector.h
├── x64id.c
└── x64id.h
└── test
├── CMakeLists.txt
├── asm_sources.tar.xz
├── main_test.c
├── x64_op1.bin
├── x64_op2.bin
├── x64_op38.bin
├── x64_op3a.bin
├── x86_op1.bin
├── x86_op2.bin
├── x86_op2_vex.bin
├── x86_op38.bin
└── x86_op3a.bin
/.gitignore:
--------------------------------------------------------------------------------
1 | cmake-build-debug/
2 | cmake-build-release/
3 | cmake-build-release-visual-studio
4 | .idea/
5 | bin/
6 | Makefile
7 | cmake_install.cmake
8 | CMakeFiles/
9 | CMakeCache.txt
10 | *.obj
11 | *.exe
12 | *.json
13 | .fleet/
14 | .vs/
15 | out/
16 | .cache/
17 | x64id
18 | x64id_test
--------------------------------------------------------------------------------
/CMakeLists.txt:
--------------------------------------------------------------------------------
1 | cmake_minimum_required(VERSION 3.16)
2 | project(x64id C)
3 |
4 | # Compile every target in this project as C11.
5 | set(CMAKE_C_STANDARD 11)
6 |
7 | # Request -O2 by appending to CMAKE_C_FLAGS instead of overwriting it, so flags
8 | # supplied by the user or a toolchain file (-DCMAKE_C_FLAGS=..., CFLAGS) are kept.
9 | string(APPEND CMAKE_C_FLAGS " -O2")
10 |
11 | # COMMONS: decoder and helper sources. SOURCES: the command-line front end.
12 | # Both are kept in variables: test/CMakeLists.txt is pulled in with include()
13 | # below and runs in this same variable scope, so it may reference them
14 | # (not verified here - confirm before inlining the lists).
15 | set(COMMONS src/x64id.c src/utils/function_length.c src/vector.c src/queue.c src/utils/test_functions.c)
16 | set(SOURCES src/main.c)
17 |
18 | add_executable(x64id ${COMMONS} ${SOURCES})
19 |
20 | include(test/CMakeLists.txt)
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | Apache License
2 | Version 2.0, January 2004
3 | http://www.apache.org/licenses/
4 |
5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
6 |
7 | 1. Definitions.
8 |
9 | "License" shall mean the terms and conditions for use, reproduction,
10 | and distribution as defined by Sections 1 through 9 of this document.
11 |
12 | "Licensor" shall mean the copyright owner or entity authorized by
13 | the copyright owner that is granting the License.
14 |
15 | "Legal Entity" shall mean the union of the acting entity and all
16 | other entities that control, are controlled by, or are under common
17 | control with that entity. For the purposes of this definition,
18 | "control" means (i) the power, direct or indirect, to cause the
19 | direction or management of such entity, whether by contract or
20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the
21 | outstanding shares, or (iii) beneficial ownership of such entity.
22 |
23 | "You" (or "Your") shall mean an individual or Legal Entity
24 | exercising permissions granted by this License.
25 |
26 | "Source" form shall mean the preferred form for making modifications,
27 | including but not limited to software source code, documentation
28 | source, and configuration files.
29 |
30 | "Object" form shall mean any form resulting from mechanical
31 | transformation or translation of a Source form, including but
32 | not limited to compiled object code, generated documentation,
33 | and conversions to other media types.
34 |
35 | "Work" shall mean the work of authorship, whether in Source or
36 | Object form, made available under the License, as indicated by a
37 | copyright notice that is included in or attached to the work
38 | (an example is provided in the Appendix below).
39 |
40 | "Derivative Works" shall mean any work, whether in Source or Object
41 | form, that is based on (or derived from) the Work and for which the
42 | editorial revisions, annotations, elaborations, or other modifications
43 | represent, as a whole, an original work of authorship. For the purposes
44 | of this License, Derivative Works shall not include works that remain
45 | separable from, or merely link (or bind by name) to the interfaces of,
46 | the Work and Derivative Works thereof.
47 |
48 | "Contribution" shall mean any work of authorship, including
49 | the original version of the Work and any modifications or additions
50 | to that Work or Derivative Works thereof, that is intentionally
51 | submitted to Licensor for inclusion in the Work by the copyright owner
52 | or by an individual or Legal Entity authorized to submit on behalf of
53 | the copyright owner. For the purposes of this definition, "submitted"
54 | means any form of electronic, verbal, or written communication sent
55 | to the Licensor or its representatives, including but not limited to
56 | communication on electronic mailing lists, source code control systems,
57 | and issue tracking systems that are managed by, or on behalf of, the
58 | Licensor for the purpose of discussing and improving the Work, but
59 | excluding communication that is conspicuously marked or otherwise
60 | designated in writing by the copyright owner as "Not a Contribution."
61 |
62 | "Contributor" shall mean Licensor and any individual or Legal Entity
63 | on behalf of whom a Contribution has been received by Licensor and
64 | subsequently incorporated within the Work.
65 |
66 | 2. Grant of Copyright License. Subject to the terms and conditions of
67 | this License, each Contributor hereby grants to You a perpetual,
68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
69 | copyright license to reproduce, prepare Derivative Works of,
70 | publicly display, publicly perform, sublicense, and distribute the
71 | Work and such Derivative Works in Source or Object form.
72 |
73 | 3. Grant of Patent License. Subject to the terms and conditions of
74 | this License, each Contributor hereby grants to You a perpetual,
75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable
76 | (except as stated in this section) patent license to make, have made,
77 | use, offer to sell, sell, import, and otherwise transfer the Work,
78 | where such license applies only to those patent claims licensable
79 | by such Contributor that are necessarily infringed by their
80 | Contribution(s) alone or by combination of their Contribution(s)
81 | with the Work to which such Contribution(s) was submitted. If You
82 | institute patent litigation against any entity (including a
83 | cross-claim or counterclaim in a lawsuit) alleging that the Work
84 | or a Contribution incorporated within the Work constitutes direct
85 | or contributory patent infringement, then any patent licenses
86 | granted to You under this License for that Work shall terminate
87 | as of the date such litigation is filed.
88 |
89 | 4. Redistribution. You may reproduce and distribute copies of the
90 | Work or Derivative Works thereof in any medium, with or without
91 | modifications, and in Source or Object form, provided that You
92 | meet the following conditions:
93 |
94 | (a) You must give any other recipients of the Work or
95 | Derivative Works a copy of this License; and
96 |
97 | (b) You must cause any modified files to carry prominent notices
98 | stating that You changed the files; and
99 |
100 | (c) You must retain, in the Source form of any Derivative Works
101 | that You distribute, all copyright, patent, trademark, and
102 | attribution notices from the Source form of the Work,
103 | excluding those notices that do not pertain to any part of
104 | the Derivative Works; and
105 |
106 | (d) If the Work includes a "NOTICE" text file as part of its
107 | distribution, then any Derivative Works that You distribute must
108 | include a readable copy of the attribution notices contained
109 | within such NOTICE file, excluding those notices that do not
110 | pertain to any part of the Derivative Works, in at least one
111 | of the following places: within a NOTICE text file distributed
112 | as part of the Derivative Works; within the Source form or
113 | documentation, if provided along with the Derivative Works; or,
114 | within a display generated by the Derivative Works, if and
115 | wherever such third-party notices normally appear. The contents
116 | of the NOTICE file are for informational purposes only and
117 | do not modify the License. You may add Your own attribution
118 | notices within Derivative Works that You distribute, alongside
119 | or as an addendum to the NOTICE text from the Work, provided
120 | that such additional attribution notices cannot be construed
121 | as modifying the License.
122 |
123 | You may add Your own copyright statement to Your modifications and
124 | may provide additional or different license terms and conditions
125 | for use, reproduction, or distribution of Your modifications, or
126 | for any such Derivative Works as a whole, provided Your use,
127 | reproduction, and distribution of the Work otherwise complies with
128 | the conditions stated in this License.
129 |
130 | 5. Submission of Contributions. Unless You explicitly state otherwise,
131 | any Contribution intentionally submitted for inclusion in the Work
132 | by You to the Licensor shall be under the terms and conditions of
133 | this License, without any additional terms or conditions.
134 | Notwithstanding the above, nothing herein shall supersede or modify
135 | the terms of any separate license agreement you may have executed
136 | with Licensor regarding such Contributions.
137 |
138 | 6. Trademarks. This License does not grant permission to use the trade
139 | names, trademarks, service marks, or product names of the Licensor,
140 | except as required for reasonable and customary use in describing the
141 | origin of the Work and reproducing the content of the NOTICE file.
142 |
143 | 7. Disclaimer of Warranty. Unless required by applicable law or
144 | agreed to in writing, Licensor provides the Work (and each
145 | Contributor provides its Contributions) on an "AS IS" BASIS,
146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
147 | implied, including, without limitation, any warranties or conditions
148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
149 | PARTICULAR PURPOSE. You are solely responsible for determining the
150 | appropriateness of using or redistributing the Work and assume any
151 | risks associated with Your exercise of permissions under this License.
152 |
153 | 8. Limitation of Liability. In no event and under no legal theory,
154 | whether in tort (including negligence), contract, or otherwise,
155 | unless required by applicable law (such as deliberate and grossly
156 | negligent acts) or agreed to in writing, shall any Contributor be
157 | liable to You for damages, including any direct, indirect, special,
158 | incidental, or consequential damages of any character arising as a
159 | result of this License or out of the use or inability to use the
160 | Work (including but not limited to damages for loss of goodwill,
161 | work stoppage, computer failure or malfunction, or any and all
162 | other commercial damages or losses), even if such Contributor
163 | has been advised of the possibility of such damages.
164 |
165 | 9. Accepting Warranty or Additional Liability. While redistributing
166 | the Work or Derivative Works thereof, You may choose to offer,
167 | and charge a fee for, acceptance of support, warranty, indemnity,
168 | or other liability obligations and/or rights consistent with this
169 | License. However, in accepting such obligations, You may act only
170 | on Your own behalf and on Your sole responsibility, not on behalf
171 | of any other Contributor, and only if You agree to indemnify,
172 | defend, and hold each Contributor harmless for any liability
173 | incurred by, or claims asserted against, such Contributor by reason
174 | of your accepting any such warranty or additional liability.
175 |
176 | END OF TERMS AND CONDITIONS
177 |
178 | APPENDIX: How to apply the Apache License to your work.
179 |
180 | To apply the Apache License to your work, attach the following
181 | boilerplate notice, with the fields enclosed by brackets "[]"
182 | replaced with your own identifying information. (Don't include
183 | the brackets!) The text should be enclosed in the appropriate
184 | comment syntax for the file format. We also recommend that a
185 | file or class name and description of purpose be included on the
186 | same "printed page" as the copyright notice for easier
187 | identification within third-party archives.
188 |
189 | Copyright [2020] [Marco 'DispatchCode' Crivellari]
190 |
191 | Licensed under the Apache License, Version 2.0 (the "License");
192 | you may not use this file except in compliance with the License.
193 | You may obtain a copy of the License at
194 |
195 | http://www.apache.org/licenses/LICENSE-2.0
196 |
197 | Unless required by applicable law or agreed to in writing, software
198 | distributed under the License is distributed on an "AS IS" BASIS,
199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
200 | See the License for the specific language governing permissions and
201 | limitations under the License.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | [License: Apache 2.0](https://opensource.org/licenses/Apache-2.0)
2 |
3 | # x64ID ~ x64 Instruction Decoder
4 |
5 | An x86/x64 machine code decoder. It is useful for getting an instruction's length and identifying each of its fields.
6 |
7 | Here are some scenarios where x64ID can be used:
8 |
9 | - write your disassembler from scratch
10 | - as a base for a VM protection [[1]](#user-content-res1).
11 | - reverse engineering scenarios
12 | - swapping instructions with others (e.g. substituting `MOV EAX, 0` with `XOR EAX, EAX`)
13 | - get mnemonic representation (*currently not implemented*)
14 | - others (as ideas will come to mind...)
15 |
16 | ___
17 |
18 | - [x64ID ~ x64 Instruction Decoder](#x64id--x64-instruction-decoder)
19 | * [Supported architectures and features](#supported-architectures-and-features)
20 | + [Features on development](#features-on-development)
21 | * [API](#api)
22 | + [Instruction struct](#instruction-struct)
23 | - [`REX` union](#rex-union)
24 | - [`ModRm` union](#modrm-union)
25 | - [`SIB` union](#sib-union)
26 | - [`vex_info` struct](#vex_info-struct)
27 | * [Examples](#examples)
28 | - [A practical example: sum of two vectors using SIMD instruction](#a-practical-example-sum-of-two-vectors-using-simd-instruction)
29 | - [Another example: architecture x64, VEX prefix with YMM register](#another-example-architecture-x64-vex-prefix-with-ymm-register)
30 | * [Enabling / Disabling features](#enabling--disabling-features)
31 | * [Function Length detection](#find-function-length) 🌟*New❗*🌟
32 | * [Tests](#tests)
33 | * [Useful resources](#useful-resources)
34 | * [Notes](#notes)
35 |
36 | ## Supported architectures and features
37 |
38 | **Architectures**:
39 |
40 | ✅ x86
41 | ✅ x64
42 |
43 | **Opcodes**:
44 |
45 | ✅ 1-byte OPs
46 | ✅ 2-byte OPs
47 | ✅ 3-byte OPs, 0x38 and 0x3A
48 |
49 | **Fields**:
50 |
51 | ✅ prefixes
52 | ✅ VEX prefix (0xC4, 0xC5)
53 | ✅ ModRm
54 | ✅ REX prefix
55 | ✅ SIB
56 | ✅ Imm
57 | ✅ Disp
58 | ❌ XOP prefix
59 |
60 | **Instruction Set**:
61 |
62 | ✅ x86 & x64
63 | ✅ SIMD extension
64 | ✅ AVX extension
65 | ❌ AVX-512 (EVEX prefix)
66 | ❌ 3DNow!
67 |
68 | ### Features on development
69 |
70 | 🎯 XOP support
71 | 🎯 AVX-512 (EVEX prefix)
72 | 🎯 Machine code to assembly mnemonics
73 | 🎯 Others (as ideas will come to mind...)
74 |
75 | ## API
76 |
77 | x64ID exposes only one function and some structs to complete its goal:
78 |
79 | ```C
80 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data_src, int offset);
81 | ```
82 |
83 | | Parameter | Type | Explanation | Required |
84 | |----------------|:--------:|-------------|:--------:|
85 | | `instr` | `struct instruction*` | A reference to the struct that will contain the analysis result. See [below](#instruction-struct) for more information. | YES |
86 | | `arch` | `enum supported_architecture` | The architecture type. Use `1` for `x86` and `2` for `x64` | YES |
87 | | `data_src` | `char*` | A data buffer with the data to be analyzed | YES |
88 | | `offset` | `int` | An offset to be added to the starting address of `data_src` | NO |
89 |
90 | **Return**:
91 |
92 | `x64id_decode` returns the length of the decoded instruction. Its value can also be accessed from `instr.length`.
93 |
94 | > :information_source: **Notes**
95 | > Internally, x64ID does not use dynamic allocation to avoid overhead.
96 |
97 | ___
98 |
99 | ### Instruction struct
100 |
101 | The table below describes the fields of the struct. More information and the other structs can be found in the [header file](https://github.com/DispatchCode/Machine-Code-Analyzer/blob/master/src/x64id.h#L355).
102 |
103 | | Field Name | Type | Description |
104 | |-------------------|:-----------------:|-------------|
105 | | `prefixes` | `uint8_t[4]` | Stores the prefixes of the instruction, such as Segment Override, Address Size, and the 2- and 3-byte escape opcodes (0x0f, 0x38, 0x3A) |
106 | | `rex` | `union` | Union of five fields: `value`, `rex_b`, `rex_r`, `rex_x`, `rex_w` (see [below](#rex-union)). |
107 | | `op` | `uint8_t` | The opcode of the instruction |
108 | | `modrm` | `union` | Union of four fields: `value`, `rm`, `reg` and `mod` (see [below](#modrm-union)). |
109 | | `disp` | `uint64_t` | Displacement field |
110 | | `imm` | `uint64_t` | Immediate value (a number) |
111 | | `label` | `uint32_t` | Address of Jcc/JMP, if present |
112 | | `_vex` | `struct vex_info` | Available only if [`_ENABLED_VEX_INFO`](#enabling--disabling-features) is defined. Described [below](#vex_info-struct). |
113 | | `instr` | `uint8_t[15]` | Available only if [`_ENABLE_RAW_BYTES`](#enabling--disabling-features) is defined. |
114 | | `sib` | `union` | Union of four fields: `value`, `base`, `index` and `scaled` (see [below](#sib-union)). |
115 | | `vex` | `uint8_t[3]` | 0xC4 or 0xC5 followed by 1 or 2 bytes |
116 | | `length` | `int` | The instruction length (in bytes) |
117 | | `disp_len` | `int` | The displacement size (in bytes) |
118 | | `imm_len` | `int` | The imm size |
119 | | `vex_cnt` | `int8_t` | Count how many VEX prefixes are available |
120 | | `prefix_cnt` | `int8_t` | Count how many prefixes are available |
121 | | `set_prefix` | `uint16_t` | A field that can be checked to tell whether a given prefix (belonging to the `prefixes` enum) is present. |
122 | | `set_field` | `uint16_t` | A field that can be checked to tell whether a given feature (belonging to the `instruction_feature` enum) is available (e.g. FPU, SIB, DISP,...) |
123 | | `jcc_type` | `uint8_t` | The type of jump: Jcc or JMP with 1 or 2-bytes (refer to the `jmp_type` enum) |
124 |
125 | ___
126 |
127 | #### `REX` union
128 |
129 | | Field Name | Type | Description |
130 | |-------------------|:----------:|-------------|
131 | | `rex.value` | `uint8_t` | The `rex` prefix if present (x64 only) |
132 | | `rex.bits.rex_b` | `uint8_t` | `rex_b` field |
133 | | `rex.bits.rex_x` | `uint8_t` | `rex_x` field |
134 | | `rex.bits.rex_r` | `uint8_t` | `rex_r` field |
135 | | `rex.bits.rex_w` | `uint8_t` | `rex_w` field |
136 |
137 | For more information on REX prefix, refer to section *2.2.1 REX Prefixes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2).
138 |
139 | ___
140 |
141 | #### `ModRm` union
142 |
143 | | Field Name | Type | Description |
144 | |-------------------|:----------:|-------------|
145 | | `modrm.value` | `uint8_t` | The ModRm value |
146 | | `modrm.bits.rm` | `uint8_t` | The `rm` part of ModRm |
147 | | `modrm.bits.reg` | `uint8_t` | The `reg` part of ModRm |
148 | | `modrm.bits.mod` | `uint8_t` | The `mod` part of ModRm. When mod=11b source and destination are registers, otherwise one of the operands involves memory access (displacement field) |
149 |
150 | More information on ModRm field can be found at the section *2.1.3 ModR/M and SIB Bytes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2).
151 |
152 | ___
153 |
154 | #### `SIB` union
155 |
156 | | Field Name | Type | Description |
157 | |-------------------|:----------:|-------------|
158 | | `sib.value` | `uint8_t` | If present, is the Scaled Index Base |
159 | | `sib.bits.base` | `uint8_t` | `base` field |
160 | | `sib.bits.index` | `uint8_t` | `index` field |
161 | | `sib.bits.scaled` | `uint8_t` | `scaled` field |
162 |
163 | For more information refer to section *2.1.5 Addressing-Mode Encoding of ModR/M and SIB Bytes* of the Intel Developer Manual Vol.2 [[2]](#user-content-res2).
164 |
165 | ___
166 |
167 | #### `vex_info` struct
168 |
169 | | Field Name | Type | Description |
170 | |----------------------|:-----------------:|-------------|
171 | | `type` | `uint8_t` | `0xC4` used when 3-byte prefix is present or `0xC5` used when 2-byte prefix is present |
172 | | `vexc5b` | `struct` | |
173 | | `_vex.val5` | `uint8_t` | The byte after `0xC5` with its fields described below |
174 | | `_vex.vexc5b.vex_pp` | `uint8_t` | Equivalent to a SIMD prefix: `00`: none, `01`: 0x66, `02`: 0xF3, `03`: 0xF2 |
175 | | `_vex.vexc5b.vex_l` | `uint8_t` | 0 for 128-bit vector or 1 for 256-bit vector |
176 | | `_vex.vexc5b.vex_v` | `uint8_t` | An additional operand for the instruction |
177 | | `_vex.vexc5b.vex_r` | `uint8_t` | |
178 | | `_vex.val4` | `uint16_t` | |
179 | | `_vex.vexc4b.vex_pp` | `uint8_t` | |
180 | | `_vex.vexc4b.vex_l` | `uint8_t` | |
181 | | `_vex.vexc4b.vex_v` | `uint8_t` | |
182 | | `_vex.vexc4b.vex_r` | `uint8_t` | |
183 | | `_vex.vexc4b.vex_m` | `uint8_t` | Values: 00001: implied 0F leading opcode byte, 00010: implied 0F 38 leading opcode bytes, 00011: implied 0F 3A leading opcode bytes. Other values will #UD. |
184 | | `_vex.vexc4b.vex_b` | `uint8_t` | |
185 | | `_vex.vexc4b.vex_x` | `uint8_t` | |
186 | | `_vex.vexc4b.vex_r` | `uint8_t` | |
187 |
188 | For all the details about VEX prefix look at section **2.3.5 The VEX Prefix** of the Intel Developer Manual Vol.2 [[2]](#user-content-res2).
189 | ___
190 |
191 | ## Examples
192 |
193 | #### A practical example: sum of two vectors using SIMD instruction
194 |
195 | Let's look at a practical example, the sum of two vectors (using inline assembly):
196 |
197 | ```C
198 | // ... omitted code ...
199 | int vect1[LEN] = {1,2,3,4,5,6,7,8,9,10,11,12};
200 | int vect2[LEN] = {1,2,3,4,5,6,7,8,9,10,11,12};
201 | int res_vect1[LEN];
202 |
203 | __asm
204 | {
205 | lea eax, vect1
206 | lea ebx, vect2
207 | xor ecx, ecx
208 |
209 | _while:
210 | cmp ecx, LEN * 4
211 | jge _end
212 |
213 | movups xmm0, [eax + ecx]
214 | movups xmm1, [ebx + ecx]
215 | addps xmm0, xmm1
216 | movups [res_vect1 + ecx], xmm0
217 | add ecx, 4
218 | jmp _while
219 |
220 | _end:
221 | }
222 | // ... omitted code ...
223 | ```
224 |
225 | Compiling with the MS compiler (with the `/Ot` flag) produces the following result:
226 |
227 | ```Assembly
228 | CPU Disasm
229 | Address Hex dump Command Comments
230 | 008910BC |. C785 68FFFFFF 00000000 MOV DWORD PTR SS:[LOCAL.38],0
231 | 008910C6 |. 8D45 CC LEA EAX,[LOCAL.13]
232 | 008910C9 |. 8D5D 9C LEA EBX,[LOCAL.25]
233 | 008910CC |. 33C9 XOR ECX,ECX
234 | 008910CE |> 83F9 30 /CMP ECX,30
235 | 008910D1 |. 7D 18 |JGE SHORT 008910EB
236 | 008910D3 |. 0F100408 |MOVUPS XMM0,DQWORD PTR DS:[ECX+EAX]
237 | 008910D7 |. 0F100C0B |MOVUPS XMM1,DQWORD PTR DS:[ECX+EBX]
238 | 008910DB |. 0F58C1 |ADDPS XMM0,XMM1
239 | 008910DE |. 0F11840D 6CFFFFFF |MOVUPS DQWORD PTR SS:[ECX+EBP-94],XMM0
240 | 008910E6 |. 83C1 04 |ADD ECX,4
241 | 008910E9 |.^ EB E3 \JMP SHORT 008910CE
242 |
243 | ```
244 |
245 | We can write a sample code that uses x64ID to read and print the instructions.
246 |
247 | ```C
248 | int offset = 0x4bc;
249 | int parse_bytes = 0x2a;
250 | int byte_reads = 0;
251 |
252 | while(byte_reads <= parse_bytes) {
253 | struct instruction instr;
254 | x64id_decode(&instr, arch, (char*)data_buffer, offset);
255 |
256 | for(int i=0; i :information_source: **Notes**
386 | > Jump tables are not handled; be careful when you use switch-case statements and compile with MSVC (GCC/MinGW seem to use other techniques).
387 | > Handling jump tables requires heuristics (e.g. as IDA and other tools do) and more information about the target.
388 | ## Tests
389 |
390 | After googling for a better solution, I came back with one of the first things I was thinking: assembly.
391 |
392 | Tests have been written using NASM and must be compiled using the "bin" flag:
393 |
394 | ```sh
395 | nasm -f bin
396 | ```
397 |
398 | The tested instructions are the following:
399 |
400 | * x86: `1-byte OP`, `2-byte OP`, `3-byte OP` and `2-byte OP with VEX prefix`
401 | * x64: `1-byte OP`, `2-byte OP`, `3-byte OP`; some of which have `VEX prefix`
402 |
403 | Tests have been written by hand using the Intel Developer Manual book [[2]](#user-content-res2).
404 | I can't guarantee a 100% coverage, however all the opcodes have been tested.
405 |
406 | ## Useful resources
407 |
408 | - [X86-64 Instruction Encoding](https://wiki.osdev.org/X86-64_Instruction_Encoding)
409 | - [x86_64 Instruction Table](https://c9x.me/x86/)
410 | - [Compiler Explorer](https://godbolt.org/)
411 |
412 | ## Notes
413 |
414 | [1] "VM protection" here means a code obfuscator that converts x86/x64 machine code into "virtual opcodes" that can be understood by a VM. Two commercial examples are [VMProtect](https://vmpsoft.com/) and [CodeVirtualizer](https://www.oreans.com/codevirtualizer.php).
415 |
416 | [2] [Intel Developer Manual (2nd book)](https://software.intel.com/content/dam/develop/public/us/en/documents/334569-sdm-vol-2d.pdf)
417 |
418 | ___
419 |
420 | _Crafted with ❤ by DispatchCode. Documentation created along with [Alexander Cerutti](https://github.com/alexandercerutti)_
421 |
--------------------------------------------------------------------------------
/src/main.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 |
4 | #include "utils/function_length.h"
5 | #include "utils/test_functions.h"
6 |
7 | void instruction_info(struct instruction instr)
8 | {
9 | #ifdef _ENABLE_RAW_BYTES
10 | printf("RAW bytes (hex): ");
11 | for(int i=0; ilength, func_info->pVisited->tos);
126 | printf("\nAddresses of instructions that has been decoded:\n");
127 |
128 | for(int i=0; ipVisited->tos; i++) {
129 | if(i % 8 == 0)
130 | printf("\n");
131 | printf("%X, ",func_info->pVisited->vect[i]);
132 |
133 | }
134 | printf("\n");
135 |
136 | int offset = 0;
137 | uint32_t start = func_info->pVisited->vect[0];
138 | uint32_t end = func_info->pVisited->vect[func_info->pVisited->tos-1];
139 |
140 | printf("\nStart disassembly the addresses range: [0x%X, 0x%X]\n\n", start, end);
141 |
142 | while(offset <= func_info->length) {
143 | struct instruction instr;
144 | x64id_decode(&instr, arch, (char*)example4, offset);
145 | printf("Instr. VA: 0x%X\n",(uint32_t)((uint32_t)example4+offset));
146 | instruction_info(instr);
147 |
148 | offset += instr.length;
149 | }
150 |
151 | vector_free(func_info->pVisited);
152 | free(func_info);
153 | }
154 |
155 | /*
156 |  * Entry point. With one argument the in-memory analysis runs, with two the
157 |  * binary-file analysis runs; any other argument count prints the usage text
158 |  * and exits with status -1. Numeric arguments are converted with atoi().
159 |  */
160 | int main(int argc, char *argv[])
161 | {
162 |     switch(argc)
163 |     {
164 |     case 2:
165 |         in_memory(atoi(argv[1]));
166 |         break;
167 |
168 |     case 3:
169 |         binary_file(argv[1], atoi(argv[2]));
170 |         break;
171 |
172 |     default:
173 |         printf("How to launch the application?\n");
174 |         printf("1. binary file, 2 parameters required\n");
175 |         printf("\tmain \n");
176 |         printf("2. in-memory analysis, 1 parameter required\n");
177 |         printf("\tmain \n");
178 |         printf("\n\nArchitecture must be 1 (x86) or 2 (x64).");
179 |         exit(-1);
180 |     }
181 | }
--------------------------------------------------------------------------------
/src/queue.c:
--------------------------------------------------------------------------------
1 | /*
2 | * Queue (FIFO)
3 | *
4 | */
5 |
6 | #include "queue.h"
7 |
8 | /* Create an empty queue with QUEUE_INIT zeroed slots. Allocation results are not checked. */
9 | queue* queue_init()
10 | {
11 |     queue *fifo = calloc(1, sizeof(queue));
12 |
13 |     fifo->queue = calloc(QUEUE_INIT, sizeof(uint64_t));
14 |     fifo->tos = 0;
15 |     fifo->size = QUEUE_INIT;
16 |     return fifo;
17 | }
17 |
18 | /*
19 |  * Append value at the tail of the queue. When every slot is in use the
20 |  * capacity is doubled first. NOTE: the realloc result is not checked.
21 |  */
22 | void queue_enqueue(queue *q, uint64_t value)
23 | {
24 |     const int is_full = (q->size == q->tos);
25 |
26 |     if(is_full)
27 |     {
28 |         q->size = q->size << 1;
29 |         q->queue = realloc(q->queue, q->size * sizeof(uint64_t));
30 |     }
31 |
32 |     q->queue[q->tos] = value;
33 |     q->tos++;
34 | }
28 |
29 | /*
30 |  * Remove and return the oldest value, shifting the remaining ones down a slot.
31 |  * An empty queue is left unchanged and 0 is returned.
32 |  */
33 | uint64_t queue_dequeue(queue *q)
34 | {
35 |     /* Guard: popping an empty queue would drive tos negative and corrupt the copy size. */
36 |     if(q->tos <= 0)
37 |         return 0;
38 |
39 |     uint64_t value = q->queue[0];
40 |     q->tos--;
41 |     /* Source and destination overlap, so memmove is required; memcpy is undefined here. */
42 |     memmove(q->queue, q->queue+1, sizeof(uint64_t)*q->tos);
43 |     return value;
44 | }
36 |
37 | /* Returns 1 when the queue holds no values, 0 otherwise. */
38 | int queue_empty(queue *q)
39 | {
40 |     return !q->tos;
41 | }
41 |
42 | /* Report how many values are currently stored in the queue. */
43 | int queue_size(queue *q)
44 | {
45 |     const int stored = q->tos;
46 |     return stored;
47 | }
45 |
46 | /*
47 |  * Linear search over the stored values.
48 |  * Returns 1 if value is currently in the queue, 0 otherwise.
49 |  */
50 | int queue_find(queue *q, uint64_t value)
51 | {
52 |     /* Only the first tos slots hold live values. */
53 |     for(int i=0; i < q->tos; i++)
54 |         if(q->queue[i] == value)
55 |             return 1;
56 |     return 0;
57 | }
53 |
54 | /* Release the element buffer and then the queue itself; a NULL queue is ignored. */
55 | void queue_free(queue *q)
56 | {
57 |     if(!q)
58 |         return;
59 |
60 |     if(q->queue != NULL)
61 |         free(q->queue);
62 |     free(q);
63 | }
--------------------------------------------------------------------------------
/src/queue.h:
--------------------------------------------------------------------------------
1 | #ifndef x64ID_QUEUE_H
2 | #define x64ID_QUEUE_H
3 |
4 | #include <stdint.h> /* uint64_t */
5 | #include <stdlib.h> /* calloc, realloc, free */
6 | #include <string.h> /* memory-copy routine used by queue_dequeue */
7 |
8 | /* Number of element slots allocated by queue_init(). */
9 | #define QUEUE_INIT 20
10 |
11 | /* Growable FIFO of 64-bit values stored in a heap-allocated array. */
12 | typedef struct {
13 |     uint64_t*queue; /* element storage; index 0 is dequeued first */
14 |     int size;       /* allocated capacity, in elements */
15 |     int tos;        /* number of stored elements, i.e. the next write index */
16 | } queue;
17 |
18 | /* Create an empty queue with QUEUE_INIT slots. */
19 | queue* queue_init();
20 | /* Append value at the tail, doubling the capacity when the buffer is full. */
21 | void queue_enqueue(queue *q, uint64_t value);
22 | /* Remove and return the value at the head (index 0). */
23 | uint64_t queue_dequeue(queue *q);
24 | /* Free the element buffer and the queue; a NULL queue is ignored. */
25 | void queue_free(queue *q);
26 | /* Returns 1 when no values are stored, 0 otherwise. */
27 | int queue_empty(queue *q);
28 | /* Returns 1 if value is currently stored, 0 otherwise. */
29 | int queue_find(queue *q, uint64_t value);
30 | /* Returns the number of stored values. */
31 | int queue_size(queue *q);
32 |
33 | #endif //x64ID_QUEUE_H
34 |
--------------------------------------------------------------------------------
/src/utils/function_length.c:
--------------------------------------------------------------------------------
1 | #include "function_length.h"
2 |
3 |
4 | /*
5 |  * Follow the control flow of the code at pMemory and record the address of
6 |  * every instruction that gets decoded.
7 |  *
8 |  * Decoding proceeds linearly. The target of each conditional jump is queued
9 |  * as a path to explore later; an unconditional jump moves the cursor to its
10 |  * target. A path stops at a RET (0xC3) or INT3 (0xCC) opcode, or at an address
11 |  * that was already recorded, and the walk ends when no queued path is left.
12 |  *
13 |  * Returns the vector of recorded addresses in visiting order; the internal
14 |  * work queue is freed before returning.
15 |  */
16 | vector* instrFlowLength(char *pMemory, const enum supported_architecture arch)
17 | {
18 |     queue *pending = queue_init();
19 |     vector *visited = vector_init();
20 |
21 |     /* addr and cursor always designate the same location (integer / pointer view). */
22 |     uint64_t addr = (uint64_t)pMemory;
23 |     char *cursor = pMemory;
24 |
25 |     for(;;)
26 |     {
27 |         struct instruction instr;
28 |         x64id_decode(&instr, arch, cursor, 0);
29 |
30 |         /* RET or INT3 closes the current path; its address is recorded every time. */
31 |         const int ends_path = (instr.op == 0xc3 || instr.op == 0xCC);
32 |         if(ends_path)
33 |             vector_push_back(visited, addr);
34 |
35 |         if(ends_path || vector_find(visited, addr))
36 |         {
37 |             /* No branch target left to explore: the walk is complete. */
38 |             if(queue_empty(pending))
39 |             {
40 |                 queue_free(pending);
41 |                 return visited;
42 |             }
43 |
44 |             /* Resume decoding at the next queued branch target. */
45 |             addr = queue_dequeue(pending);
46 |             cursor = (char *)addr;
47 |             continue;
48 |         }
49 |
50 |         vector_push_back(visited, addr);
51 |         addr += instr.length;
52 |         cursor += instr.length;
53 |
54 |         /* Conditional jump: remember the target unless it is already queued. */
55 |         if((instr.jcc_type == JCC_FAR || instr.jcc_type == JCC_SHORT)
56 |            && !queue_find(pending, instr.label))
57 |         {
58 |             queue_enqueue(pending, instr.label);
59 |         }
60 |
61 |         /* Unconditional jump: keep decoding at the jump target. */
62 |         if(instr.jcc_type == JMP_FAR || instr.jcc_type == JMP_SHORT)
63 |         {
64 |             addr = instr.label;
65 |             cursor = (char *)addr;
66 |         }
67 |     }
68 | }
72 |
73 | /*
74 |  * qsort comparator that orders two uint64_t values ascending.
75 |  * Returns a negative, zero or positive int when *n1 is lower than, equal to
76 |  * or greater than *n2.
77 |  */
78 | int compare(const void * n1, const void * n2)
79 | {
80 |     const uint64_t lhs = *(const uint64_t*)n1;
81 |     const uint64_t rhs = *(const uint64_t*)n2;
82 |
83 |     /* A 64-bit difference truncated to int can report 0 or the wrong sign,
84 |        so compare explicitly instead of subtracting. */
85 |     return (lhs > rhs) - (lhs < rhs);
86 | }
77 |
78 | /*
79 |  * Analyze the code starting at buffer and describe its extent.
80 |  *
81 |  * Returns a calloc'd functionInfo whose pVisited vector holds the recorded
82 |  * instruction addresses sorted ascending, and whose length is the distance
83 |  * between the lowest and the highest recorded address (the size of the
84 |  * instruction at the highest address is not added).
85 |  */
86 | pFunctionInfo getFunctionLength(char *buffer, enum supported_architecture arch)
87 | {
88 |     pFunctionInfo info = calloc(1,sizeof(functionInfo));
89 |     vector *addresses = instrFlowLength(buffer, arch);
90 |
91 |     /* After sorting, the first and last entries are the extremes. */
92 |     qsort(addresses->vect, addresses->tos, sizeof(uint64_t), compare);
93 |
94 |     const uint64_t lowest = addresses->vect[0];
95 |     const uint64_t highest = addresses->vect[addresses->tos-1];
96 |
97 |     info->length = (int)(highest-lowest);
98 |     info->pVisited = addresses;
99 |     return info;
100 | }
--------------------------------------------------------------------------------
/src/utils/function_length.h:
--------------------------------------------------------------------------------
1 | #ifndef x64ID_FUNCTION_LENGTH_H
2 | #define x64ID_FUNCTION_LENGTH_H
3 |
4 | #include
5 |
6 | #include "../vector.h"
7 | #include "../queue.h"
8 | #include "../x64id.h"
9 |
10 | typedef struct { /* result record produced by getFunctionLength() */
11 | vector *pVisited; /* addresses of the instructions reached while walking the function */
12 | int length; /* span in bytes between the lowest and the highest visited address */
13 | } functionInfo,*pFunctionInfo;
14 |
15 | pFunctionInfo getFunctionLength(char *buffer, enum supported_architecture arch); /* buffer: first instruction of the function; arch: decoding mode; result is heap-allocated */
16 |
17 | #endif //x64ID_FUNCTION_LENGTH_H
18 |
--------------------------------------------------------------------------------
/src/utils/test_functions.c:
--------------------------------------------------------------------------------
1 | //
2 | // TEST FUNCTIONS, just random code
3 | //
4 |
5 | #include
6 | #include
7 | #include
8 |
/*
 * Sample function used as decoder input: sorts the first ten integers of
 * `array` in ascending order, in place, after printing a banner.
 */
void bubble_sort(int *array) {
    enum { ELEMENT_COUNT = 10 };

    printf("Bubble sort");

    int *const end = array + ELEMENT_COUNT;
    for (int *slot = array; slot != end; ++slot) {
        /* Pull the smallest remaining value into the current slot. */
        for (int *probe = slot + 1; probe != end; ++probe) {
            if (*probe < *slot) {
                const int held = *slot;
                *slot = *probe;
                *probe = held;
            }
        }
    }
}
21 |
/*
 * Sample function used as decoder input: prints a banner, then reports
 * whether `n` is the double of some integer in [0, 9].
 *
 * Returns 1 when such an integer exists, -1 otherwise.
 */
int example(int n) {
    printf("example");

    int candidate = 0;
    while (candidate < 10) {
        if (n == 2 * candidate)
            return 1;
        ++candidate;
    }

    return -1;
}
32 |
33 | int example1(int n) {
34 | printf("example 1");
35 | int c = 0;
36 | for(int i=1; i<10; i++) {
37 | for(int j=i; jvect = calloc(VECTOR_INIT, sizeof(uint64_t));
12 | v->size = VECTOR_INIT;
13 | v->tos = 0;
14 |
15 | return v;
16 | }
17 |
18 | void vector_push_back(vector *v, uint64_t value)
19 | {
20 | if(v->tos == v->size)
21 | {
22 | v->size <<= 1;
23 | v->vect = realloc(v->vect, sizeof(uint64_t)*v->size);
24 | }
25 | v->vect[v->tos++] = value;
26 | }
27 |
28 | int vector_find(vector *v, uint64_t value)
29 | {
30 | for(int i=0; i < v->tos; i++)
31 | if(v->vect[i] == value)
32 | return 1;
33 | return 0;
34 | }
35 |
36 | void vector_free(vector *v)
37 | {
38 | if(v)
39 | {
40 | if (v->vect)
41 | free(v->vect);
42 | free(v);
43 | }
44 | }
--------------------------------------------------------------------------------
/src/vector.h:
--------------------------------------------------------------------------------
1 | #ifndef x64ID_VECTOR_H
2 | #define x64ID_VECTOR_H
3 |
4 | #include
5 | #include
6 |
/* Number of element slots used when a vector's storage is first sized. */
#define VECTOR_INIT 20

/* Growable array of 64-bit values. */
typedef struct {
    uint64_t *vect; /* element storage */
    int size;       /* number of slots currently allocated in vect */
    int tos;        /* number of slots in use; also the index of the next free slot */
} vector;

/* Creates a vector; declared with (void) so calls are checked against a real prototype. */
vector* vector_init(void);
/* Appends value, growing the storage when every slot is in use. */
void vector_push_back(vector *v, uint64_t value);
/* Releases the storage and the vector itself; NULL is accepted. */
void vector_free(vector *v);
/* Returns 1 when value is stored in the vector, 0 otherwise. */
int vector_find(vector *v, uint64_t value);
19 |
20 | #endif //x64ID_VECTOR_H
21 |
--------------------------------------------------------------------------------
/src/x64id.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include "x64id.h"
3 |
4 | static size_t *imm_table[4] = {0, imm_byte_2b,imm_byte_3b_38,imm_byte_3b_3A }; // immediate-size tables selected by the opcode-map field of a 3-byte VEX prefix: 1 = 0F, 2 = 0F 38, 3 = 0F 3A; slot 0 is a null pointer
5 | static size_t *modrm_table[4] = {0, modrm_2b,modreg_3b_38,modreg_3b_3A }; // ModRM-presence tables, same indexing as imm_table; slot 0 is a null pointer
6 |
7 | static inline void x64id_vex_decode(struct instruction *instr, enum supported_architecture arch, const char *data, uint8_t vex_size) {
8 | memcpy(instr->vex, (data+instr->length), vex_size);
9 | instr->vex_cnt += vex_size;
10 | instr->length += vex_size;
11 |
12 | instr->op = *(data + instr->length);
13 | instr->length++;
14 |
15 | instr->set_prefix |= VEX;
16 |
17 | if(instr->vex[0] == 0xC5) {
18 | #ifdef _ENABLE_VEX_INFO
19 | instr->_vex.type = instr->vex[0];
20 | instr->_vex.val5 = instr->vex[1];
21 | #endif
22 |
23 | x64id_decode_modrm(instr, arch, data, modrm_2b, imm_byte_2b, NULL);
24 | }
25 | else if(instr->vex[0] == 0xC4) {
26 |
27 | #ifdef _ENABLE_VEX_INFO
28 | instr->_vex.type = instr->vex[0];
29 | memcpy(&instr->_vex.val4, &instr->vex[1],2);
30 | #endif
31 |
32 | int8_t index = instr->vex[1] & 0x3;
33 | x64id_decode_modrm(instr, arch, data, modrm_table[index], imm_table[index], NULL);
34 | }
35 | // TODO XOP, 0x8F
36 |
37 | }
38 |
39 | static inline int x64id_vex_size(struct instruction *instr, enum supported_architecture arch, const char *data) {
40 | uint8_t curr_byte = (uint8_t) *(data + instr->length);
41 | uint8_t next_byte = (uint8_t) *(data + instr->length + 1);
42 |
43 | // 3-byte VEX prefix
44 | if ((arch == X86 && curr_byte == 0xC4 && (next_byte >> 6) == 3) || (arch == X64 && curr_byte == 0xC4))
45 | return 3;
46 | // 2-byte VEX prefix
47 | else if ((arch == X86 && curr_byte == 0xC5 && (next_byte & 0x80)) || (arch == X64 && curr_byte == 0xC5))
48 | return 2;
49 |
50 | return 0;
51 | }
52 |
/*
 * Reports whether a ModRM byte is followed by a SIB byte: that is the case
 * when the rm field is 4 and mod selects a memory form (mod below 3).
 */
static inline bool x64id_check_sib(uint8_t mod, uint8_t rm) {
    if (mod >= 3)
        return false;

    return rm == 4;
}
56 |
/*
 * Size in bytes of the displacement implied by the ModRM mod/rm fields:
 *   mod == 0 -> 4 bytes only when rm == 5, otherwise none;
 *   mod == 1 -> 1 byte;
 *   mod == 2 -> 4 bytes;
 *   any other mod -> none.
 */
static inline int x64id_displacement_size(uint8_t mod, uint8_t rm) {
    switch (mod) {
        case 0x00:
            return (rm == 0x05) ? 4 : 0;
        case 0x01:
            return 1;
        case 0x02:
            return 4;
        default:
            return 0;
    }
}
64 |
65 | static inline int x64id_imm_size(struct instruction *instr, size_t val, enum supported_architecture arch) {
66 | switch (val) {
67 | case b:
68 | return 1;
69 | case v:
70 | if(arch == X64 && instr->set_prefix & OP64)
71 | return 8;
72 | if(instr->set_prefix & OS)
73 | return 2;
74 | return 4;
75 | case z:
76 | case z1:
77 | if(instr->set_prefix & OS)
78 | return 2;
79 | return 4;
80 | case p:
81 | if(instr->set_prefix & OS) {
82 | if (arch == X86)
83 | return 4;
84 | return 8;
85 | }
86 | return 6;
87 | case w:
88 | return 2;
89 | case wb:
90 | return 3; // TODO ENTER iw, ib
91 | case gr3b:
92 | if(!instr->modrm.bits.reg)
93 | return 1;
94 | return 0;
95 | case gr3z:
96 | if(!instr->modrm.bits.reg)
97 | {
98 | if(instr->set_prefix & OS)
99 | return 2;
100 | return 4;
101 | }
102 | return 0;
103 |
104 | default:
105 | return 0;
106 | }
107 | }
108 |
109 | static void x64id_decode_modrm(struct instruction *instr, enum supported_architecture arch, const char *start_data, const size_t *modrm_table, const size_t *imm_table, const size_t *jcc_table) {
110 | size_t val;
111 | if((val = modrm_table[instr->op])) {
112 | instr->set_field |= MODRM;
113 |
114 | if(val == X87_FPU)
115 | instr->set_field |= FPU;
116 |
117 | uint8_t curr = *(start_data + instr->length);
118 |
119 | instr->modrm.value = curr;
120 | instr->length++;
121 |
122 | uint8_t mod_val = instr->modrm.bits.mod, rm_val = instr->modrm.bits.rm;
123 |
124 | if(x64id_check_sib(instr->modrm.bits.mod,instr->modrm.bits.rm)) {
125 | instr->set_field |= SIB;
126 |
127 | instr->sib.value = (uint8_t) *(start_data + instr->length);
128 | instr->length++;
129 |
130 | if(instr->sib.bits.base == 0x05) {
131 | instr->set_field |= DISP;
132 | mod_val = instr->modrm.bits.mod;
133 | rm_val = instr->sib.bits.base;
134 | }
135 | }
136 |
137 | instr->disp_len = x64id_displacement_size(mod_val, rm_val);
138 | if(instr->disp_len || instr->set_field & DISP) {
139 | memcpy(&instr->disp, (start_data + instr->length), instr->disp_len);
140 | instr->length += instr->disp_len;
141 | instr->set_field |= DISP;
142 | }
143 | }
144 |
145 | instr->imm_len = x64id_imm_size(instr, imm_table[instr->op], arch);
146 | if(instr->imm_len) {
147 | instr->set_field |= IMM;
148 | memcpy(&instr->imm, (start_data + instr->length), instr->imm_len);
149 | instr->length += instr->imm_len;
150 | }
151 |
152 | uint16_t value = 0;
153 | if(jcc_table != NULL && ((value = jcc_table[instr->op]))) {
154 | switch(value) {
155 | case j1:
156 | instr->jcc_type = JMP_SHORT;
157 | break;
158 | case j2:
159 | instr->jcc_type = JMP_FAR;
160 | break;
161 | case jc1:
162 | instr->jcc_type = JCC_SHORT;
163 | break;
164 | case jc2:
165 | instr->jcc_type = JCC_FAR;
166 | default:
167 | break; // avoid compiler warnings
168 | }
169 |
170 | // 1-byte
171 | if(value & 0x10)
172 | instr->label = (uint64_t)start_data + ((int8_t)instr->imm) + instr->length;
173 | // 4-byte
174 | else
175 | instr->label = (uint64_t)start_data + ((int64_t)instr->imm) + instr->length;
176 | }
177 | }
178 |
179 | static int x64id_decode_2b(struct instruction *instr, enum supported_architecture arch, const char *data_src)
180 | {
181 | instr->set_prefix |= ESCAPE;
182 | uint8_t curr = *(data_src + instr->length);
183 |
184 | if(curr == 0x3A || curr == 0x38)
185 | {
186 | instr->set_prefix |= OP3B;
187 |
188 | instr->prefixes[instr->prefix_cnt++] = curr;
189 | instr->length++;
190 | instr->op = *(data_src + instr->length);
191 | instr->length++;
192 |
193 | if(curr == 0x3A)
194 | x64id_decode_modrm(instr, arch, data_src, modreg_3b_3A, imm_byte_3b_3A, NULL);
195 | else
196 | x64id_decode_modrm(instr, arch, data_src, modreg_3b_38, imm_byte_3b_38, NULL);
197 |
198 | return instr->length;
199 | }
200 |
201 | instr->op = curr;
202 | instr->length++;
203 |
204 | x64id_decode_modrm(instr, arch, data_src, modrm_2b, imm_byte_2b, op2b_labels);
205 |
206 | return instr->length;
207 | }
208 |
209 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data, int offset) {
210 | memset(instr, 0, sizeof(struct instruction));
211 |
212 | char *start_data = (data + offset);
213 | uint8_t curr = *start_data;
214 |
215 | while(x86_64_prefix[curr] & arch)
216 | {
217 | switch(curr) {
218 | case 0x26:
219 | instr->set_prefix |= ES;
220 | break;
221 | case 0x2E:
222 | instr->set_prefix |= CS;
223 | break;
224 | case 0x36:
225 | instr->set_prefix |= SS;
226 | break;
227 | case 0x3E:
228 | instr->set_prefix |= DS;
229 | break;
230 | case 0x48:
231 | case 0x49:
232 | if(arch == X64)
233 | instr->set_prefix |= OP64;
234 | break;
235 | case 0x64:
236 | instr->set_prefix |= FS;
237 | break;
238 | case 0x65:
239 | instr->set_prefix |= GS;
240 | break;
241 | case 0x66:
242 | instr->set_prefix |= OS;
243 | break;
244 | case 0x67:
245 | instr->set_prefix |= AS;
246 | break;
247 | }
248 |
249 | instr->set_field |= PREFIX;
250 | instr->prefixes[instr->prefix_cnt] = curr;
251 | instr->prefix_cnt++;
252 | instr->length++;
253 |
254 | // Rex prefix
255 | // TODO 64-bit mode: IF OP == 90h and REX.B == 1,
256 | // then the instruction is XCHG r8, rAX
257 | if(arch == X64 && (curr >= 0x40 && curr <= 0x4F))
258 | {
259 | instr->rex.value = curr;
260 | instr->set_field |= REX;
261 | }
262 | else if(curr == 0x0F)
263 | {
264 | x64id_decode_2b(instr, arch, start_data);
265 | #ifdef _ENABLE_RAW_BYTES
266 | memcpy(instr->instr, start_data, instr->length);
267 | #endif
268 | return instr->length;
269 | }
270 |
271 | curr = (uint8_t) *(start_data + instr->length);
272 | }
273 |
274 | size_t vex_size = x64id_vex_size(instr, arch, start_data);
275 | if(vex_size)
276 | x64id_vex_decode(instr, arch, start_data, vex_size);
277 | else
278 | {
279 | instr->length++;
280 | instr->op = curr;
281 | x64id_decode_modrm(instr, arch, start_data, modrm_1b, imm_byte_1b, op1b_labels);
282 | }
283 |
284 | #ifdef _ENABLE_RAW_BYTES
285 | memcpy(instr->instr, start_data, instr->length);
286 | #endif
287 |
288 | return instr->length;
289 | }
--------------------------------------------------------------------------------
/src/x64id.h:
--------------------------------------------------------------------------------
1 | #ifndef x64id_H
2 | #define x64id_H
3 |
4 | #include
5 | #include
6 | #include
7 |
8 | #define _ENABLE_RAW_BYTES
9 | #define _ENABLE_VEX_INFO
10 |
11 | enum supported_architecture { // decoding modes; the values are distinct bits so look-up tables can store a mask of modes
12 | X86 = 1,
13 | X64 = 2
14 | };
15 |
16 | enum decode_status { // status codes; only an error value is defined here
17 | x64id_ERROR = 0,
18 | };
19 |
20 | #define ALL (X86 | X64) // mask matching both decoding modes
21 |
22 | // Indexed by byte value. A non-zero entry is the mask of decoding modes (X86, X64 or ALL) in which
23 | // instruction prefix look-up table: x64id_decode() consumes the byte in its prefix loop when (entry & arch) is non-zero; 0x0F (escape) is listed so the loop can dispatch it
24 | static size_t x86_64_prefix[256] = {
25 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
26 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, ALL,
27 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
28 | /* 20 */ 0, 0, 0, 0, 0, 0, ALL,0, 0, 0, 0, 0, 0, 0, ALL,0,
29 | /* 30 */ 0, 0, 0, 0, 0, 0, ALL,0, 0, 0, 0, 0, 0, 0, ALL,0,
30 | /* 40 */ X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64,X64, // REX prefixes
31 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
32 | /* 60 */ 0, 0, 0, 0, ALL,ALL,ALL,ALL,0, 0, 0, 0, 0, 0, 0, 0,
33 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
34 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
35 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
36 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
37 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
38 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
39 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
40 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
41 | /* F0 */ ALL,0,ALL, ALL,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
42 | };
43 |
44 | //
45 | // 1-byte lookup table: modrm_1b[opcode] is non-zero when the one-byte opcode is followed by a ModRM byte
46 | //
47 | #define X87_FPU 2 // table value marking the coprocessor escape opcodes (D8..DF); the decoder sets the FPU feature bit for it
48 |
49 | static size_t modrm_1b[256] = {
50 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
51 | /* 00 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
52 | /* 10 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
53 | /* 20 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
54 | /* 30 */ 1, 1, 1, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0,
55 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
56 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
57 | /* 60 */ 0, 0, 1, 1, 0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 0, 0,
58 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
59 | /* 80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
60 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
61 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
62 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
63 | /* C0 */ 1, 1, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0,
64 | /* D0 */ 1, 1, 1, 1, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, // 2 = Coprocessor Escape
65 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
66 | /* F0 */ 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1
67 | };
68 |
69 | #define b 1 // byte
70 | #define v 2 // word, dword or qword (64bit mode), depending on OS attribute
71 | #define z 3 // word for 16bit OS or dword for 32/64-bit OS
72 | #define p 4 // 32-bit, 48-bit, or 80-bit pointer, depending on operand-size attribute
73 | #define z1 6 // word for 16bit OS or dword for 32/64-bit OS (used by opcodes A0..A3)
74 | #define w 7 // word
75 | #define wb 8 // word, byte
76 | #define gr3b 9 // byte (imm exists only if mod.reg == 0)
77 | #define gr3z 10 // word, dword depending on OS (imm exists only if mod.reg == 0)
78 |
79 | static size_t imm_byte_1b[256] = { // immediate kind of each one-byte opcode (one of the codes above, 0 = no immediate); turned into a byte count by x64id_imm_size()
80 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
81 | /* 00 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0,
82 | /* 10 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0,
83 | /* 20 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0,
84 | /* 30 */ 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0, b, z, 0, 0,
85 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
86 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
87 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, z, z, b, b, 0, 0, 0, 0,
88 | /* 70 */ b, b, b, b, b, b, b, b, b, b, b, b, b, b, b, b,
89 | /* 80 */ b, z, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
90 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, p, 0, 0, 0, 0, 0,
91 | /* A0 */ z1,z1,z1,z1, 0, 0, 0, 0, b, z, 0, 0, 0, 0, 0, 0,
92 | /* B0 */ b, b, b, b, b, b, b, b, v, v, v, v, v, v, v, v,
93 | /* C0 */ b, b, w, 0, 0, 0, b, z, wb, 0, w, 0, 0, b, 0, 0,
94 | /* D0 */ 0, 0, 0, 0, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
95 | /* E0 */ b, b, b, b, b, b, b, b, z, z, p, b, 0, 0, 0, 0,
96 | /* F0 */ 0, 0, 0, 0, 0, 0, gr3b, gr3z, 0, 0, 0, 0, 0, 0, 0, 0
97 | };
98 |
99 | /*
100 | * Jump classification codes, one hexadecimal digit (nibble) per property.
101 | * high nibble, width of the relative offset:
102 | * - 1: 1-byte
103 | * - 2: 4-byte
104 | * low nibble, kind of jump:
105 | * - 1: Jcc
106 | * - 2: JMP
107 | *
108 | */
109 | #define j1 0x12
110 | #define j2 0x22
111 | #define jc1 0x11
112 | #define jc2 0x21
113 |
114 | // check if the OP is Jcc or JMP: op1b_labels[opcode] is one of the codes above, or 0 when the one-byte opcode is not a relative jump
115 | static size_t op1b_labels[256] = {
116 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
117 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
118 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
119 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
120 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
121 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
122 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
123 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
124 | /* 70 */ jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1, jc1,
125 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
126 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
127 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
128 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
129 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
130 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
131 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, j2, 0, j1, 0, 0, 0, 0,
132 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
133 | };
134 |
135 | //
136 | // 2-byte OP look-up table (opcodes introduced by the 0x0F escape). The bits below name the prefix forms listed for an opcode.
137 |
138 | // 0x0f
139 | #define OE 0x01
140 | // 0x66 0x0f
141 | #define O66 0x02
142 | // 0xf2 0x0f
143 | #define OF2 0x04
144 | // 0xf3 0x0f
145 | #define OF3 0x08
146 |
147 | #define P1 (OE)
148 | #define P2 (O66 | OE)
149 | #define P4 (OF3 | OE)
150 | #define P5 (O66 | OF2)
151 | #define P6 (OE | O66 | OF3)
152 | #define P7 (OE | O66 | OF2 | OF3)
153 | #define P8 (O66 | OF2 | OF3)
154 |
155 | static size_t modrm_2b[256] = { // non-zero when the 0F-map opcode is followed by a ModRM byte; the value is a mask of the prefix bits above
156 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
157 | /* 00 */ P1,P1,P1,P1,0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
158 | /* 10 */ P7,P7,P7,P2,P2,P2,P6,P2,P1,0, 0, 0, 0, 0, 0, P1,
159 | /* 20 */ P1,P1,P1,P1,0, 0, 0, 0, P2,P2,P7,P2,P7,P7,P2,P2,
160 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
161 | /* 40 */ P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,
162 | /* 50 */ P2,P7,P4,P4,P2,P2,P2,P2,P7,P7,P7,P6,P7,P7,P7,P7,
163 | /* 60 */ P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,O66,O66,P2,P2,
164 | /* 70 */ P7,P1,P1,P1,P2,P2,P2,P1,P1,P1, 0, 0,P5,P5,P6,P6,
165 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
166 | /* 90 */ P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,P1,
167 | /* A0 */ 0, 0, 0, P1,P1,P1, 0, 0, 0, 0, 0,P1,P1,P1,P1,P1,
168 | /* B0 */ P1,P1,P1,P1,P1,P1,P1,P1,OF3,P1,P1,P1,P4,P4,P1,P1,
169 | /* C0 */ P1,P1,P7,P1,P2,P2,P2,P1, 0, 0, 0, 0, 0, 0, 0, 0,
170 | /* D0 */ P5,P2,P2,P2,P2,P2,P8,P2,P2,P2,P2,P2,P2,P2,P2,P2,
171 | /* E0 */ P2,P2,P2,P2,P2,P2,P8,P2,P2,P2,P2,P2,P2,P2,P2,P2,
172 | /* F0 */ OF2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2, 0
173 | };
174 |
175 | static size_t imm_byte_2b[256] = { // immediate kind of each 0F-map opcode (b or z, 0 = no immediate); row 80 holds the z-sized offsets of the long conditional jumps
176 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
177 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
178 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
179 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
180 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
181 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
182 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
183 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
184 | /* 70 */ b, b, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
185 | /* 80 */ z, z, z, z, z, z, z, z, z, z, z, z, z, z, z, z,
186 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
187 | /* A0 */ 0, 0, 0, 0, b, 0, 0, 0, 0, 0, 0, 0, b, 0, 0, 0,
188 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, b, 0, 0, 0, 0, 0,
189 | /* C0 */ 0, 0, b, 0, b, b, b, 0, 0, 0, 0, 0, 0, 0, 0, 0,
190 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
191 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
192 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
193 | };
194 |
195 | // check if the OP is Jcc or JMP: jump classification of the 0F-map opcodes; only row 80 (conditional jumps with a 4-byte offset class, jc2) is set
196 | static size_t op2b_labels[256] = {
197 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
198 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
199 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
200 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
201 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
202 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
203 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
204 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
205 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
206 | /* 80 */ jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2, jc2,
207 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
208 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
209 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
210 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
211 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
212 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
213 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
214 | };
215 |
216 | //
217 | // 3-byte OP look-up table: extra prefix-mask combinations used by the three-byte opcode maps
218 |
219 | #define OP3 (OE | OF2 | O66)
220 | #define OP2 (OE | OF2 | OF3)
221 | #define OP4 (O66 | OF2 | OF3)
222 |
223 | //
224 | // 3-byte OP look-up table (0x38): non-zero when the 0F 38 opcode is followed by a ModRM byte
225 | static size_t modreg_3b_38[256] = {
226 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
227 | /* 00 */ P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,P2,O66,O66,O66,O66,
228 | /* 10 */ O66,0,0,O66,O66,O66,O66,O66,0,0,0,0,O66,O66,O66,0,
229 | /* 20 */ O66,O66,O66,O66,O66,O66,0,0,O66,O66,O66,O66,O66,O66,O66,O66,
230 | /* 30 */ O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,
231 | /* 40 */ O66,O66, 0, 0, 0, O66,O66,O66, 0, 0, 0, 0, 0, 0, 0, 0,
232 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
233 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
234 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
235 | /* 80 */ O66,O66,O66,0, 0, 0, 0, 0, 0, 0, 0, 0,O66,0,O66,0,
236 | /* 90 */ O66,O66,O66,O66, 0, 0,O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,
237 | /* A0 */ 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,
238 | /* B0 */ 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66,O66,O66,
239 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, O66,O66,O66,O66,O66,O66,O66,O66,
240 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, O66, O66, O66,
241 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, O66, O66, O66,
242 | /* F0 */ OP3, OP3, OE, 0, 0, OP2, OP4, P7, 0, 0, 0, 0, 0, 0, 0, 0
243 | };
244 |
245 | static size_t imm_byte_3b_38[256] = { // immediate kind of each 0F 38 opcode; every entry is 0 (no immediate)
246 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
247 | /* 00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
248 | /* 10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
249 | /* 20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
250 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
251 | /* 40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
252 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
253 | /* 60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
254 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
255 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
256 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
257 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
258 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
259 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
260 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
261 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
262 | /* F0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
263 | };
264 |
265 | #define OP5 (OE | O66) // prefix mask: forms with and without the 0x66 prefix
266 |
267 | //
268 | // 3-byte OP look-up table (0x3A): non-zero when the 0F 3A opcode is followed by a ModRM byte
269 |
270 | static size_t modreg_3b_3A[256] = {
271 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
272 | /* 00 */ O66, O66, O66, 0, O66, O66, O66, 0, O66, O66, O66, O66,O66 , O66, O66, OP5,
273 | /* 10 */ 0, 0, 0, 0, O66, O66, O66, O66, O66, O66, 0, 0, 0, O66, 0, 0,
274 | /* 20 */ O66, O66, O66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
275 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, O66, O66, 0, 0, 0, 0, 0, 0,
276 | /* 40 */ O66, O66, O66, 0, O66, 0, O66, 0, 0, 0, O66, O66, O66, 0, 0, 0,
277 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
278 | /* 60 */ O66, O66, O66, O66, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
279 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
280 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
281 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
282 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
283 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
284 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66, 0, 0, 0,
285 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, O66,
286 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
287 | /* F0 */ OF2, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
288 | };
289 |
290 | static size_t imm_byte_3b_3A[256] = { // immediate kind of each 0F 3A opcode; the literal 1 has the same value as the byte code `b`, 0 = no immediate
291 | // 00 01 02 03 04 05 06 07 08 09 0A 0B 0C 0D 0E 0F
292 | /* 00 */ 1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1,
293 | /* 10 */ 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 0, 0,
294 | /* 20 */ 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
295 | /* 30 */ 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0,
296 | /* 40 */ 1, 1, 1, 0, 1, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 0,
297 | /* 50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
298 | /* 60 */ 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
299 | /* 70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
300 | /* 80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
301 | /* 90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
302 | /* A0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
303 | /* B0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
304 | /* C0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0,
305 | /* D0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
306 | /* E0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
307 | /* F0 */ 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
308 | };
309 |
310 | enum jmp_type { // jump classification stored in instruction.jcc_type
311 | JCC_SHORT = 1, // conditional jump, 1-byte relative offset
312 | JCC_FAR = 2, // conditional jump from the 0F map (two opcode bytes), 4-byte relative offset class
313 | JMP_SHORT = 4, // unconditional jump, 1-byte relative offset
314 | JMP_FAR = 8, // unconditional jump, 4-byte relative offset class
315 | };
316 |
317 | enum prefixes { // bits of instruction.set_prefix
318 | ES = 1, // 0x26
319 | CS = 2, // 0x2E
320 | SS = 4, // 0x36
321 | DS = 8, // 0x3E
322 | FS = 16, // 0x64
323 | GS = 32, // 0x65
324 | OS = 64, // 0x66
325 | AS = 128,// 0x67
326 | REPNE = 256,
327 | REPE = 512,
328 | OP64 = 1024, // 64-bit operand size requested by a REX prefix (64-bit mode only)
329 | VEX = 2048 // instruction carries a VEX prefix
330 | };
331 |
332 | enum instruction_feature { // bits of instruction.set_field: which parts were found in the instruction
333 | PREFIX = 1,
334 | ESCAPE = 2, // 0x0F
335 | OP = 4,
336 | OP3B = 8, // opcode from a three-byte map (0F 38 / 0F 3A)
337 | MODRM = 16,
338 | SIB = 32,
339 | REX = 64,
340 | DISP = 128,
341 | IMM = 512, // note: the value 256 is not assigned
342 | FPU = 1024, // coprocessor escape opcode
343 | };
344 |
345 | /*
346 | * VEX FORMAT
347 | *
348 | * 3-byte VEX: byte 0 = 11000100 (0xC4); byte 1 bits 7,6,5 = R,X,B and bits 4..0 = m-mmmm;
349 | * byte 2 bit 7 = W, bits 6..3 = vvvv, bit 2 = L, bits 1..0 = pp
350 | *
351 | * 2-byte VEX: byte 0 = 11000101 (0xC5);
352 | * byte 1 bit 7 = R, bits 6..3 = vvvv, bit 2 = L, bits 1..0 = pp
353 | *
354 | * pp: equivalent to a SIMD prefix (values in binary)
355 | * 00 : None
356 | * 01 : 0x66
357 | * 10 : 0xF3
358 | * 11 : 0xF2
359 | *
360 | * m-mmmm:
361 | * 00000: Reserved for future use (will #UD)
362 | * 00001: implied 0F leading opcode byte
363 | * 00010: implied 0F 38 leading opcode bytes
364 | * 00011: implied 0F 3A leading opcode bytes
365 | * 00100-11111: Reserved for future use (will #UD)
366 | *
367 | */
368 |
369 | #ifdef _ENABLE_VEX_INFO
370 | struct vex_info { // decoded view of the VEX prefix, filled by x64id_vex_decode()
371 | struct {
372 | uint8_t type; // first VEX byte: 0xC5 (2-byte form) or 0xC4 (3-byte form)
373 | union { // payload byte of the 2-byte form
374 | struct byte2 {
375 | uint8_t vex_pp: 2;
376 | uint8_t vex_l: 1;
377 | uint8_t vex_v: 4;
378 | uint8_t vex_r: 1;
379 | } vexc5b;
380 | uint8_t val5; // raw value of that byte
381 | };
382 | union { // two payload bytes of the 3-byte form
383 | struct byte3 {
384 | uint8_t vex_m : 5;
385 | uint8_t vex_b : 1;
386 | uint8_t vex_x : 1;
387 | uint8_t vex_r : 1;
388 |
389 | uint8_t vex_pp : 2;
390 | uint8_t vex_l : 1;
391 | uint8_t vex_v : 4;
392 | uint8_t vex_w : 1;
393 | } vexc4b;
394 | uint16_t val4; // raw value of those two bytes, copied with memcpy
395 | };
396 | };
397 | };
398 | #endif
399 |
400 | struct instruction { // everything the decoder extracts from one instruction; cleared at the start of x64id_decode()
401 | uint64_t disp; // displacement bytes, copied raw (disp_len bytes)
402 | uint64_t imm; // immediate bytes, copied raw (imm_len bytes)
403 | uint64_t label; // target address computed for opcodes listed in the jump tables
404 |
405 | #ifdef _ENABLE_VEX_INFO
406 | struct vex_info _vex; // decoded VEX fields
407 | #endif
408 |
409 | #ifdef _ENABLE_RAW_BYTES
410 | uint8_t instr[15]; // copy of the instruction bytes
411 | #endif
412 |
413 | uint8_t prefixes[4]; // prefix bytes in the order they were consumed (prefix_cnt entries)
414 | uint8_t op; // opcode byte
415 |
416 | union // ModRM byte, raw and split into fields
417 | {
418 | struct
419 | {
420 | uint8_t rm : 3;
421 | uint8_t reg : 3;
422 | uint8_t mod : 2;
423 | } bits;
424 | uint8_t value;
425 | } modrm;
426 |
427 | union // REX prefix byte, raw and split into fields
428 | {
429 | struct
430 | {
431 | uint8_t rex_b : 1;
432 | uint8_t rex_x : 1;
433 | uint8_t rex_r : 1;
434 | uint8_t rex_w : 1;
435 | } bits;
436 | uint8_t value;
437 | } rex;
438 |
439 | union // SIB byte, raw and split into fields
440 | {
441 | struct
442 | {
443 | uint8_t base : 3;
444 | uint8_t index : 3;
445 | uint8_t scaled : 2;
446 | } bits;
447 | uint8_t value;
448 | } sib;
449 |
450 | uint8_t vex[3]; // raw VEX prefix bytes (vex_cnt of them)
451 |
452 | int length; // number of bytes consumed so far; the instruction length once decoding ends
453 | int disp_len; // size of the displacement in bytes
454 | int imm_len; // size of the immediate in bytes
455 |
456 | uint16_t set_prefix; // bit mask of enum prefixes values
457 | uint16_t set_field; // bit mask of enum instruction_feature values
458 | uint8_t jcc_type; // enum jmp_type value, 0 when the opcode is not a listed jump
459 |
460 | int8_t vex_cnt; // number of bytes stored in vex[]
461 | int8_t prefix_cnt; // number of bytes stored in prefixes[]
462 | };
463 |
464 |
465 | //
466 | // Functions: x64id_decode is the public entry point; the static ones are helpers defined in x64id.c
467 | //
468 | int x64id_decode(struct instruction *instr, enum supported_architecture arch, char *data_src, int offset); // decodes the instruction at data_src + offset into instr and returns its length
469 | static void x64id_decode_modrm(struct instruction *instr, enum supported_architecture arch, const char *data_src, const size_t *modrm_table, const size_t *imm_table, const size_t *jcc_table); // ModRM, SIB, displacement, immediate and jump target
470 | static inline bool x64id_check_sib(uint8_t mod, uint8_t rm); // true when a SIB byte follows the ModRM byte
471 | static inline int x64id_displacement_size(uint8_t mod, uint8_t rm); // displacement size in bytes for the given mod/rm
472 | static inline int x64id_imm_size(struct instruction *instr, size_t val, enum supported_architecture arch); // immediate size in bytes for the immediate kind `val`
473 | static int x64id_decode_2b(struct instruction *instr, enum supported_architecture arch, const char *data_src); // opcodes introduced by the 0x0F escape
474 | static inline int x64id_vex_size(struct instruction *instr, enum supported_architecture arch, const char *data); // 3, 2 or 0: size of the VEX prefix at the current position
475 | static inline void x64id_vex_decode(struct instruction *instr, enum supported_architecture arch, const char *data, uint8_t vex_size); // instruction that starts with a VEX prefix
476 |
477 | #endif //x64id_H
478 |
--------------------------------------------------------------------------------
/test/CMakeLists.txt:
--------------------------------------------------------------------------------
cmake_minimum_required(VERSION 3.16)
project(x64id C)

# Test driver for the decoder.
#
# Every path is anchored to the directory of this file, so the target is
# defined correctly both when this file is pulled in with include() from the
# repository root and when it is configured on its own. The sources are
# listed on the target directly, which leaves variables of the including
# scope (such as COMMONS) untouched.
add_executable(x64id_test
  ${CMAKE_CURRENT_LIST_DIR}/../src/x64id.c
  ${CMAKE_CURRENT_LIST_DIR}/../src/utils/function_length.c
  ${CMAKE_CURRENT_LIST_DIR}/../src/vector.c
  ${CMAKE_CURRENT_LIST_DIR}/../src/queue.c
  ${CMAKE_CURRENT_LIST_DIR}/main_test.c
)

# The sources rely on C11; state it on the target so it does not depend on
# a variable set by the including project.
target_compile_features(x64id_test PRIVATE c_std_11)
9 |
10 |
--------------------------------------------------------------------------------
/test/asm_sources.tar.xz:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/DispatchCode/x64-Instruction-Decoder/44900ff87246e608d994377393cc990ff5cf07d9/test/asm_sources.tar.xz
--------------------------------------------------------------------------------
/test/main_test.c:
--------------------------------------------------------------------------------
1 | #include
2 | #include
3 | #include "../src/x64id.h"
4 |
5 | /*
6 | * Please, take a look at README before edit this file or tests (.asm) files
7 | */
8 |
9 | #define BUFFER_SIZE 16
10 |
11 | void instruction_info(struct instruction instr)
12 | {
13 | #ifdef _ENABLE_RAW_BYTES
14 | printf("RAW bytes (hex): ");
15 | for(int i=0; i ");
94 | printf("\n\nArchitecture must be 1 (x86) or 2 (x64).");
95 | exit(-1);
96 | }
97 |
98 | FILE *hfile = fopen(argv[1], "rb");
99 | int arch = atoi(argv[2]);
100 |
101 | if (hfile == NULL) {
102 | printf("ERROR: cannot open file!\n");
103 | exit(-1);
104 | }
105 |
106 | printf("\n\tSELECTED_ARCHITECTURE: %d\n", arch);
107 |
108 | struct instruction instr = {0};
109 | printf("Structure size: %d-bytes\n", sizeof(struct instruction));
110 | int reads;
111 | int offset = 0;
112 |
113 | uint8_t buf[BUFFER_SIZE];
114 |
115 | int instr_count = 0;
116 | while(!feof(hfile)) {
117 | reads = fread(buf, sizeof(char), BUFFER_SIZE, hfile);
118 | int cc_bytes = count_bytes(buf);
119 | if(cc_bytes == 0)
120 | continue; // skip, it's just an invalid/not encoded OP
121 |
122 | printf("\nBytes from file: ");
123 | for(int i=0; i