├── CMakeLists.txt ├── LICENSE ├── README.md ├── config.c ├── config.h ├── disassemble.c ├── disassemble.h ├── instruction.c ├── instruction.h ├── main.c ├── peparser.c ├── peparser.h ├── search.c └── search.h /CMakeLists.txt: -------------------------------------------------------------------------------- 1 | cmake_minimum_required(VERSION 3.8) 2 | project(Kaleidoscope) 3 | 4 | set(CMAKE_C_STANDARD 99) 5 | 6 | set(SOURCE_FILES main.c config.c config.h disassemble.c instruction.h disassemble.h instruction.c peparser.c peparser.h search.c search.h) 7 | add_executable(klp ${SOURCE_FILES}) -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 Zack Huang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Kaleidoscope 2 | 3 | A simple x86 (32 bit) disassembler written in C 4 | 5 | ## Features 6 | 7 | - decode common one-byte and two-byte x86-32 opcodes 8 | - support 0x66 (operand-size override) prefix 9 | - parse PE files and calculate virtual addresses (used in JMP, etc.) automatically 10 | 11 | ## Building 12 | 13 | Kaleidoscope has been tested to compile and run on: 14 | 15 | - Windows 10, CLion, MinGW w64 3.4, CMake 3.13.2 16 | 17 | ## Usage 18 | 19 | ``` 20 | klp -s SIZE [-a ADDR] [-b BASE] [-h] FILE 21 | ``` 22 | 23 | `-s SIZE`: disassemble `SIZE` bytes (in decimal) starting from `ADDR` 24 | 25 | `-a ADDR`: start disassembling from file offset `ADDR` (in hex) 26 | 27 | `-b BASE`: specify the image base in hex (i.e. the address an executable is loaded to in memory); the disassembler will add `BASE` to file offsets to form virtual addresses (VA). Note: this `BASE` is calculated automatically if `FILE` is a PE; you don't need to specify it manually. 28 | 29 | `-h`: display this help message 30 | 31 | ## License 32 | 33 | MIT License -------------------------------------------------------------------------------- /config.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/8/17. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include "config.h" 9 | 10 | DisassemblerConfig cf = {0}; 11 | 12 | static void usage(const char *executable) { 13 | printf("Usage: %s -s SIZE [-a ADDR] [-b BASE] [-h] FILE\n", executable); 14 | printf("-s SIZE\t\tdisassemble SIZE bytes (in decimal) starting from ADDR\n"); 15 | printf("-a ADDR\t\tstart disassembling from file offset ADDR (in hex)\n"); 16 | printf("-b BASE\t\tspecify the image base in hex (i.e. 
the address an executable is loaded to in memory); " 17 | "the disassember will add BASE to file offsets to form virtual addresses (VA). " 18 | "Note: this BASE is calculated automatically if FILE is a PE; you don't need to specify it manually.\n"); 19 | printf("-h\t\tdisplay this help message\n"); 20 | } 21 | 22 | void conf_parse_args(int argc, char **argv) { 23 | if (argc < 2) { 24 | usage(argv[0]); 25 | exit(-1); 26 | } 27 | 28 | // loop through argv 29 | for (int i = 1; i < argc; i++) { 30 | if (strcmp(argv[i], "-m16") == 0) { 31 | cf.mode_bitwidth = BIT_WIDTH_16; 32 | } else if (strcmp(argv[i], "-m32") == 0) { 33 | cf.mode_bitwidth = BIT_WIDTH_32; 34 | } else if (strcmp(argv[i], "-m64") == 0) { 35 | cf.mode_bitwidth = BIT_WIDTH_64; 36 | } else if (strcmp(argv[i], "-s") == 0) { 37 | cf.size_to_disasm = (unsigned int)strtol(argv[i+1], 0, 10); 38 | i++; 39 | } else if (strcmp(argv[i], "-a") == 0) { 40 | cf.start_address = strtoul(argv[i+1], 0, 16); 41 | i++; 42 | } else if (strcmp(argv[i], "-b") == 0) { 43 | cf.image_base = strtoul(argv[i+1], 0, 16); 44 | i++; 45 | } else if (strcmp(argv[i], "-h") == 0) { 46 | usage(argv[0]); 47 | exit(0); 48 | } else if (*argv[i] == '-') { 49 | fprintf(stderr, "conf_parse_args(): ignoring invalid argument '%s'!\n", argv[i]); 50 | usage(argv[0]); 51 | } else { 52 | if (cf.size_to_disasm == 0) { 53 | fprintf(stderr, "conf_parse_args(): disassembly of PE files requires the '-s' option! " 54 | "Either you did not specify it, or the argument specified is invalid!\n"); 55 | usage(argv[0]); 56 | exit(-1); 57 | } 58 | cf.disasm_file = argv[i]; 59 | } 60 | } 61 | } 62 | -------------------------------------------------------------------------------- /config.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/8/17. 
3 | // 4 | 5 | #ifndef KALEIDOSCOPE_CONFIG_H 6 | #define KALEIDOSCOPE_CONFIG_H 7 | 8 | /* supported ISAs (Instruction Set Architectures) */ 9 | typedef enum { 10 | ISA_INTEL_X86, 11 | } ISA; 12 | 13 | /* disassembly modes, 16-bit or 32-bit or 64-bit */ 14 | typedef enum { BIT_WIDTH_16 = 0x1, BIT_WIDTH_32 = 0x2, BIT_WIDTH_64 = 0x4, BIT_WIDTH_UNSPECIFIED = 0xff, } BitWidth; 15 | 16 | /* disassembler config, working mode, etc. */ 17 | typedef struct { 18 | ISA mode_isa; 19 | BitWidth mode_bitwidth; /* selected with -m16 / -m32 / -m64 */ 20 | const char *disasm_file; /* path given as the non-option argument */ 21 | unsigned int size_to_disasm; /* value of -s, parsed as decimal */ 22 | unsigned long int start_address; /* value of -a, parsed as hex */ 23 | unsigned long int image_base; /* value of -b, parsed as hex */ 24 | } DisassemblerConfig; 25 | extern DisassemblerConfig cf; 26 | 27 | /* parse command-line arguments and config accordingly */ 28 | void conf_parse_args(int argc, char **argv); 29 | 30 | #endif //KALEIDOSCOPE_CONFIG_H 31 | -------------------------------------------------------------------------------- /disassemble.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/8/19. 3 | // 4 | 5 | #include 6 | #include 7 | #include 8 | #include "peparser.h" 9 | #include "disassemble.h" 10 | #include "instruction.h" 11 | #include "config.h" 12 | #include "search.h" 13 | /* ModR/M byte fields: mod = bits 7-6, reg/opcode = bits 5-3, r/m = bits 2-0 */ 14 | #define MODRM_MOD(modrm) (((modrm) & 0b11000000u) >> 6u) 15 | #define MODRM_REGOPCODE(modrm) (((modrm) & 0b00111000u) >> 3u) 16 | #define MODRM_RM(modrm) ((modrm) & 0b00000111u) 17 | /* SIB byte fields: scale = bits 7-6, index = bits 5-3, base = bits 2-0 */ 18 | #define SIB_SCALE(sib) (((sib) & 0b11000000u) >> 6u) 19 | #define SIB_INDEX(sib) (((sib) & 0b00111000u) >> 3u) 20 | #define SIB_BASE(sib) ((sib) & 0b00000111u) 21 | /* SIGN yields '+' or '-' for num, MAGNITUDE its absolute value; both evaluate num more than once */ 22 | #define SIGN(num) ((num) >= 0 ? '+' : '-') 23 | #define MAGNITUDE(num) ((num) >= 0 ? 
(num) : -(num)) 24 | 25 | static inline uint8_t u8(unsigned const char buf[]) { 26 | return buf[0]; 27 | } 28 | 29 | static inline uint16_t u16(unsigned const char buf[]) { 30 | return (buf[0] | (uint16_t)(buf[1] << 8u)); 31 | } 32 | 33 | static inline uint32_t u32(unsigned const char buf[]) { 34 | return (buf[0] | (uint32_t)(buf[1] << 8u) | (uint32_t)(buf[2] << 16u) | (uint32_t)(buf[3] << 24u)); 35 | } 36 | 37 | int is_prefix(unsigned const char byte) { 38 | return (byte == 0x26 || byte == 0x2e || byte == 0x36 || byte == 0x3e 39 | || byte == 0x64 || byte == 0x65 || byte == 0x66 || byte == 0x67 40 | || byte == 0xf0 || byte == 0xf2 || byte == 0xf3); 41 | } 42 | 43 | int is_opcode_extended(unsigned const char opcode1) { 44 | return ((opcode1 == 0x0f) || // two-byte opcodes 45 | (opcode1 >= 0x80 && opcode1 <= 0x83) || (opcode1 >= 0xc0 && opcode1 <= 0xc1) || 46 | (opcode1 >= 0xd0 && opcode1 <= 0xd3) || (opcode1 >= 0xd8 && opcode1 <= 0xdf) || 47 | (opcode1 >= 0xf6 && opcode1 <= 0xf7) || (opcode1 >= 0xfe && opcode1 <= 0xff)); 48 | } 49 | 50 | void decode_prefixes(unsigned const char buf[], Disassembly *dis, CurrentInst *curr_inst) { 51 | assert(buf != NULL); 52 | 53 | unsigned const char *ptr_prefix = buf + dis->curr_inst_offset; 54 | while (is_prefix(*ptr_prefix)) { 55 | switch (*ptr_prefix) { 56 | case 0xf0: 57 | case 0xf2: 58 | case 0xf3: 59 | curr_inst->prefixes[0] = *ptr_prefix; 60 | break; 61 | case 0x2e: 62 | case 0x36: 63 | case 0x3e: 64 | case 0x26: 65 | case 0x64: 66 | case 0x65: 67 | curr_inst->prefixes[1] = *ptr_prefix; 68 | break; 69 | case 0x66: 70 | curr_inst->prefixes[2] = *ptr_prefix; 71 | curr_inst->effective_opsize = (cf.mode_bitwidth == BIT_WIDTH_32 ? BIT_WIDTH_16 : BIT_WIDTH_32); 72 | break; 73 | case 0x67: 74 | curr_inst->prefixes[3] = *ptr_prefix; 75 | curr_inst->effective_addrsize = (cf.mode_bitwidth == BIT_WIDTH_32 ? 
BIT_WIDTH_16 : BIT_WIDTH_32); 76 | break; 77 | default: 78 | return; 79 | } 80 | dis->curr_inst_offset++; 81 | ptr_prefix++; 82 | } 83 | } 84 | /* Look up the opcode at buf + dis->curr_inst_offset. Plain one-byte opcodes are searched in standard_insts, 0x0f-escaped opcodes in extended_insts (by their second byte), and the remaining extended opcodes in extended_group_insts (by opcode plus the ModR/M reg/opcode field of the next byte). The first entry whose opsize mask contains curr_inst->effective_opsize supplies curr_inst->mnemonic and curr_inst->opcount, and that entry's info is returned; when nothing matches, mnemonic is left untouched. Only the two-byte path advances curr_inst_offset (by one, on a match). */ 85 | InstInfo decode_opcodes(unsigned const char buf[], Disassembly *dis, CurrentInst *curr_inst) { 86 | assert(buf != NULL); 87 | int i = 0; 88 | 89 | unsigned const char *ptr_opcode = buf + dis->curr_inst_offset; 90 | curr_inst->opcode1 = *ptr_opcode; 91 | int is_extended = is_opcode_extended(*ptr_opcode); 92 | 93 | // FIXME: ugly copy-and-paste programming! 94 | if (!is_extended) { 95 | // standard one-byte opcode 96 | i = binary_search_lower(standard_insts, 0, standard_insts_len - 1, curr_inst->opcode1); 97 | if (i < 0) return standard_insts[standard_insts_len-1].info; // not found; return an empty entry 98 | for (; standard_insts[i].opcode == curr_inst->opcode1; i++) { 99 | if (curr_inst->effective_opsize & standard_insts[i].info.opsize) { 100 | curr_inst->mnemonic = standard_insts[i].info.mnemonic; 101 | curr_inst->opcount = standard_insts[i].info.opcount; 102 | break; 103 | } 104 | } 105 | return standard_insts[i].info; 106 | } else { 107 | if (*ptr_opcode == 0x0f) { 108 | // two-byte opcode 109 | curr_inst->opcode2 = *(ptr_opcode + 1); 110 | i = binary_search_lower(extended_insts, 0, extended_insts_len - 1, curr_inst->opcode2); 111 | if (i < 0) return extended_insts[extended_insts_len-1].info; // not found; return an empty entry 112 | for (; extended_insts[i].opcode == curr_inst->opcode2; i++) { 113 | if (curr_inst->effective_opsize & extended_insts[i].info.opsize) { 114 | curr_inst->mnemonic = extended_insts[i].info.mnemonic; 115 | curr_inst->opcount = extended_insts[i].info.opcount; 116 | dis->curr_inst_offset++; 117 | break; 118 | } 119 | } 120 | return extended_insts[i].info; 121 | } else { 122 | // one-byte opcode with opcode extension, e.g. 
0x83 123 | curr_inst->modrm = *(ptr_opcode + 1); 124 | for (i = 0; extended_group_insts[i].info.opsize != 0 && extended_group_insts[i].opcode <= curr_inst->opcode1; i++) { 125 | if (extended_group_insts[i].opcode == curr_inst->opcode1 && 126 | extended_group_insts[i].opcode_ex == MODRM_REGOPCODE(curr_inst->modrm)) { 127 | if (curr_inst->effective_opsize & extended_group_insts[i].info.opsize) { 128 | curr_inst->mnemonic = extended_group_insts[i].info.mnemonic; 129 | curr_inst->opcount = extended_group_insts[i].info.opcount; 130 | break; 131 | } 132 | } 133 | } 134 | return extended_group_insts[i].info; 135 | } 136 | } 137 | } 138 | /* Read an immediate at ptr_imm into curr_inst->imm and advance dis->curr_inst_offset by its width: 1, 2 or 4 bytes for OPR_BYTE / OPR_WORD / OPR_DWORD, and 2 or 4 bytes for OPR_WORD_DWORD depending on curr_inst->effective_opsize. Any other operand type consumes nothing. */ 139 | void decode_imm(unsigned const char *ptr_imm, Disassembly *dis, CurrentInst *curr_inst, OperandInfo opinfo) { 140 | switch (opinfo.optype) { 141 | case OPR_BYTE: 142 | curr_inst->imm = u8(ptr_imm); 143 | dis->curr_inst_offset += 1; 144 | break; 145 | case OPR_WORD: 146 | curr_inst->imm = u16(ptr_imm); 147 | dis->curr_inst_offset += 2; 148 | break; 149 | case OPR_DWORD: 150 | curr_inst->imm = u32(ptr_imm); 151 | dis->curr_inst_offset += 4; 152 | break; 153 | case OPR_WORD_DWORD: 154 | if (curr_inst->effective_opsize == BIT_WIDTH_16) { 155 | curr_inst->imm = u16(ptr_imm); 156 | dis->curr_inst_offset += 2; 157 | break; 158 | } else if (curr_inst->effective_opsize == BIT_WIDTH_32) { 159 | curr_inst->imm = u32(ptr_imm); 160 | dis->curr_inst_offset += 4; 161 | break; 162 | } /* any other operand size falls through to default and returns without consuming bytes */ 163 | default: 164 | return; 165 | } 166 | } 167 | /* Interpret curr_inst->sib for operand op_ordinal. The base field selects the base register, except that base == 101 with mod == 00 means "no base". An index field other than 100 selects the index register and real_scale (1, 2, 4 or 8); index == 100 means "no index", in which case addr_method is rewritten to the register-relative or direct-memory form and real_scale is set to 0. */ 168 | void decode_sib(int op_ordinal, CurrentInst *curr_inst) { 169 | int scale = SIB_SCALE(curr_inst->sib); 170 | int index = SIB_INDEX(curr_inst->sib); 171 | int base = SIB_BASE(curr_inst->sib); 172 | 173 | // https://css.csail.mit.edu/6.858/2014/readings/i386/s17_02.htm 174 | if (base == 0b101 && MODRM_MOD(curr_inst->modrm) == 0b00) { 175 | // no base, operand is of the form [index*scale+disp] 176 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_INDEX_SCALE; 177 | } else { 178 | 
curr_inst->operand[op_ordinal].reg = (uint8_t) (base + ADDR_EAX); 179 | } 180 | 181 | if (index != 0b100) { 182 | // operand is of the form [base+index*scale+disp] 183 | curr_inst->operand[op_ordinal].index_reg = (uint8_t)(index + ADDR_EAX); 184 | switch (scale) { 185 | case 0b00: 186 | curr_inst->operand[op_ordinal].real_scale = 1; 187 | break; 188 | case 0b01: 189 | curr_inst->operand[op_ordinal].real_scale = 2; 190 | break; 191 | case 0b10: 192 | curr_inst->operand[op_ordinal].real_scale = 4; 193 | break; 194 | case 0b11: 195 | curr_inst->operand[op_ordinal].real_scale = 8; 196 | break; 197 | default: 198 | return; 199 | } 200 | } else { 201 | if (curr_inst->operand[op_ordinal].addr_method != ASM_ADDR_INDEX_SCALE) { 202 | // no index/scale, operand is of the form [base+disp] 203 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG_RELATIVE; 204 | } else { 205 | // no base/index/scale, operand is of the form [disp] 206 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_MEM_DIRECT; 207 | } 208 | curr_inst->operand[op_ordinal].real_scale = 0; 209 | } 210 | } 211 | /* Operand op_ordinal is the register selected by the ModR/M reg/opcode field; which register group is used depends on opinfo.optype and curr_inst->effective_opsize. */ 212 | void decode_modrm_greg(int op_ordinal, CurrentInst *curr_inst, OperandInfo opinfo) { 213 | int regopcode = MODRM_REGOPCODE(curr_inst->modrm); 214 | 215 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 216 | 217 | // is it DWORD type operand? if so, set the base to ADDR_AX (see next comment) 218 | if ((opinfo.optype == OPR_WORD_DWORD && curr_inst->effective_opsize == BIT_WIDTH_32) || (opinfo.optype == OPR_DWORD)) { 219 | curr_inst->operand[op_ordinal].reg = ADDR_AX; 220 | } else { 221 | curr_inst->operand[op_ordinal].reg = ADDR_AL; 222 | } 223 | 224 | // add an offset to the base to arrive at the correct register. 225 | // Offset is, BYTE operand: regopcode, WORD or DWORD operand: regopcode + (ADDR_AX - ADDR_AL) 226 | curr_inst->operand[op_ordinal].reg += (opinfo.optype == OPR_BYTE ? 
regopcode : regopcode + (ADDR_AX - ADDR_AL)); 227 | } 228 | /* Decode the memory form of the ModR/M byte in curr_inst->modrm for operand op_ordinal: sets the operand's addr_method and registers, reads the SIB byte (once per instruction) and any disp8 / disp32 located at buf + dis->curr_inst_offset, and advances curr_inst_offset past every byte consumed. Nothing is done for mod == 11 (register form). */ 229 | void decode_modrm_mem(unsigned const char buf[], int op_ordinal, Disassembly *dis, CurrentInst *curr_inst) { 230 | int rm = MODRM_RM(curr_inst->modrm); 231 | int mod = MODRM_MOD(curr_inst->modrm); 232 | 233 | if (mod == 0b00) { 234 | if (rm == 0b101) { 235 | // operand is of the form [disp32] 236 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_MEM_DIRECT; 237 | curr_inst->displacement.size = 32; 238 | curr_inst->displacement.disp32 = u32(buf + dis->curr_inst_offset); 239 | dis->curr_inst_offset += sizeof(uint32_t); 240 | } else if (rm == 0b100) { 241 | // operand is of the form [base+index*scale] 242 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_BASE_INDEX_SCALE; 243 | 244 | if (!curr_inst->is_sib_decoded) { 245 | curr_inst->sib = *(buf + dis->curr_inst_offset); 246 | curr_inst->is_sib_decoded = 1; 247 | dis->curr_inst_offset++; 248 | decode_sib(op_ordinal, curr_inst); 249 | } else { 250 | decode_sib(op_ordinal, curr_inst); 251 | } /* with mod == 00 a SIB base field of 101 means "no base register, disp32 follows the SIB byte"; the displacement must be consumed here or the instruction length is wrong and every following instruction is decoded from the wrong offset */ if (SIB_BASE(curr_inst->sib) == 0b101) { curr_inst->displacement.size = 32; curr_inst->displacement.disp32 = u32(buf + dis->curr_inst_offset); dis->curr_inst_offset += sizeof(uint32_t); } 252 | } else { 253 | // operand is of the form [reg] 254 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG_INDIRECT; 255 | curr_inst->operand[op_ordinal].reg = (uint8_t)(rm + ADDR_EAX); 256 | } 257 | } else if (mod == 0b01) { 258 | if (rm == 0b100) { 259 | // operand is of the form [base+index*scale+disp8] 260 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_RELATIVE_BASE_INDEX; 261 | 262 | if (!curr_inst->is_sib_decoded) { 263 | curr_inst->sib = *(buf + dis->curr_inst_offset); 264 | curr_inst->is_sib_decoded = 1; 265 | dis->curr_inst_offset++; 266 | decode_sib(op_ordinal, curr_inst); 267 | } else { 268 | decode_sib(op_ordinal, curr_inst); 269 | } 270 | 271 | curr_inst->displacement.size = 8; 272 | curr_inst->displacement.disp8 = u8(buf + dis->curr_inst_offset); 273 | dis->curr_inst_offset += sizeof(uint8_t); 274 | } else { 275 | // operand is of the form [reg+disp8] 276 | curr_inst->operand[op_ordinal].addr_method = 
ASM_ADDR_REG_RELATIVE; 277 | curr_inst->operand[op_ordinal].reg = (uint8_t)(rm + ADDR_EAX); 278 | 279 | curr_inst->displacement.size = 8; 280 | curr_inst->displacement.disp8 = u8(buf + dis->curr_inst_offset); 281 | dis->curr_inst_offset += sizeof(uint8_t); 282 | } 283 | } else if (mod == 0b10) { 284 | if (rm == 0b100) { 285 | // operand is of the form [base+index*scale+disp32] 286 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_RELATIVE_BASE_INDEX; 287 | 288 | if (!curr_inst->is_sib_decoded) { 289 | curr_inst->sib = *(buf + dis->curr_inst_offset); 290 | curr_inst->is_sib_decoded = 1; 291 | dis->curr_inst_offset++; 292 | decode_sib(op_ordinal, curr_inst); 293 | } else { 294 | decode_sib(op_ordinal, curr_inst); 295 | } 296 | 297 | curr_inst->displacement.size = 32; 298 | curr_inst->displacement.disp32 = u32(buf + dis->curr_inst_offset); 299 | dis->curr_inst_offset += sizeof(uint32_t); 300 | } else { 301 | // operand is of the form [reg+disp32] 302 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG_RELATIVE; 303 | curr_inst->operand[op_ordinal].reg = (uint8_t)(rm + ADDR_EAX); 304 | 305 | curr_inst->displacement.size = 32; 306 | curr_inst->displacement.disp32 = u32(buf + dis->curr_inst_offset); 307 | dis->curr_inst_offset += sizeof(uint32_t); 308 | } 309 | } 310 | } 311 | 312 | void decode_modrm_gpreg_mem(unsigned const char buf[], int op_ordinal, Disassembly *dis, CurrentInst *curr_inst, OperandInfo opinfo) { 313 | int rm = MODRM_RM(curr_inst->modrm); 314 | int mod = MODRM_MOD(curr_inst->modrm); 315 | 316 | if (mod == 0b11) { 317 | // operand is a register 318 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 319 | 320 | // is it DWORD type operand? 
if so, set the base to ADDR_AX (see next comment) 321 | if ((opinfo.optype == OPR_WORD_DWORD && curr_inst->effective_opsize == BIT_WIDTH_32) || (opinfo.optype == OPR_DWORD)) { 322 | curr_inst->operand[op_ordinal].reg = ADDR_AX; 323 | } else { 324 | curr_inst->operand[op_ordinal].reg = ADDR_AL; 325 | } 326 | 327 | // add an offset to the base to arrive at the correct register. 328 | // Offset is, BYTE operand: rm, WORD or DWORD operand: rm + (ADDR_AX - ADDR_AL) 329 | curr_inst->operand[op_ordinal].reg += (opinfo.optype == OPR_BYTE ? rm : rm + (ADDR_AX - ADDR_AL)); 330 | } else { 331 | // operand is a memory location 332 | decode_modrm_mem(buf, op_ordinal, dis, curr_inst); 333 | } 334 | } 335 | /* Operand op_ordinal is the register base_reg + (ModR/M reg/opcode field); decode_modrm() passes ADDR_CR0, ADDR_DR0, ADDR_TR0 or ADDR_ES as base_reg. */ 336 | void decode_modrm_cdstreg(int op_ordinal, CurrentInst *curr_inst, AddressingMethod base_reg) { 337 | int regopcode = MODRM_REGOPCODE(curr_inst->modrm); 338 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 339 | curr_inst->operand[op_ordinal].reg = base_reg + regopcode; 340 | } 341 | /* Operand op_ordinal is the register ADDR_EAX + (ModR/M r/m field); the mod field is not examined. */ 342 | void decode_modrm_greg_only(int op_ordinal, CurrentInst *curr_inst) { 343 | int rm = MODRM_RM(curr_inst->modrm); 344 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 345 | curr_inst->operand[op_ordinal].reg = ADDR_EAX + rm; 346 | } 347 | /* Dispatch on opinfo.addr_method to the ModR/M decoder for that operand kind; any other addressing method is ignored. */ 348 | void decode_modrm(unsigned const char buf[], int op_ordinal, Disassembly *dis, CurrentInst *curr_inst, OperandInfo opinfo) { 349 | switch (opinfo.addr_method) { 350 | case ADDR_MODRM_GREG: 351 | decode_modrm_greg(op_ordinal, curr_inst, opinfo); 352 | break; 353 | case ADDR_MODRM_MEM: 354 | decode_modrm_mem(buf, op_ordinal, dis, curr_inst); 355 | break; 356 | case ADDR_MODRM_GPREG_MEM: 357 | decode_modrm_gpreg_mem(buf, op_ordinal, dis,curr_inst, opinfo); 358 | break; 359 | case ADDR_MODRM_MOD_GREG_ONLY: 360 | decode_modrm_greg_only(op_ordinal, curr_inst); 361 | break; 362 | case ADDR_CONTROL_REG: 363 | decode_modrm_cdstreg(op_ordinal, curr_inst, ADDR_CR0); 364 | break; 365 | case ADDR_DEBUG_REG: 366 | 
decode_modrm_cdstreg(op_ordinal, curr_inst, ADDR_DR0); 367 | break; 368 | case ADDR_MODRM_TREG: 369 | decode_modrm_cdstreg(op_ordinal, curr_inst, ADDR_TR0); 370 | break; 371 | case ADDR_MODRM_SREG: 372 | decode_modrm_cdstreg(op_ordinal, curr_inst, ADDR_ES); 373 | break; 374 | default: 375 | return; 376 | } 377 | } 378 | /* Decode operand number op_ordinal as described by opinfo: fills curr_inst->operand[op_ordinal] (plus curr_inst->imm, displacement or relative_offset where applicable) and advances dis->curr_inst_offset past every byte consumed. The ModR/M byte is fetched at most once per instruction. Always returns 0. */ 379 | int decode_operand(unsigned const char buf[], int op_ordinal, Disassembly *dis, CurrentInst *curr_inst, OperandInfo opinfo) { 380 | // point to the operand 381 | unsigned const char *ptr_operand = buf + dis->curr_inst_offset; 382 | curr_inst->operand[op_ordinal].optype = opinfo.optype; 383 | 384 | switch (opinfo.addr_method) { 385 | // register addressing 386 | case ADDR_AL: 387 | case ADDR_BL: 388 | case ADDR_CL: 389 | case ADDR_DL: 390 | case ADDR_AH: 391 | case ADDR_BH: 392 | case ADDR_CH: 393 | case ADDR_DH: 394 | case ADDR_CS: 395 | case ADDR_DS: 396 | case ADDR_ES: 397 | case ADDR_FS: 398 | case ADDR_GS: 399 | case ADDR_SS: 400 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 401 | curr_inst->operand[op_ordinal].reg = opinfo.addr_method; 402 | break; 403 | // register addressing, those that have a 32-bit counterpart need special handling 404 | case ADDR_AX: 405 | case ADDR_CX: 406 | case ADDR_DX: 407 | case ADDR_BX: 408 | case ADDR_SP: 409 | case ADDR_BP: 410 | case ADDR_SI: 411 | case ADDR_DI: 412 | // setting the operand info struct according to decode results 413 | // reg should contain a symbolic constant indicating which register this operand refers to 414 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_REG; 415 | curr_inst->operand[op_ordinal].reg = opinfo.addr_method; 416 | 417 | // is it DWORD type operand? 
if so, change reg field to its 32-bit counterpart by adding a offset 418 | if (opinfo.optype == OPR_WORD_DWORD && curr_inst->effective_opsize == BIT_WIDTH_32) { 419 | curr_inst->operand[op_ordinal].reg += (ADDR_EAX - ADDR_AX); 420 | } 421 | break; 422 | // immediate addressing 423 | case ADDR_IMM: 424 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_IMM; 425 | decode_imm(ptr_operand, dis, curr_inst, opinfo); 426 | break; 427 | case ADDR_MODRM_GREG: 428 | case ADDR_MODRM_MEM: 429 | case ADDR_MODRM_GPREG_MEM: 430 | case ADDR_MODRM_MOD_GREG_ONLY: 431 | case ADDR_MODRM_SREG: 432 | case ADDR_CONTROL_REG: 433 | case ADDR_DEBUG_REG: 434 | case ADDR_MODRM_TREG: 435 | // Make sure we increment dis->curr_inst_offset only once, 436 | // because ModR/M occupies 1 byte at most 437 | if (!curr_inst->is_modrm_decoded) { 438 | curr_inst->modrm = *ptr_operand; 439 | curr_inst->is_modrm_decoded = 1; 440 | dis->curr_inst_offset++; 441 | decode_modrm(buf, op_ordinal, dis, curr_inst, opinfo); 442 | } else { 443 | decode_modrm(buf, op_ordinal, dis, curr_inst, opinfo); 444 | } 445 | break; 446 | case ADDR_DIRECT_OFFSET: 447 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_MEM_DIRECT; 448 | if (opinfo.optype == OPR_BYTE) { 449 | curr_inst->displacement.size = 8; 450 | curr_inst->displacement.disp8 = u8(ptr_operand); 451 | dis->curr_inst_offset += sizeof(uint8_t); 452 | } else { 453 | if (curr_inst->effective_opsize == BIT_WIDTH_32) { 454 | curr_inst->displacement.size = 32; 455 | curr_inst->displacement.disp32 = u32(ptr_operand); 456 | dis->curr_inst_offset += sizeof(uint32_t); 457 | } else if (curr_inst->effective_opsize == BIT_WIDTH_16) { 458 | curr_inst->displacement.size = 16; 459 | curr_inst->displacement.disp16 = u16(ptr_operand); 460 | dis->curr_inst_offset += sizeof(uint16_t); 461 | } 462 | } 463 | break; 464 | case ADDR_RELATIVE: 465 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_JMP_RELATIVE; 466 | if (opinfo.optype == OPR_BYTE) { 467 | 
curr_inst->relative_offset.size = 8; 468 | curr_inst->relative_offset.offset8 = u8(ptr_operand); 469 | dis->curr_inst_offset += sizeof(uint8_t); 470 | } else if (opinfo.optype == OPR_WORD_DWORD) { 471 | if (curr_inst->effective_opsize == BIT_WIDTH_32) { 472 | curr_inst->relative_offset.size = 32; 473 | curr_inst->relative_offset.offset32 = u32(ptr_operand); 474 | dis->curr_inst_offset += sizeof(uint32_t); 475 | } else if (curr_inst->effective_opsize == BIT_WIDTH_16) { 476 | curr_inst->relative_offset.size = 16; 477 | curr_inst->relative_offset.offset16 = u16(ptr_operand); 478 | dis->curr_inst_offset += sizeof(uint16_t); 479 | } 480 | } 481 | break; 482 | case ADDR_1: 483 | curr_inst->operand[op_ordinal].addr_method = ASM_ADDR_1; 484 | break; 485 | default: 486 | break; 487 | } 488 | 489 | return 0; 490 | } 491 | 492 | int disasm_one_inst_x86(unsigned const char buf[], Disassembly *dis, CurrentInst *curr_inst) { 493 | int delta = dis->curr_inst_offset; // number of bytes of current instruction 494 | int i; 495 | InstInfo inst_info; 496 | 497 | // prefixes 498 | curr_inst->effective_opsize = cf.mode_bitwidth; 499 | curr_inst->effective_addrsize = cf.mode_bitwidth; 500 | decode_prefixes(buf, dis, curr_inst); 501 | 502 | // opcode 503 | inst_info = decode_opcodes(buf, dis, curr_inst); 504 | 505 | // operands 506 | dis->curr_inst_offset++; 507 | if (curr_inst->mnemonic) { 508 | for (i = 0; i < curr_inst->opcount; i++) { 509 | decode_operand(buf, i, dis, curr_inst, inst_info.opinfo[i]); 510 | } 511 | } 512 | 513 | delta = dis->curr_inst_offset - delta; 514 | return delta; 515 | } 516 | 517 | int translate_inst_into_intel(CurrentInst curr_inst, char buf[], size_t bufsize, unsigned long int start_address, int delta) { 518 | int i = 0; 519 | 520 | memset(buf, 0, bufsize); 521 | 522 | // translate prefixes 523 | for (i = 0; i < 4; i++) { 524 | switch (curr_inst.prefixes[i]) { 525 | case 0x26: 526 | strcpy(buf, "es: "); 527 | continue; 528 | case 0x2e: 529 | strcpy(buf, "cs: 
"); 530 | continue; 531 | case 0x36: 532 | strcpy(buf, "ss: "); 533 | continue; 534 | case 0x3e: 535 | strcpy(buf, "ds: "); 536 | continue; 537 | case 0x64: 538 | strcpy(buf, "fs: "); 539 | continue; 540 | case 0x65: 541 | strcpy(buf, "gs: "); 542 | continue; 543 | case 0xf0: 544 | strcpy(buf, "lock "); 545 | continue; 546 | case 0xf2: 547 | strcpy(buf, "repne "); 548 | continue; 549 | case 0xf3: 550 | strcpy(buf, "rep "); 551 | continue; 552 | default: 553 | continue; 554 | } 555 | } 556 | 557 | // copy mnemonic directly to buf, since we already decoded that in disasm_one_inst 558 | if (curr_inst.mnemonic) { 559 | sprintf(buf, "%s%s ", buf, curr_inst.mnemonic); 560 | } else { 561 | sprintf(buf, "%sdb 0x%x ", buf, curr_inst.opcode1); 562 | } 563 | 564 | // translate operands 565 | for (i = 0; i < curr_inst.opcount; i++) { 566 | switch (curr_inst.operand[i].addr_method) { 567 | case ASM_ADDR_INVALID: 568 | sprintf(buf, "%s(none)", buf); 569 | break; 570 | case ASM_ADDR_1: 571 | sprintf(buf, "%s1", buf); 572 | break; 573 | case ASM_ADDR_IMM: 574 | sprintf(buf, "%s0x%x", buf, curr_inst.imm); 575 | break; 576 | case ASM_ADDR_REG: 577 | sprintf(buf, "%s%s", buf, regname[curr_inst.operand[i].reg - ADDR_AL]); 578 | break; 579 | case ASM_ADDR_MEM_DIRECT: 580 | if (curr_inst.operand[i].optype == OPR_BYTE) { 581 | sprintf(buf, "%sbyte ptr ", buf); 582 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 583 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 584 | sprintf(buf, "%sword ptr ", buf); 585 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 586 | sprintf(buf, "%sdword ptr ", buf); 587 | } 588 | } else if (curr_inst.operand[i].optype == OPR_WORD) { 589 | sprintf(buf, "%sword ptr ", buf); 590 | } else if (curr_inst.operand[i].optype == OPR_DWORD) { 591 | sprintf(buf, "%sdword ptr ", buf); 592 | } 593 | 594 | switch (curr_inst.displacement.size) { 595 | case 8: 596 | sprintf(buf, "%s[0x%x]", buf, curr_inst.displacement.disp8); 597 | break; 598 | case 
16: 599 | sprintf(buf, "%s[0x%x]", buf, curr_inst.displacement.disp16); 600 | break; 601 | case 32: 602 | sprintf(buf, "%s[0x%x]", buf, curr_inst.displacement.disp32); 603 | break; 604 | default: 605 | break; 606 | } 607 | break; 608 | case ASM_ADDR_JMP_RELATIVE: 609 | switch (curr_inst.relative_offset.size) { 610 | case 8: 611 | sprintf(buf, "%s0x%lx", buf, curr_inst.relative_offset.offset8 + (int8_t)delta + start_address + cf.image_base); 612 | break; 613 | case 16: 614 | sprintf(buf, "%s0x%lx", buf, curr_inst.relative_offset.offset16 + (int16_t)delta + start_address + cf.image_base); 615 | break; 616 | case 32: 617 | sprintf(buf, "%s0x%lx", buf, curr_inst.relative_offset.offset32 + (int32_t)delta + start_address + cf.image_base); 618 | break; 619 | default: 620 | break; 621 | } 622 | break; 623 | case ASM_ADDR_REG_INDIRECT: 624 | if (curr_inst.operand[i].optype == OPR_BYTE) { 625 | sprintf(buf, "%sbyte ptr ", buf); 626 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 627 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 628 | sprintf(buf, "%sword ptr ", buf); 629 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 630 | sprintf(buf, "%sdword ptr ", buf); 631 | } 632 | } 633 | 634 | sprintf(buf, "%s[%s]", buf, regname[curr_inst.operand[i].reg - ADDR_AL]); 635 | break; 636 | case ASM_ADDR_REG_RELATIVE: 637 | if (curr_inst.operand[i].optype == OPR_BYTE) { 638 | sprintf(buf, "%sbyte ptr ", buf); 639 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 640 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 641 | sprintf(buf, "%sword ptr ", buf); 642 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 643 | sprintf(buf, "%sdword ptr ", buf); 644 | } 645 | } 646 | 647 | switch (curr_inst.displacement.size) { 648 | case 0: 649 | // no displacement 650 | sprintf(buf, "%s[%s]", buf, regname[curr_inst.operand[i].reg - ADDR_AL]); 651 | break; 652 | case 8: 653 | sprintf(buf, "%s[%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - 
ADDR_AL], 654 | SIGN(curr_inst.displacement.disp8), MAGNITUDE(curr_inst.displacement.disp8)); 655 | break; 656 | case 16: 657 | sprintf(buf, "%s[%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 658 | SIGN(curr_inst.displacement.disp16), MAGNITUDE(curr_inst.displacement.disp16)); 659 | break; 660 | case 32: 661 | sprintf(buf, "%s[%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 662 | SIGN(curr_inst.displacement.disp32), MAGNITUDE(curr_inst.displacement.disp32)); 663 | break; 664 | default: 665 | break; 666 | } 667 | break; 668 | case ASM_ADDR_INDEX_SCALE: 669 | if (curr_inst.operand[i].optype == OPR_BYTE) { 670 | sprintf(buf, "%sbyte ptr ", buf); 671 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 672 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 673 | sprintf(buf, "%sword ptr ", buf); 674 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 675 | sprintf(buf, "%sdword ptr ", buf); 676 | } 677 | } 678 | 679 | if (curr_inst.operand[i].real_scale == 1) { 680 | sprintf(buf, "%s[%s]", buf, regname[curr_inst.operand[i].index_reg - ADDR_AL]); 681 | } else { 682 | sprintf(buf, "%s[%s*%d]", buf, 683 | regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale); 684 | } 685 | break; 686 | case ASM_ADDR_BASE_INDEX_SCALE: 687 | if (curr_inst.operand[i].optype == OPR_BYTE) { 688 | sprintf(buf, "%sbyte ptr ", buf); 689 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 690 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 691 | sprintf(buf, "%sword ptr ", buf); 692 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 693 | sprintf(buf, "%sdword ptr ", buf); 694 | } 695 | } 696 | 697 | if (curr_inst.operand[i].real_scale == 1) { 698 | sprintf(buf, "%s[%s+%s]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 699 | regname[curr_inst.operand[i].index_reg - ADDR_AL]); 700 | } else { 701 | sprintf(buf, "%s[%s+%s*%d]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 702 | 
regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale); 703 | } 704 | break; 705 | case ASM_ADDR_RELATIVE_BASE_INDEX: 706 | if (curr_inst.operand[i].optype == OPR_BYTE) { 707 | sprintf(buf, "%sbyte ptr ", buf); 708 | } else if (curr_inst.operand[i].optype == OPR_WORD_DWORD) { 709 | if (curr_inst.effective_opsize == BIT_WIDTH_16) { 710 | sprintf(buf, "%sword ptr ", buf); 711 | } else if (curr_inst.effective_opsize == BIT_WIDTH_32) { 712 | sprintf(buf, "%sdword ptr ", buf); 713 | } 714 | } 715 | 716 | if (curr_inst.operand[i].real_scale == 1) { 717 | switch (curr_inst.displacement.size) { 718 | case 0: 719 | // no displacement 720 | sprintf(buf, "%s[%s+%s]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 721 | regname[curr_inst.operand[i].index_reg - ADDR_AL]); 722 | break; 723 | case 8: 724 | sprintf(buf, "%s[%s+%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 725 | regname[curr_inst.operand[i].index_reg - ADDR_AL], 726 | SIGN(curr_inst.displacement.disp8), MAGNITUDE(curr_inst.displacement.disp8)); 727 | break; 728 | case 16: 729 | sprintf(buf, "%s[%s+%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 730 | regname[curr_inst.operand[i].index_reg - ADDR_AL], 731 | SIGN(curr_inst.displacement.disp16), MAGNITUDE(curr_inst.displacement.disp16)); 732 | break; 733 | case 32: 734 | sprintf(buf, "%s[%s+%s%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 735 | regname[curr_inst.operand[i].index_reg - ADDR_AL], 736 | SIGN(curr_inst.displacement.disp32), MAGNITUDE(curr_inst.displacement.disp32)); 737 | break; 738 | default: 739 | break; 740 | } 741 | } else { 742 | switch (curr_inst.displacement.size) { 743 | case 0: 744 | // no displacement 745 | sprintf(buf, "%s[%s+%s*%d]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 746 | regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale); 747 | break; 748 | case 8: 749 | sprintf(buf, "%s[%s+%s*%d%c0x%x]", buf, 
regname[curr_inst.operand[i].reg - ADDR_AL], 750 | regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale, 751 | SIGN(curr_inst.displacement.disp8), MAGNITUDE(curr_inst.displacement.disp8)); 752 | break; 753 | case 16: 754 | sprintf(buf, "%s[%s+%s*%d%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 755 | regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale, 756 | SIGN(curr_inst.displacement.disp16), MAGNITUDE(curr_inst.displacement.disp16)); 757 | break; 758 | case 32: 759 | sprintf(buf, "%s[%s+%s*%d%c0x%x]", buf, regname[curr_inst.operand[i].reg - ADDR_AL], 760 | regname[curr_inst.operand[i].index_reg - ADDR_AL], curr_inst.operand[i].real_scale, 761 | SIGN(curr_inst.displacement.disp32), MAGNITUDE(curr_inst.displacement.disp32)); 762 | break; 763 | default: 764 | break; 765 | } 766 | } 767 | break; 768 | } 769 | if (i+1 < curr_inst.opcount) strcat(buf, ", "); 770 | } 771 | 772 | return 0; 773 | } 774 | 775 | void init_disasm_struct(Disassembly *dis) { 776 | memset(dis, 0, sizeof(Disassembly)); 777 | dis->asm_buf_size = ASM_BUFSIZE; 778 | } 779 | 780 | int disasm_byte_buf_x86(unsigned char buf[], unsigned int bufsize, unsigned long int start_address) { 781 | int delta; 782 | Disassembly dis; 783 | 784 | init_disasm_struct(&dis); 785 | 786 | // decode each instruction in buf 787 | for (int i = 0; i < bufsize; i += delta, start_address += delta) { 788 | CurrentInst curr_inst = {0}; 789 | delta = disasm_one_inst_x86(buf, &dis, &curr_inst); 790 | printf("%08lx: ", start_address + cf.image_base); 791 | 792 | // print the opcode. 50 should suffice because the longest possible x86 inst is 15 bytes. 
793 | char opcode_str[50] = ""; 794 | for (int j = 0; j < delta; j++) { 795 | sprintf(opcode_str, "%s%02x ", opcode_str, buf[i+j]); 796 | } 797 | printf("%-20s ", opcode_str); 798 | 799 | char asmbuf[128]; 800 | translate_inst_into_intel(curr_inst, asmbuf, 128, start_address, delta); 801 | printf("%s\n", asmbuf); 802 | dis.asm_buf[0] = '\0'; 803 | } 804 | return 0; 805 | } 806 | 807 | int disasm_byte_buf(unsigned char buf[], unsigned int bufsize, unsigned long int start_address) { 808 | int ret = 0; 809 | 810 | switch (cf.mode_isa) { 811 | case ISA_INTEL_X86: 812 | ret = disasm_byte_buf_x86(buf, bufsize, start_address); 813 | break; 814 | } 815 | 816 | return ret; 817 | } 818 | 819 | int disasm_pe_file(const char *file, unsigned int size, DWORD start_address) { 820 | if (!file || size <= 0) { 821 | fprintf(stderr, "disasm_pe_file(): invalid arguments!\n"); 822 | return -1; 823 | } 824 | 825 | FILE *fp = fopen(file, "rb"); 826 | if (!fp) { 827 | fprintf(stderr, "disasm_pe_file(): error opening file %s!\n", file); 828 | return -1; 829 | } 830 | 831 | fseek(fp, 0, SEEK_END); 832 | long int file_size = ftell(fp); 833 | if (file_size < size || file_size < start_address) { 834 | fprintf(stderr, "disasm_pe_file(): specified file size too big!\n"); 835 | return -1; 836 | } 837 | 838 | // if start_address is 0 (by default), set it to the address of the PE entry point 839 | if (start_address == 0) { 840 | DWORD rva; 841 | start_address = get_pe_ep_addr(fp, &rva); 842 | if (start_address <= 0) { 843 | return -1; 844 | } 845 | fseek(fp, start_address, SEEK_SET); 846 | start_address = rva; 847 | } else { 848 | fseek(fp, start_address, SEEK_SET); 849 | } 850 | 851 | // allocate a buffer of 'size' bytes 852 | unsigned char *buf = malloc(sizeof(unsigned char) * size); 853 | // read 'size' bytes from 'file' and disassemble it 854 | size_t bytes_read = fread(buf, sizeof(unsigned char), size, fp); 855 | if (bytes_read != size) { 856 | fprintf(stderr, "disasm_pe_file(): error reading 
file %s!\n", file); 857 | return -1; 858 | } 859 | 860 | return disasm_byte_buf(buf, size, start_address); 861 | } -------------------------------------------------------------------------------- /disassemble.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/9/22. 3 | // 4 | 5 | #ifndef KALEIDOSCOPE_DISASSEMBLE_H 6 | #define KALEIDOSCOPE_DISASSEMBLE_H 7 | 8 | #include 9 | #include 10 | #include "instruction.h" 11 | 12 | #define ASM_BUFSIZE 128 13 | 14 | typedef enum { 15 | ASM_ADDR_INVALID, 16 | ASM_ADDR_1, // for instructions like "rol bx, 1" where 1 is a fixed number 17 | ASM_ADDR_IMM, 18 | ASM_ADDR_REG, 19 | ASM_ADDR_MEM_DIRECT, 20 | ASM_ADDR_JMP_RELATIVE, 21 | ASM_ADDR_REG_INDIRECT, 22 | ASM_ADDR_INDEX_SCALE, 23 | ASM_ADDR_BASE_INDEX_SCALE, 24 | ASM_ADDR_REG_RELATIVE, 25 | ASM_ADDR_RELATIVE_BASE_INDEX, 26 | 27 | } AsmAddrMethod; 28 | 29 | typedef struct { 30 | AsmAddrMethod addr_method; 31 | OperandType optype; 32 | uint8_t reg; 33 | uint8_t index_reg; 34 | uint8_t real_scale; 35 | } CurrentOperand; 36 | 37 | typedef struct { 38 | uint8_t prefixes[4]; 39 | uint8_t opcode1, opcode2; 40 | const char *mnemonic; 41 | uint8_t opcount; 42 | CurrentOperand operand[4]; 43 | uint8_t effective_opsize; 44 | uint8_t effective_addrsize; 45 | uint8_t modrm; 46 | uint8_t is_modrm_decoded; 47 | uint8_t sib; 48 | uint8_t is_sib_decoded; 49 | struct { 50 | uint8_t size; 51 | union { 52 | int8_t disp8; 53 | int16_t disp16; 54 | int32_t disp32; 55 | }; 56 | } displacement; 57 | uint32_t imm; 58 | struct { 59 | uint8_t size; 60 | union { 61 | int8_t offset8; 62 | int16_t offset16; 63 | int32_t offset32; 64 | }; 65 | } relative_offset; 66 | } CurrentInst; 67 | 68 | typedef struct { 69 | char asm_buf[ASM_BUFSIZE]; 70 | uint8_t asm_buf_size; 71 | uint32_t curr_inst_offset; 72 | } Disassembly; 73 | 74 | int disasm_byte_buf(unsigned char buf[], unsigned int bufsize, unsigned long int start_address); 75 | int 
disasm_pe_file(const char *file, unsigned int size, DWORD start_address); 76 | 77 | #endif //KALEIDOSCOPE_DISASSEMBLE_H 78 | -------------------------------------------------------------------------------- /instruction.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/11/9. 3 | // 4 | 5 | #include "instruction.h" 6 | 7 | Inst standard_insts[] = { 8 | {0x00, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 9 | {0x01, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 10 | {0x02, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 11 | {0x03, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 12 | {0x04, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 13 | {0x05, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 14 | {0x06, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_ES, OPR_UNSPECIFIED}}, 15 | {0x07, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_ES, OPR_UNSPECIFIED}}, 16 | {0x08, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 17 | {0x09, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 18 | {0x0a, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 19 | {0x0b, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 20 | {0x0c, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 21 | {0x0d, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 22 | {0x0e, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_CS, OPR_UNSPECIFIED}}, 23 | {0x10, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, 
OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 24 | {0x11, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 25 | {0x12, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 26 | {0x13, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 27 | {0x14, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 28 | {0x15, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 29 | {0x16, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SS, OPR_UNSPECIFIED}}, 30 | {0x17, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SS, OPR_UNSPECIFIED}}, 31 | {0x18, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 32 | {0x19, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 33 | {0x1a, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 34 | {0x1b, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 35 | {0x1c, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 36 | {0x1d, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 37 | {0x1e, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DS, OPR_UNSPECIFIED}}, 38 | {0x1f, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DS, OPR_UNSPECIFIED}}, 39 | {0x20, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 40 | {0x21, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 41 | {0x22, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 42 | {0x23, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 43 | {0x24, "and", 2, 
BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 44 | {0x25, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 45 | {0x27, "daa", 0, BIT_WIDTH_UNSPECIFIED}, 46 | {0x28, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 47 | {0x29, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 48 | {0x2a, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 49 | {0x2b, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 50 | {0x2c, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 51 | {0x2d, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 52 | {0x2f, "das", 0, BIT_WIDTH_UNSPECIFIED}, 53 | {0x30, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 54 | {0x31, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 55 | {0x32, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 56 | {0x33, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 57 | {0x34, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 58 | {0x35, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 59 | {0x37, "aaa", 0, BIT_WIDTH_UNSPECIFIED}, 60 | {0x38, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 61 | {0x39, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 62 | {0x3a, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 63 | {0x3b, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, 
OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 64 | {0x3c, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 65 | {0x3d, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 66 | {0x3f, "aas", 0, BIT_WIDTH_UNSPECIFIED}, 67 | {0x40, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD}}, 68 | {0x41, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_CX, OPR_WORD_DWORD}}, 69 | {0x42, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD_DWORD}}, 70 | {0x43, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BX, OPR_WORD_DWORD}}, 71 | {0x44, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SP, OPR_WORD_DWORD}}, 72 | {0x45, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BP, OPR_WORD_DWORD}}, 73 | {0x46, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SI, OPR_WORD_DWORD}}, 74 | {0x47, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DI, OPR_WORD_DWORD}}, 75 | {0x48, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD}}, 76 | {0x49, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_CX, OPR_WORD_DWORD}}, 77 | {0x4a, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD_DWORD}}, 78 | {0x4b, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BX, OPR_WORD_DWORD}}, 79 | {0x4c, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SP, OPR_WORD_DWORD}}, 80 | {0x4d, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BP, OPR_WORD_DWORD}}, 81 | {0x4e, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SI, OPR_WORD_DWORD}}, 82 | {0x4f, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DI, OPR_WORD_DWORD}}, 83 | {0x50, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD}}, 84 | {0x51, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_CX, OPR_WORD_DWORD}}, 85 | {0x52, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD_DWORD}}, 86 | {0x53, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BX, OPR_WORD_DWORD}}, 87 | {0x54, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SP, OPR_WORD_DWORD}}, 88 | {0x55, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BP, OPR_WORD_DWORD}}, 89 | {0x56, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SI, OPR_WORD_DWORD}}, 90 | {0x57, 
"push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DI, OPR_WORD_DWORD}}, 91 | {0x58, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD}}, 92 | {0x59, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_CX, OPR_WORD_DWORD}}, 93 | {0x5a, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD_DWORD}}, 94 | {0x5b, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BX, OPR_WORD_DWORD}}, 95 | {0x5c, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SP, OPR_WORD_DWORD}}, 96 | {0x5d, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_BP, OPR_WORD_DWORD}}, 97 | {0x5e, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_SI, OPR_WORD_DWORD}}, 98 | {0x5f, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_DI, OPR_WORD_DWORD}}, 99 | {0x60, "pushaw", 0, BIT_WIDTH_16}, 100 | {0x60, "pusha", 0, BIT_WIDTH_UNSPECIFIED}, 101 | {0x60, "pushad", 0, BIT_WIDTH_32}, 102 | {0x61, "popaw", 0, BIT_WIDTH_16}, 103 | {0x61, "popa", 0, BIT_WIDTH_UNSPECIFIED}, 104 | {0x61, "popad", 0, BIT_WIDTH_32}, 105 | {0x63, "arpl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD, ADDR_MODRM_GREG, OPR_WORD}}, 106 | {0x68, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_WORD_DWORD}}, 107 | {0x69, "imul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 108 | {0x6a, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_BYTE}}, 109 | {0x6b, "imul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}}, 110 | {0x6c, "insb", 0, BIT_WIDTH_UNSPECIFIED}, 111 | {0x6d, "insw", 0, BIT_WIDTH_16}, 112 | {0x6d, "insd", 0, BIT_WIDTH_32}, 113 | {0x6e, "outsb", 0, BIT_WIDTH_UNSPECIFIED}, 114 | {0x6f, "outsw", 0, BIT_WIDTH_16}, 115 | {0x6f, "outsd", 0, BIT_WIDTH_32}, 116 | {0x70, "jo", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 117 | {0x71, "jno", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 118 | {0x72, "jb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 119 | {0x73, "jnb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 120 
| {0x74, "jz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 121 | {0x75, "jnz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 122 | {0x76, "jbe", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 123 | {0x77, "ja", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 124 | {0x78, "js", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 125 | {0x79, "jns", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 126 | {0x7a, "jp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 127 | {0x7b, "jnp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 128 | {0x7c, "jl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 129 | {0x7d, "jnl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 130 | {0x7e, "jle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 131 | {0x7f, "jnle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 132 | {0x84, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 133 | {0x85, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 134 | {0x86, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 135 | {0x87, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 136 | {0x88, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_MODRM_GREG, OPR_BYTE}}, 137 | {0x89, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_MODRM_GREG, OPR_WORD_DWORD}}, 138 | {0x8a, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 139 | {0x8b, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 140 | {0x8c, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD, ADDR_MODRM_SREG, OPR_WORD}}, 141 | {0x8d, "lea", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_MEM, 
OPR_WORD_DWORD}}, 142 | {0x8e, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_SREG, OPR_WORD, ADDR_MODRM_GPREG_MEM, OPR_WORD}}, 143 | {0x8f, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 144 | {0x90, "nop", 0, BIT_WIDTH_UNSPECIFIED}, 145 | {0x91, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_CX, OPR_UNSPECIFIED}}, 146 | {0x92, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_DX, OPR_UNSPECIFIED}}, 147 | {0x93, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_BX, OPR_UNSPECIFIED}}, 148 | {0x94, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_SP, OPR_UNSPECIFIED}}, 149 | {0x95, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_BP, OPR_UNSPECIFIED}}, 150 | {0x96, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_SI, OPR_UNSPECIFIED}}, 151 | {0x97, "xchg", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_UNSPECIFIED, ADDR_DI, OPR_UNSPECIFIED}}, 152 | {0x98, "cbw", 0, BIT_WIDTH_16}, 153 | {0x98, "cwde", 0, BIT_WIDTH_32 | BIT_WIDTH_64}, 154 | {0x99, "cwd", 0, BIT_WIDTH_16}, 155 | {0x99, "cdq", 0, BIT_WIDTH_32 | BIT_WIDTH_64}, 156 | {0x9b, "wait", 0, BIT_WIDTH_UNSPECIFIED}, 157 | {0x9c, "pushfw", 0, BIT_WIDTH_16}, 158 | {0x9c, "pushf", 0, BIT_WIDTH_UNSPECIFIED}, 159 | {0x9c, "pushfd", 0, BIT_WIDTH_32}, 160 | {0x9d, "popfw", 0, BIT_WIDTH_16}, 161 | {0x9d, "popf", 0, BIT_WIDTH_UNSPECIFIED}, 162 | {0x9d, "popfd", 0, BIT_WIDTH_32}, 163 | {0x9e, "sahf", 0, BIT_WIDTH_UNSPECIFIED}, 164 | {0x9f, "lahf", 0, BIT_WIDTH_UNSPECIFIED}, 165 | {0xa0, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_BYTE, ADDR_DIRECT_OFFSET, OPR_BYTE}}, 166 | {0xa1, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_DIRECT_OFFSET, OPR_WORD_DWORD}}, 167 | {0xa2, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DIRECT_OFFSET, OPR_BYTE, ADDR_AL, OPR_BYTE}}, 168 | {0xa3, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DIRECT_OFFSET, OPR_WORD_DWORD, ADDR_AX, OPR_WORD_DWORD}}, 169 | {0xa4, "movsb", 
0, BIT_WIDTH_UNSPECIFIED}, 170 | {0xa5, "movsw", 0, BIT_WIDTH_16}, 171 | {0xa5, "movsd", 0, BIT_WIDTH_32}, 172 | {0xa6, "cmpsb", 0, BIT_WIDTH_UNSPECIFIED}, 173 | {0xa7, "cmpsw", 0, BIT_WIDTH_16}, 174 | {0xa7, "cmpsd", 0, BIT_WIDTH_32}, 175 | {0xa8, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 176 | {0xa9, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 177 | {0xaa, "stosb", 0, BIT_WIDTH_UNSPECIFIED}, 178 | {0xab, "stosw", 0, BIT_WIDTH_16}, 179 | {0xab, "stosd", 0, BIT_WIDTH_32}, 180 | {0xac, "lodsb", 0, BIT_WIDTH_UNSPECIFIED}, 181 | {0xad, "lodsw", 0, BIT_WIDTH_16}, 182 | {0xad, "lodsd", 0, BIT_WIDTH_32}, 183 | {0xae, "scasb", 0, BIT_WIDTH_UNSPECIFIED}, 184 | {0xaf, "scasw", 0, BIT_WIDTH_16}, 185 | {0xaf, "scasd", 0, BIT_WIDTH_32}, 186 | {0xb0, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 187 | {0xb1, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_CL, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 188 | {0xb2, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DL, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 189 | {0xb3, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_BL, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 190 | {0xb4, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AH, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 191 | {0xb5, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_CH, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 192 | {0xb6, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DH, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 193 | {0xb7, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_BH, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 194 | {0xb8, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 195 | {0xb9, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_CX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 196 | {0xba, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 197 | {0xbb, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_BX, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 198 | {0xbc, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_SP, OPR_WORD_DWORD, 
ADDR_IMM, OPR_WORD_DWORD}}, 199 | {0xbd, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_BP, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 200 | {0xbe, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_SI, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 201 | {0xbf, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DI, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 202 | {0xc2, "ret", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_WORD}}, 203 | {0xc3, "ret", 0, BIT_WIDTH_UNSPECIFIED}, 204 | {0xc6, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}}, 205 | {0xc7, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}}, 206 | {0xc8, "enter", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_WORD, ADDR_IMM, OPR_BYTE}}, 207 | {0xc9, "leave", 0, BIT_WIDTH_UNSPECIFIED}, 208 | {0xca, "retf", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_WORD}}, 209 | {0xcb, "retf", 0, BIT_WIDTH_UNSPECIFIED}, 210 | {0xcc, "int 3", 0, BIT_WIDTH_UNSPECIFIED}, 211 | {0xcd, "int", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_BYTE}}, 212 | {0xce, "into", 0, BIT_WIDTH_UNSPECIFIED}, 213 | {0xcf, "iretw", 0, BIT_WIDTH_16}, 214 | {0xcf, "iret", 0, BIT_WIDTH_UNSPECIFIED}, 215 | {0xcf, "iretd", 0, BIT_WIDTH_32}, 216 | {0xd4, "aam", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_BYTE}}, 217 | {0xd5, "aad", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_BYTE}}, 218 | {0xd6, "salc", 0, BIT_WIDTH_UNSPECIFIED}, 219 | {0xd7, "xlat", 0, BIT_WIDTH_UNSPECIFIED}, 220 | {0xe0, "loopnz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 221 | {0xe1, "loopz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 222 | {0xe2, "loop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 223 | {0xe3, "jcxz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 224 | {0xe4, "in", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_IMM, OPR_BYTE}}, 225 | {0xe5, "in", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}}, 226 | {0xe6, "out", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, 
OPR_BYTE, ADDR_AL, OPR_UNSPECIFIED}}, 227 | {0xe7, "out", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_IMM, OPR_BYTE, ADDR_AX, OPR_WORD_DWORD}}, 228 | {0xe8, "call", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 229 | {0xe9, "jmp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 230 | {0xeb, "jmp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_BYTE}}, 231 | {0xec, "in", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_UNSPECIFIED, ADDR_DX, OPR_UNSPECIFIED}}, 232 | {0xed, "in", 2, BIT_WIDTH_16, {ADDR_AX, OPR_UNSPECIFIED, ADDR_DX, OPR_UNSPECIFIED}}, 233 | {0xed, "in", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_DX, OPR_UNSPECIFIED}}, 234 | {0xee, "out", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AL, OPR_UNSPECIFIED}}, 235 | {0xef, "out", 2, BIT_WIDTH_16, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AX, OPR_UNSPECIFIED}}, 236 | {0xef, "out", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_WORD, ADDR_AX, OPR_WORD_DWORD}}, 237 | {0xf1, "int1", 0, BIT_WIDTH_UNSPECIFIED}, 238 | {0xf4, "hlt", 0, BIT_WIDTH_UNSPECIFIED}, 239 | {0xf5, "cmc", 0, BIT_WIDTH_UNSPECIFIED}, 240 | {0xf8, "clc", 0, BIT_WIDTH_UNSPECIFIED}, 241 | {0xf9, "stc", 0, BIT_WIDTH_UNSPECIFIED}, 242 | {0xfa, "cli", 0, BIT_WIDTH_UNSPECIFIED}, 243 | {0xfb, "sti", 0, BIT_WIDTH_UNSPECIFIED}, 244 | {0xfc, "cld", 0, BIT_WIDTH_UNSPECIFIED}, 245 | {0xfd, "std", 0, BIT_WIDTH_UNSPECIFIED}, 246 | {0, 0, 0, 0, 0} 247 | }; 248 | 249 | Inst extended_insts[] = { 250 | {0x02, "lar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD, ADDR_MODRM_GPREG_MEM, OPR_WORD}}, 251 | {0x03, "lsl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD}}, 252 | {0x06, "clts", 0, BIT_WIDTH_UNSPECIFIED}, 253 | {0x20, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD, ADDR_CONTROL_REG, OPR_DWORD}}, 254 | {0x21, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD, ADDR_DEBUG_REG, OPR_DWORD}}, 255 | {0x22, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_CONTROL_REG, 
OPR_DWORD, ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD}}, 256 | {0x23, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_DEBUG_REG, OPR_DWORD, ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD}}, 257 | {0x24, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD, ADDR_MODRM_TREG, OPR_DWORD}}, 258 | {0x26, "mov", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_TREG, OPR_DWORD, ADDR_MODRM_MOD_GREG_ONLY, OPR_DWORD}}, 259 | {0x80, "jo", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 260 | {0x81, "jno", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 261 | {0x82, "jb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 262 | {0x83, "jnb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 263 | {0x84, "jz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 264 | {0x85, "jnz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 265 | {0x86, "jbe", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 266 | {0x87, "jnbe", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 267 | {0x88, "js", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 268 | {0x89, "jns", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 269 | {0x8a, "jp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 270 | {0x8b, "jnp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 271 | {0x8c, "jl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 272 | {0x8d, "jnl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 273 | {0x8e, "jle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 274 | {0x8f, "jnle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_RELATIVE, OPR_WORD_DWORD}}, 275 | {0x90, "seto", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 276 | {0x91, "setno", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 277 | {0x92, "setb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 278 | {0x93, "setnb", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 
279 | {0x94, "setz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 280 | {0x95, "setnz", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 281 | {0x96, "setbe", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 282 | {0x97, "setnbe", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 283 | {0x98, "sets", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 284 | {0x99, "setns", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 285 | {0x9a, "setp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 286 | {0x9b, "setnp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 287 | {0x9c, "setl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 288 | {0x9d, "setnl", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 289 | {0x9e, "setle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 290 | {0x9f, "setnle", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 291 | {0xa0, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_FS, OPR_WORD_DWORD}}, 292 | {0xa1, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_FS, OPR_WORD_DWORD}}, 293 | {0xa8, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_GS, OPR_WORD_DWORD}}, 294 | {0xa9, "pop", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_GS, OPR_WORD_DWORD}}, 295 | {0xaf, "imul", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}}, 296 | {0xb6, "movzx", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 297 | {0xb7, "movzx", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD}}, 298 | {0xbe, "movsx", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_BYTE}}, 299 | {0xbf, "movsx", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GREG, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_WORD}}, 300 | {0, 0, 0, 0, 0} 301 | }; 302 | 303 | ExtendedGroupInst extended_group_insts[] = { 304 | {0x80, "add", 2, 
BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 0}, 305 | {0x80, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 1}, 306 | {0x80, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 2}, 307 | {0x80, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 3}, 308 | {0x80, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 4}, 309 | {0x80, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 5}, 310 | {0x80, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 6}, 311 | {0x80, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 7}, 312 | {0x81, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 0}, 313 | {0x81, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 1}, 314 | {0x81, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 2}, 315 | {0x81, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 3}, 316 | {0x81, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 4}, 317 | {0x81, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 5}, 318 | {0x81, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 6}, 319 | {0x81, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 7}, 320 | {0x82, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 0}, 321 | {0x82, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 1}, 322 | {0x82, "adc", 2, BIT_WIDTH_UNSPECIFIED, 
{ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 2}, 323 | {0x82, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 3}, 324 | {0x82, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 4}, 325 | {0x82, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 5}, 326 | {0x82, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 6}, 327 | {0x82, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 7}, 328 | {0x83, "add", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 0}, 329 | {0x83, "or", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 1}, 330 | {0x83, "adc", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 2}, 331 | {0x83, "sbb", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 3}, 332 | {0x83, "and", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 4}, 333 | {0x83, "sub", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 5}, 334 | {0x83, "xor", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 6}, 335 | {0x83, "cmp", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 7}, 336 | {0xc0, "rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 0}, 337 | {0xc0, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 1}, 338 | {0xc0, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 2}, 339 | {0xc0, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 3}, 340 | {0xc0, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 4}, 341 | {0xc0, "shr", 2, 
BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 5}, 342 | {0xc0, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 6}, 343 | {0xc0, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 7}, 344 | {0xc1, "rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 0}, 345 | {0xc1, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 1}, 346 | {0xc1, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 2}, 347 | {0xc1, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 3}, 348 | {0xc1, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 4}, 349 | {0xc1, "shr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 5}, 350 | {0xc1, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 6}, 351 | {0xc1, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_BYTE}, 7}, 352 | {0xd0, "rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 0}, 353 | {0xd0, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 1}, 354 | {0xd0, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 2}, 355 | {0xd0, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 3}, 356 | {0xd0, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 4}, 357 | {0xd0, "shr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 5}, 358 | {0xd0, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 6}, 359 | {0xd0, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_1, OPR_BYTE}, 7}, 360 | {0xd1, 
"rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 0}, 361 | {0xd1, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 1}, 362 | {0xd1, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 2}, 363 | {0xd1, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 3}, 364 | {0xd1, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 4}, 365 | {0xd1, "shr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 5}, 366 | {0xd1, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 6}, 367 | {0xd1, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_1, OPR_BYTE}, 7}, 368 | {0xd2, "rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 0}, 369 | {0xd2, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 1}, 370 | {0xd2, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 2}, 371 | {0xd2, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 3}, 372 | {0xd2, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 4}, 373 | {0xd2, "shr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 5}, 374 | {0xd2, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 6}, 375 | {0xd2, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_CL, OPR_BYTE}, 7}, 376 | {0xd3, "rol", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 0}, 377 | {0xd3, "ror", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 1}, 378 | {0xd3, "rcl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 
2}, 379 | {0xd3, "rcr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 3}, 380 | {0xd3, "shl", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 4}, 381 | {0xd3, "shr", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 5}, 382 | {0xd3, "sal", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 6}, 383 | {0xd3, "sar", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_CL, OPR_BYTE}, 7}, 384 | {0xf6, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 0}, 385 | {0xf6, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE, ADDR_IMM, OPR_BYTE}, 1}, 386 | {0xf6, "not", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 2}, 387 | {0xf6, "neg", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 3}, 388 | {0xf6, "mul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_AL, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 4}, 389 | {0xf6, "imul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_AX, OPR_WORD_DWORD, ADDR_AL, OPR_BYTE, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 5}, 390 | {0xf6, "div", 4, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_BYTE, ADDR_AH, OPR_BYTE, ADDR_AX, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 6}, 391 | {0xf6, "idiv", 4, BIT_WIDTH_UNSPECIFIED, {ADDR_AL, OPR_BYTE, ADDR_AH, OPR_BYTE, ADDR_AX, OPR_WORD_DWORD, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 7}, 392 | {0xf7, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 0}, 393 | {0xf7, "test", 2, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD, ADDR_IMM, OPR_WORD_DWORD}, 1}, 394 | {0xf7, "not", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 2}, 395 | {0xf7, "neg", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 3}, 396 | {0xf7, "mul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AX, OPR_UNSPECIFIED, 
ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 4}, 397 | {0xf7, "imul", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AX, OPR_UNSPECIFIED, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 5}, 398 | {0xf7, "div", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AX, OPR_UNSPECIFIED, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 6}, 399 | {0xf7, "idiv", 3, BIT_WIDTH_UNSPECIFIED, {ADDR_DX, OPR_UNSPECIFIED, ADDR_AX, OPR_UNSPECIFIED, ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 7}, 400 | {0xfe, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 0}, 401 | {0xfe, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_BYTE}, 1}, 402 | {0xff, "inc", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 0}, 403 | {0xff, "dec", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 1}, 404 | {0xff, "call", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 2}, 405 | //{0xff, "callf", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 3}, 406 | {0xff, "jmp", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 4}, 407 | //{0xff, "jmpf", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 5}, 408 | {0xff, "push", 1, BIT_WIDTH_UNSPECIFIED, {ADDR_MODRM_GPREG_MEM, OPR_WORD_DWORD}, 6}, 409 | {0, 0, 0, 0, 0} 410 | }; 411 | 412 | int standard_insts_len = sizeof(standard_insts) / sizeof(standard_insts[0]); 413 | int extended_insts_len = sizeof(extended_insts) / sizeof(extended_insts[0]); 414 | int extended_group_insts_len = sizeof(extended_group_insts) / sizeof(extended_group_insts[0]); 415 | 416 | const char *regname[] = { 417 | "al", "cl", "dl", "bl", "ah", "ch", "dh", "bh", // 8-bit general registers 418 | "ax", "cx", "dx", "bx", // 16-bit general registers 419 | "sp", "bp", "si", "di", // 16-bit pointer/index registers 420 | "eax", "ecx", "edx", "ebx", // 32-bit general registers 421 | "esp", "ebp", "esi", "edi", // 32-bit pointer/index registers 422 | "es", "cs", "ss", "ds", "fs", "gs", // segment registers 423 | 
"cr0", "cr1", "cr2", "cr3", "cr4", "cr5", "cr6", "cr7", // control registers 424 | "dr0", "dr1", "dr2", "dr3", "dr4", "dr5", "dr6", "dr7", // debug registers 425 | "tr0", "tr1", "tr2", "tr3", "tr4", "tr5", "tr6", "tr7", // test registers 426 | }; -------------------------------------------------------------------------------- /instruction.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2018/8/19. 3 | // 4 | 5 | #ifndef KALEIDOSCOPE_INSTRUCTION_H 6 | #define KALEIDOSCOPE_INSTRUCTION_H 7 | 8 | #include 9 | #include "config.h" 10 | 11 | // operand addressing methods, the MSB indicates ModR/M 12 | // http://sparksandflames.com/files/x86InstructionChart.html 13 | typedef enum { 14 | ADDR_1, // for instructions like "rol bx, 1" where 1 is a fixed number 15 | ADDR_DIRECT_ADDRESS, 16 | ADDR_CONTROL_REG, 17 | ADDR_DEBUG_REG, 18 | ADDR_EFLAGS, 19 | ADDR_IMM, 20 | ADDR_RELATIVE, 21 | ADDR_DIRECT_OFFSET, 22 | ADDR_MEM_DS, 23 | ADDR_MEM_ES, 24 | ADDR_AL, 25 | ADDR_CL, 26 | ADDR_DL, 27 | ADDR_BL, 28 | ADDR_AH, 29 | ADDR_CH, 30 | ADDR_DH, 31 | ADDR_BH, 32 | ADDR_AX, 33 | ADDR_CX, 34 | ADDR_DX, 35 | ADDR_BX, 36 | ADDR_SP, 37 | ADDR_BP, 38 | ADDR_SI, 39 | ADDR_DI, 40 | ADDR_EAX, 41 | ADDR_ECX, 42 | ADDR_EDX, 43 | ADDR_EBX, 44 | ADDR_ESP, 45 | ADDR_EBP, 46 | ADDR_ESI, 47 | ADDR_EDI, 48 | ADDR_ES, 49 | ADDR_CS, 50 | ADDR_SS, 51 | ADDR_DS, 52 | ADDR_FS, 53 | ADDR_GS, 54 | ADDR_CR0, 55 | ADDR_CR1, 56 | ADDR_CR2, 57 | ADDR_CR3, 58 | ADDR_CR4, 59 | ADDR_CR5, 60 | ADDR_CR6, 61 | ADDR_CR7, 62 | ADDR_DR0, 63 | ADDR_DR1, 64 | ADDR_DR2, 65 | ADDR_DR3, 66 | ADDR_DR4, 67 | ADDR_DR5, 68 | ADDR_DR6, 69 | ADDR_DR7, 70 | ADDR_TR0, 71 | ADDR_TR1, 72 | ADDR_TR2, 73 | ADDR_TR3, 74 | ADDR_TR4, 75 | ADDR_TR5, 76 | ADDR_TR6, 77 | ADDR_TR7, 78 | ADDR_MODRM_GPREG_MEM = 0x80, 79 | ADDR_MODRM_GREG, 80 | ADDR_MODRM_MEM, 81 | ADDR_MODRM_MMXREG, 82 | ADDR_MODRM_MMXREG_MEM, 83 | ADDR_MODRM_MOD_GREG_ONLY, 84 | ADDR_MODRM_SREG, 85 | 
ADDR_MODRM_TREG, 86 | ADDR_MODRM_FPREG, 87 | ADDR_MODRM_FPREG_MEM, 88 | } AddressingMethod; 89 | 90 | #define ADDR_16BIT_REGISTER (ADDR_AX) 91 | 92 | // operand type 93 | // http://sparksandflames.com/files/x86InstructionChart.html 94 | typedef enum { 95 | OPR_UNSPECIFIED, 96 | OPR_BOUND, 97 | OPR_BYTE, 98 | OPR_BYTE_WORD, 99 | OPR_DWORD, 100 | OPR_DQWORD, 101 | OPR_32_48_PTR, 102 | OPR_QWORD_MMXREG, 103 | OPR_PS_FP, 104 | OPR_SS_FP, 105 | OPR_QWORD, 106 | OPR_PSEUDO_DESCRIPTOR, 107 | OPR_DWORD_REG, 108 | OPR_WORD_DWORD, 109 | OPR_WORD, 110 | } OperandType; 111 | 112 | typedef struct { 113 | AddressingMethod addr_method; 114 | OperandType optype; 115 | } OperandInfo; 116 | 117 | typedef struct { 118 | const char *mnemonic; 119 | uint8_t opcount; 120 | uint8_t opsize; 121 | OperandInfo opinfo[4]; 122 | } InstInfo; 123 | 124 | typedef struct { 125 | uint8_t opcode; 126 | InstInfo info; 127 | } Inst; 128 | 129 | typedef struct { 130 | uint8_t opcode; 131 | InstInfo info; 132 | uint8_t opcode_ex; 133 | } ExtendedGroupInst; 134 | 135 | extern Inst standard_insts[]; 136 | extern Inst extended_insts[]; 137 | extern ExtendedGroupInst extended_group_insts[]; 138 | extern int standard_insts_len; 139 | extern int extended_insts_len; 140 | extern int extended_group_insts_len; 141 | extern const char *regname[]; 142 | 143 | #endif //KALEIDOSCOPE_INSTRUCTION_H 144 | -------------------------------------------------------------------------------- /main.c: -------------------------------------------------------------------------------- 1 | #include 2 | #include "disassemble.h" 3 | #include "config.h" 4 | 5 | int main(int argc, char **argv) { 6 | cf.mode_bitwidth = BIT_WIDTH_32; 7 | 8 | conf_parse_args(argc, argv); 9 | 10 | disasm_pe_file(cf.disasm_file, cf.size_to_disasm, cf.start_address); 11 | return 0; 12 | } -------------------------------------------------------------------------------- /peparser.c: 
-------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2019/2/9. 3 | // 4 | 5 | #include 6 | #include 7 | 8 | int parse_pe(FILE *fp, PIMAGE_OPTIONAL_HEADER32 ptr_opt_header, PIMAGE_SECTION_HEADER ptr_code_sect_header) { 9 | // read and check DOS header 10 | IMAGE_DOS_HEADER dos_header = {0}; 11 | fseek(fp, 0, SEEK_SET); 12 | fread(&dos_header, sizeof(dos_header), 1, fp); 13 | 14 | if (dos_header.e_magic != IMAGE_DOS_SIGNATURE) { 15 | fprintf(stderr, "parse_pe(): input file not a valid PE!"); 16 | return -1; 17 | } 18 | 19 | // check NT signature 20 | DWORD nt_signature = 0; 21 | fseek(fp, dos_header.e_lfanew, SEEK_SET); 22 | fread(&nt_signature, sizeof(DWORD), 1, fp); 23 | 24 | if (nt_signature != IMAGE_NT_SIGNATURE) { 25 | fprintf(stderr, "parse_pe(): input file not a valid PE!"); 26 | return -1; 27 | } 28 | 29 | // read NT headers 30 | IMAGE_FILE_HEADER file_header = {0}; 31 | fread(&file_header, sizeof(file_header), 1, fp); 32 | fread(ptr_opt_header, sizeof(IMAGE_OPTIONAL_HEADER32), 1, fp); 33 | 34 | // find and read code section header 35 | for (int i = 0; i < file_header.NumberOfSections; i++) { 36 | fseek(fp, dos_header.e_lfanew + sizeof(IMAGE_NT_HEADERS32) + i * sizeof(IMAGE_SECTION_HEADER), SEEK_SET); 37 | fread(ptr_code_sect_header, sizeof(IMAGE_SECTION_HEADER), 1, fp); 38 | 39 | // is it the code section? 40 | if ((ptr_code_sect_header->Characteristics & IMAGE_SCN_CNT_CODE) 41 | && (ptr_code_sect_header->VirtualAddress == ptr_opt_header->BaseOfCode)) { 42 | return 0; 43 | } 44 | } 45 | 46 | fprintf(stderr, "parse_pe(): code section not found! 
Maybe the input file is not a valid PE!"); 47 | return -1; 48 | } 49 | 50 | DWORD raw_to_rva(DWORD raw_addr, PIMAGE_OPTIONAL_HEADER32 ptr_opt_header, PIMAGE_SECTION_HEADER ptr_sect_header) { 51 | return ptr_sect_header->VirtualAddress + ptr_opt_header->ImageBase + (raw_addr - ptr_sect_header->PointerToRawData); 52 | } 53 | 54 | DWORD get_pe_ep_addr(FILE *fp, DWORD *rva) { 55 | IMAGE_OPTIONAL_HEADER32 opt_header = {0}; 56 | IMAGE_SECTION_HEADER code_sect_header = {0}; 57 | 58 | if (parse_pe(fp, &opt_header, &code_sect_header) < 0) { 59 | return 0; 60 | } 61 | 62 | DWORD ep = code_sect_header.PointerToRawData + (opt_header.AddressOfEntryPoint - opt_header.BaseOfCode); 63 | *rva = raw_to_rva(ep, &opt_header, &code_sect_header); 64 | 65 | return ep; 66 | } -------------------------------------------------------------------------------- /peparser.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2019/2/9. 3 | // 4 | 5 | #ifndef KALEIDOSCOPE_PEPARSER_H 6 | #define KALEIDOSCOPE_PEPARSER_H 7 | 8 | #include 9 | #include 10 | 11 | int parse_pe(FILE *fp, PIMAGE_OPTIONAL_HEADER32 ptr_opt_header, PIMAGE_SECTION_HEADER ptr_code_sect_header); 12 | DWORD raw_to_rva(DWORD raw_addr, PIMAGE_OPTIONAL_HEADER32 ptr_opt_header, PIMAGE_SECTION_HEADER ptr_sect_header); 13 | DWORD get_pe_ep_addr(FILE *fp, DWORD *rva); 14 | 15 | #endif //KALEIDOSCOPE_PEPARSER_H 16 | -------------------------------------------------------------------------------- /search.c: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2019/2/11. 
3 | // 4 | 5 | #include "search.h" 6 | 7 | int binary_search_lower(Inst insts[], int l, int r, int key) { 8 | int mid = l + (r - l) / 2; 9 | // if search scope has been narrowed down to nothing, 10 | // then either we have found the key, or the key does not exist in the array 11 | if (l > r) { 12 | if (insts[mid].opcode == key) 13 | return mid; 14 | else 15 | return -1; 16 | } 17 | 18 | if (insts[mid].opcode >= key) 19 | return binary_search_lower(insts, l, mid - 1, key); 20 | else 21 | return binary_search_lower(insts, mid + 1, r, key); 22 | } -------------------------------------------------------------------------------- /search.h: -------------------------------------------------------------------------------- 1 | // 2 | // Created by hx1997 on 2019/2/11. 3 | // 4 | 5 | #ifndef KALEIDOSCOPE_SEARCH_H 6 | #define KALEIDOSCOPE_SEARCH_H 7 | 8 | #include "instruction.h" 9 | 10 | int binary_search_lower(Inst insts[], int l, int r, int key); 11 | 12 | #endif //KALEIDOSCOPE_SEARCH_H 13 | --------------------------------------------------------------------------------