├── examples ├── mips_example ├── arm_example ├── x64_example └── x86_example ├── src ├── arch │ ├── arm │ │ ├── astrings.h │ │ ├── astrings.c │ │ ├── aload.h │ │ ├── arm.ins │ │ ├── aload.c │ │ ├── arm.h │ │ └── arm.c │ ├── mips │ │ ├── mstrings.h │ │ ├── mload.h │ │ ├── mstrings.c │ │ ├── mips.ins │ │ ├── mips.h │ │ ├── mload.c │ │ └── mips.c │ └── x86 │ │ ├── x86load.h │ │ ├── x86strings.h │ │ ├── x86asm.h │ │ ├── x86load.c │ │ ├── x86.h │ │ ├── x86strings.c │ │ ├── x86.ins │ │ ├── x86asm.c │ │ ├── x64.ins │ │ └── x86.c ├── gen │ ├── x86.sym │ ├── gen.h │ ├── Makefile │ └── gen.c ├── common │ ├── file.h │ ├── common.h │ ├── file.c │ ├── table.h │ ├── trie.h │ ├── table.c │ └── trie.c ├── spec │ ├── mips.spec │ └── x86.spec ├── sym.h ├── sym.c ├── lex.h ├── lex.c ├── disas.h ├── dis.h ├── dss.h ├── disas.c ├── main.c ├── dis.c └── dss.c ├── Makefile ├── .travis.yml ├── README.md └── LICENSE /examples/mips_example: -------------------------------------------------------------------------------- 1 | 04 00 02 24 05 00 02 24 25 40 40 00 04 00 02 24 05 00 02 24 25 48 40 00 20 40 09 01 04 00 02 24 2 | -------------------------------------------------------------------------------- /examples/arm_example: -------------------------------------------------------------------------------- 1 | 0d c0 a0 e1 10 d8 2d e9 04 b0 4c e2 24 d0 4d e2 28 00 0b e5 2c 10 0b e5 28 30 1b e5 01 00 53 e3 02 00 00 ca 2 | -------------------------------------------------------------------------------- /src/arch/arm/astrings.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM_STRINGS_H 2 | #define ARM_STRINGS_H 3 | 4 | extern const char *arm_conditions[16]; 5 | extern const char *arm_registers[16]; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/arch/mips/mstrings.h: -------------------------------------------------------------------------------- 1 | #ifndef MIPS_STRINGS_H 2 | #define 
MIPS_STRINGS_H 3 | 4 | extern const char *mips_registers[32]; 5 | extern const char *mips_fp_registers[32]; 6 | 7 | #endif 8 | -------------------------------------------------------------------------------- /src/gen/x86.sym: -------------------------------------------------------------------------------- 1 | "+" : plus 2 | "-" : minus 3 | "*" : mult 4 | "[" : rbrk 5 | "]" : lbrk 6 | "," : comma 7 | "byte" : byte 8 | "word" : word 9 | "dword" : dword 10 | "qword" : qword 11 | -------------------------------------------------------------------------------- /examples/x64_example: -------------------------------------------------------------------------------- 1 | 55 48 89 e5 48 83 ec 10 c7 45 f8 00 00 00 00 c7 45 fc 00 00 00 00 eb 23 8b 45 fc 2b 45 f8 89 c2 8b 4d f8 8b 45 fc 01 c1 8b 45 fc 89 c6 89 cf e8 b2 ff ff ff 01 45 f8 83 45 fc 01 83 7d fc 09 7e d7 b8 00 00 00 00 5d c3 2 | -------------------------------------------------------------------------------- /examples/x86_example: -------------------------------------------------------------------------------- 1 | 55 89 e5 83 ec 10 e8 44 00 00 00 05 ee 1a 00 00 c7 45 f8 00 00 00 00 c7 45 fc 00 00 00 00 eb 22 8b 45 fc 2b 45 f8 8b 4d f8 8b 55 fc 01 ca 50 ff 75 fc 52 e8 ae ff ff ff 83 c4 0c 01 45 f8 83 45 fc 01 83 7d fc 09 7e d8 b8 00 00 00 00 c9 c3 2 | -------------------------------------------------------------------------------- /src/common/file.h: -------------------------------------------------------------------------------- 1 | #ifndef FILE_H 2 | #define FILE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | long ascii_to_hex(unsigned char *out, char *in, long len); 9 | int get_line(FILE * f, char *buf, long max); 10 | 11 | #endif 12 | -------------------------------------------------------------------------------- /src/arch/arm/astrings.c: -------------------------------------------------------------------------------- 1 | #include "astrings.h" 2 | 3 | const char *arm_conditions[16] = { 4 | "eq", "ne", "cs", 
"cc", 5 | "mi", "pl", "vs", "vc", 6 | "hi", "ls", "ge", "lt", 7 | "gt", "le", "al", "nv" 8 | }; 9 | 10 | const char *arm_registers[16] = { 11 | "r0", "r1", "r2", "r3", 12 | "r4", "r5", "r6", "r7", 13 | "r8", "r9", "r10", "fp", 14 | "ip", "sp", "lr", "pc", 15 | }; 16 | -------------------------------------------------------------------------------- /src/arch/arm/aload.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM_LOAD_H 2 | #define ARM_LOAD_H 3 | 4 | #include 5 | #include 6 | #include "../../common/file.h" 7 | #include "../../common/trie.h" 8 | #include "../../common/table.h" 9 | 10 | #define MAX_MNEM_SIZE_ARM 128 11 | 12 | /*ARM Instruction Entry*/ 13 | struct arm_instr_entry { 14 | char mnemonic[MAX_MNEM_SIZE_ARM]; 15 | int instr_type; 16 | }; 17 | 18 | void arm_parse(struct trie_node *root, struct hash_table *table, int mode); 19 | 20 | #endif 21 | -------------------------------------------------------------------------------- /src/gen/gen.h: -------------------------------------------------------------------------------- 1 | #ifndef GEN_H 2 | #define GEN_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | 9 | /*Generates symbol tables from text files*/ 10 | 11 | int get_line(FILE *f, char *buf, long max); 12 | void print_type(FILE *hfile, FILE *cfile, char *name, char **types, int nt); 13 | void print_symt(FILE *header, FILE *cfile, char *name, char **symbols, int ns); 14 | void parse_symfiles(char *filename, FILE *header, FILE *cfile); 15 | 16 | #endif 17 | -------------------------------------------------------------------------------- /src/spec/mips.spec: -------------------------------------------------------------------------------- 1 | [sll] 2 | end 3 | [srl] 4 | end 5 | [jr] 6 | end 7 | [mfhi] 8 | end 9 | [mflo] 10 | end 11 | [mtlo] 12 | end 13 | [add] 0 14 | o: $w = $0, $r = $1, $r = $2 15 | end 16 | [addu] 0 17 | o: $w = $0, $r = $1, $r = $2 18 | end 19 | [addiu] 0 20 | o: $w = $0, $r = 
$1, $r = $2 21 | end 22 | [sub] 0 23 | o: $w = $0, $r = $1, $r = $2 24 | end 25 | [or] 1 26 | o: $w = $0, $r = $1, $r = $2 27 | end 28 | [subu] 0 29 | o: $w = $0, $r = $1, $r = $2 30 | end 31 | [mult] 0 32 | o: $r = $0, $r = $1 33 | end 34 | -------------------------------------------------------------------------------- /src/sym.h: -------------------------------------------------------------------------------- 1 | #ifndef SYM_H 2 | #define SYM_H 3 | 4 | /*Automatically Generated By gen.h&gen.c using all the .sym files*/ 5 | 6 | #define X86_IDX 0 7 | enum x86_types { 8 | t_x86_plus=2, 9 | t_x86_minus, 10 | t_x86_mult, 11 | t_x86_rbrk, 12 | t_x86_lbrk, 13 | t_x86_comma, 14 | t_x86_byte, 15 | t_x86_word, 16 | t_x86_dword, 17 | t_x86_qword 18 | }; 19 | extern const int x86_type[11]; 20 | extern const char *x86_sym[11]; 21 | extern const char **symbol_tt[1]; 22 | extern const int *symtype_tt[1]; 23 | 24 | #endif 25 | -------------------------------------------------------------------------------- /src/sym.c: -------------------------------------------------------------------------------- 1 | #include "sym.h" 2 | 3 | const int x86_type[] = { 4 | t_x86_plus, 5 | t_x86_minus, 6 | t_x86_mult, 7 | t_x86_rbrk, 8 | t_x86_lbrk, 9 | t_x86_comma, 10 | t_x86_byte, 11 | t_x86_word, 12 | t_x86_dword, 13 | t_x86_qword, 14 | 0 15 | }; 16 | const char *x86_sym[] = { 17 | "+", 18 | "-", 19 | "*", 20 | "[", 21 | "]", 22 | ",", 23 | "byte", 24 | "word", 25 | "dword", 26 | "qword", 27 | 0 28 | }; 29 | const char **symbol_tt[] = { 30 | (const char**)&x86_sym 31 | };const int *symtype_tt[] = { 32 | (const int*)&x86_type 33 | }; -------------------------------------------------------------------------------- /src/gen/Makefile: -------------------------------------------------------------------------------- 1 | CC = gcc 2 | EXE = gen 3 | SOURCES = $(wildcard *.c) 4 | OBJS = $(SOURCES:.c=.o) 5 | UNAME_S := $(shell uname -s) 6 | 7 | ifeq ($(UNAME_S), Linux) #LINUX 8 | ECHO_MESSAGE = "Linux" 9 
#ifndef COMMON_H
#define COMMON_H

#include <stdint.h>

/*Typedefs to make code cleaner*/
typedef unsigned char u8;
typedef uint16_t u16;
typedef uint32_t u32;
typedef uint64_t u64;

/*Convenience Macros
 *
 * Every macro argument is wrapped in parentheses so that callers may pass
 * compound expressions (e.g. CHECK_FLAG(b, A | B) or SIGN(x - y)) without
 * the expansion being re-associated by operator precedence.
 * Arguments may still be evaluated more than once: do not pass expressions
 * with side effects.
 */

/*1 if any bit of flag is set in byte, otherwise 0*/
#define CHECK_FLAG(byte, flag) (!!((byte) & (flag)))
/*Sets the bits of flag in byte (byte must be an lvalue)*/
#define SET_FLAG(byte, flag) ((byte) |= (flag))
/*Shifts val right by (bit width of val - 1), i.e. isolates its top bit.
 * NOTE(review): for a negative signed operand the result of >> is
 * implementation-defined; the macro looks intended for unsigned values.*/
#define SIGNED(val) ((val) >> (sizeof(val) * 8 - 1))
/*Negated val when its top bit is set, otherwise val unchanged*/
#define SIGN(val) ((SIGNED(val)) ? -(val) : (val))
/*Bits [s, e) of val, moved down to bit 0. (e - s) must be smaller than the
 * width of unsigned, otherwise the mask shift is undefined.*/
#define BITS(val, s, e) (((val) >> (s)) & ~((unsigned)-1 << ((e) - (s))))


#endif
-------------------------------------------------------------------------------- 1 | #ifndef MIPS_LOAD_H 2 | #define MIPS_LOAD_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "../../common/file.h" 8 | #include "../../common/trie.h" 9 | #include "../../common/table.h" 10 | 11 | #define MAX_MNEM_SIZE_MIPS 12 12 | #ifndef MODE_32B 13 | #define MODE_32B 1 14 | #endif 15 | #ifndef MODE_64B 16 | #define MODE_64B 2 17 | #endif 18 | 19 | /*MIPS Instruction Entry*/ 20 | struct mips_instr_entry { 21 | char mnemonic[MAX_MNEM_SIZE_MIPS]; 22 | char instr_type; 23 | }; 24 | 25 | void mips_parse(struct trie_node *root, struct hash_table *table, int mode); 26 | 27 | #endif 28 | -------------------------------------------------------------------------------- /.travis.yml: -------------------------------------------------------------------------------- 1 | language: c 2 | os: linux 3 | compiler: gcc 4 | env: 5 | global: 6 | # The next declaration is the encrypted COVERITY_SCAN_TOKEN, created 7 | before_install: 8 | - echo -n | openssl s_client -connect https://scan.coverity.com:443 | sed -ne '/-BEGIN CERTIFICATE-/,/-END CERTIFICATE-/p' | sudo tee -a /etc/ssl/certs/ca- 9 | 10 | addons: 11 | coverity_scan: 12 | project: 13 | name: "Mithreindeir/Dynzasm" 14 | description: "Interactive Disassembler" 15 | notification_email: mithreindeir@gmail.com 16 | build_command_prepend: "make clean" 17 | build_command: "make" 18 | branch_pattern: coverity_scan 19 | script: 20 | - make 21 | -------------------------------------------------------------------------------- /src/arch/mips/mstrings.c: -------------------------------------------------------------------------------- 1 | #include "mstrings.h" 2 | 3 | const char *mips_registers[32] = { 4 | "$zero", "$at", "$v0", "$v1", 5 | "$a0", "$a1", "$a2", "$a3", 6 | "$t0", "$t1", "$t2", "$t3", 7 | "$t4", "$t5", "$t6", "$t7", 8 | "$s0", "$s1", "$s2", "$s3", 9 | "$s4", "$s5", "$s6", "$s7", 10 | "$t8", "$t9", "$k0", "$k1", 11 | "$gp", "$sp", 
/*Value of a single hexadecimal digit (either case), or -1 if c is not one*/
static int hex_digit_value(char c)
{
	if (c >= '0' && c <= '9')
		return c - '0';
	if (c >= 'a' && c <= 'f')
		return c - 'a' + 10;
	if (c >= 'A' && c <= 'F')
		return c - 'A' + 10;
	return -1;
}

/*Gets a line from a file or returns 1 on eof
 *
 * buf is zero-filled first, then receives the characters up to (not
 * including) the next '\n'. At most max - 1 characters are stored so buf is
 * always NUL terminated; the rest of an over-long line is consumed and
 * dropped so that it is not mistaken for the following line.
 *
 * Returns 0 when the line was ended by '\n', and 1 when end of file (or a
 * read error) was reached first. As before, data read on a final line that
 * has no trailing newline is left in buf but reported with 1.
 */
int get_line(FILE * f, char *buf, long max)
{
	long used = 0;
	int ch;

	if (!buf || max <= 0)
		return 1;
	memset(buf, 0, (size_t) max);
	if (!f)
		return 1;

	/*fgetc's result must stay an int: narrowing it to char made a read
	 * error (EOF without feof) loop forever*/
	while ((ch = fgetc(f)) != EOF && ch != '\n') {
		if (used < max - 1)
			buf[used++] = (char) ch;
	}
	return ch == EOF;
}

/*Converts ascii hex to raw hex. EG "A" -> 0x0a
 *
 * Reads at most len characters of in, two digits per output byte (first
 * digit is the high nibble). Upper and lower case digits are accepted.
 * A single digit left over at the end is stored on its own as a low nibble,
 * and conversion stops at the first character that is not a hex digit.
 * out must have room for (len + 1) / 2 bytes.
 *
 * Returns the number of bytes written to out.
 */
long ascii_to_hex(unsigned char *out, char *in, long len)
{
	long written = 0;

	if (!out || !in)
		return 0;

	for (long i = 0; i < len; i += 2) {
		int hi = hex_digit_value(in[i]);
		if (hi < 0)
			break;

		/*Only look at in[i + 1] when it is inside the input*/
		int lo = (i + 1 < len) ? hex_digit_value(in[i + 1]) : -1;
		if (lo < 0) {
			out[written++] = (unsigned char) hi;
			break;
		}
		out[written++] = (unsigned char) ((hi << 4) | lo);
	}
	return written;
}
get_mm_index(const char *reg); 30 | int get_register_index(const char *reg); 31 | 32 | #endif 33 | -------------------------------------------------------------------------------- /src/arch/mips/mips.ins: -------------------------------------------------------------------------------- 1 | 00 invalid NA f:1 2 | 0000 sll R f:2 3 | 0002 srl R f:2 4 | 0008 jr R f:4 f:16 5 | 0010 mfhi R f:8 f:16 6 | 0011 mthi R f:8 f:16 7 | 0012 mflo R f:8 f:16 8 | 0013 mtlo R f:8 f:16 9 | 0020 add R 10 | 0021 addu R 11 | 0022 sub R 12 | 0023 subu R 13 | 0018 mult R f:4 14 | 0019 multu R f:4 15 | 001a div R f:4 16 | 001b divu R f:4 17 | 0027 nor R 18 | 0026 xor R 19 | 0025 or R 20 | 002a slt R 21 | 002b sltu R 22 | 02 j J 23 | 03 jal J 24 | 04 beq I 25 | 05 bne I 26 | 06 blez I 27 | 08 addi I 28 | 09 addiu I 29 | 0a slti I 30 | 0b sltiu I 31 | 0c andi I 32 | 0d ori I 33 | 0f lui I 34 | 23 lw I 35 | 24 lbu I 36 | 25 lhu I 37 | 28 sb I 38 | 29 sh I 39 | 2b sw I 40 | 11 invalid1 NA f:32 41 | 1110 invalid2 NA f:1 42 | 1111 invalid2 NA f:1 43 | 111000 add.s F 44 | 111001 sub.s F 45 | 111002 mul.s F 46 | 111003 div.s F 47 | 111100 add.d F 48 | 111101 sub.d F 49 | 111102 mul.d F 50 | 111103 div.d F 51 | -------------------------------------------------------------------------------- /src/common/table.h: -------------------------------------------------------------------------------- 1 | #ifndef TABLE_H 2 | #define TABLE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /*Generic Hash Table Implementation (depended on by disassembly semantic lookup)*/ 9 | struct hash_table { 10 | int num_buckets; 11 | struct hash_entry **buckets; 12 | }; 13 | 14 | struct hash_entry { 15 | unsigned long hash; 16 | char *mnemonic; 17 | void *value; 18 | struct hash_entry *next; 19 | }; 20 | 21 | struct hash_table *hash_table_init(int num_buckets); 22 | void hash_table_destroy(struct hash_table *table, void(destroy)(void*)); 23 | 24 | struct hash_entry *hash_entry_init(char *mnemonic, void *value); 25 | void 
hash_entry_destroy(struct hash_entry *entry); 26 | 27 | void hash_table_insert(struct hash_table *table, struct hash_entry *entry); 28 | struct hash_entry *hash_table_lookup(struct hash_table *table, const char *mnem); 29 | 30 | void hash_entry_insert(struct hash_entry **head, struct hash_entry *entry); 31 | struct hash_entry *hash_entry_lookup(struct hash_entry *head, const char *mnem, unsigned long hash); 32 | 33 | unsigned long hash_str(const char *str); 34 | 35 | #endif 36 | -------------------------------------------------------------------------------- /src/arch/mips/mips.h: -------------------------------------------------------------------------------- 1 | #ifndef MIPS_H 2 | #define MIPS_H 3 | 4 | #include 5 | #include 6 | #include "../../common/common.h" 7 | #include "../../dis.h" 8 | #include "mload.h" 9 | #include "mstrings.h" 10 | 11 | /*Trie node flags*/ 12 | #define INSTR_FUNC 1 13 | #define INSTR_SHIFT 2 14 | #define INSTR_NORS 4 15 | #define INSTR_NORT 8 16 | #define INSTR_NORD 16 17 | #define INSTR_RSEXT 32 18 | 19 | /*Instruction Types*/ 20 | #define TYPE_R 'R' 21 | #define TYPE_I 'I' 22 | #define TYPE_J 'J' 23 | #define TYPE_F 'F' 24 | 25 | /*Bit Macros for MIPS instruction encoding*/ 26 | #define OPCODE(instr) ((instr>>26)) 27 | #define FUNC(instr) ((instr&0x03f)) 28 | #define SHAMT(instr) ((instr>>6)&0x01f) 29 | #define RS(instr) ((instr>>21)&0x01f) 30 | #define RT(instr) ((instr>>16)&0x01f) 31 | #define RD(instr) ((instr>>11)&0x01f) 32 | #define ADDR(instr) ((instr&0x03ffffff)) 33 | #define IMM(instr) (instr&0x0ffff) 34 | 35 | struct dis *mips_disassemble(int mode, struct trie_node *node, u8 * stream, 36 | long max, uint64_t addr); 37 | void mips_decode_operands(struct dis *disas, struct mips_instr_entry *e, 38 | uint32_t instruction, u8 flags); 39 | 40 | #endif 41 | -------------------------------------------------------------------------------- /src/arch/arm/arm.ins: 
-------------------------------------------------------------------------------- 1 | 00 invalid NA f:1 2 | 0010 mul M 3 | 0020 mla A 4 | 0030 umaal U 5 | 0040 mls A 6 | 0050 umull U 7 | 0060 umlal U 8 | 0070 smull U 9 | 0080 smlal U 10 | 01 invalid NA f:1 11 | 0000 and D 12 | 0001 eor D 13 | 0002 sub D 14 | 0003 rsb D 15 | 0004 add D 16 | 0005 adc D 17 | 0006 sbc D 18 | 0007 rsc D 19 | 0008 tst D 20 | 0009 teq D 21 | 000a cmp D 22 | 000b cmn D 23 | 000c orr D 24 | 000d mov D f:2 25 | 000e bic D 26 | 000f mvn D f:2 27 | 0100 and I 28 | 0101 eor I 29 | 0102 sub I 30 | 0103 rsb I 31 | 0104 add I 32 | 0105 adc I 33 | 0106 sbc I 34 | 0107 rsc I 35 | 0108 tst I 36 | 0109 teq I 37 | 010a cmp I 38 | 010b cmn I 39 | 010c orr I 40 | 010d mov I f:2 41 | 010e bic I 42 | 010f mvn I f:2 43 | 02 invalid NA f:4 44 | 0201 ldr O 45 | 0200 str O 46 | 03 invalid NA f:4 47 | 0301 ldr R 48 | 0300 str R 49 | 04 invalid NA f:4 50 | 0401 ldm L f:8 51 | 040100 ldmda L 52 | 040101 ldmia L 53 | 040102 ldmdb L 54 | 040103 ldmib L 55 | 040104 ldmfa L 56 | 040105 ldmfd L 57 | 040106 ldmea L 58 | 040107 ldmed L 59 | 0400 stm L f:8 60 | 040000 stmda L 61 | 040001 stmia L 62 | 040002 stmdb L 63 | 040003 stmib L 64 | 040004 stmed L 65 | 040005 stmea L 66 | 040006 stmfd L 67 | 040007 stmfa L 68 | 05 b B 69 | -------------------------------------------------------------------------------- /src/common/trie.h: -------------------------------------------------------------------------------- 1 | #ifndef TRIE_H 2 | #define TRIE_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | /*Opaque Trie structure for disassembly indexing. 
9 | * Branch Nodes can hold flags to resolve lookup conflicts.*/ 10 | struct trie_node { 11 | struct trie_node *parent; 12 | //Trie children 13 | struct trie_node **children; 14 | int num_children; 15 | 16 | //Distance from the root node 17 | int dist; 18 | 19 | //You can set flags for branches 20 | unsigned char flags; 21 | 22 | //Key byte 23 | unsigned char key; 24 | //Opaque pointer to hold entry 25 | void *value; 26 | }; 27 | 28 | struct trie_node *trie_init(unsigned char key, void *value); 29 | void trie_destroy(struct trie_node *node); 30 | 31 | //Creates path and inserts a value. Returns trie node or NULL on failure 32 | struct trie_node *trie_insert(struct trie_node *root, unsigned char *stream, long max, 33 | void *value, unsigned char flags); 34 | //Returns the leaf node or branch if there is a lookup conflict 35 | struct trie_node *trie_lookup(struct trie_node *root, 36 | unsigned char *stream, long max); 37 | //Insertion sort trie nodes 38 | void trie_node_insert(struct trie_node *node, struct trie_node *child); 39 | //Search on a sorted trie node 40 | struct trie_node *trie_node_search(struct trie_node *node, 41 | unsigned char key); 42 | 43 | #endif 44 | -------------------------------------------------------------------------------- /src/spec/x86.spec: -------------------------------------------------------------------------------- 1 | [adc] 0 2 | o: $rw = $0, $r = $1 3 | f: $rf=.......c 4 | f: $mf=o..szapc 5 | end 6 | 7 | [sbb] 0 8 | o: $rw = $0, $r = $1 9 | f: $rf=.......c 10 | f: $mf=o..szapc 11 | end 12 | 13 | [add] 0 14 | o: $rw = $0, $r = $1 15 | f: $mf=o..szapc 16 | end 17 | 18 | [sub] 0 19 | o: $rw = $0, $r = $1 20 | f: $mf=o..szapc 21 | end 22 | 23 | [imul] 0 24 | o: $rw = $0, $r = $1, $r = $2 25 | f: $mf=o..szapc 26 | end 27 | 28 | [cmp] 0 29 | o: $r = $0, $r = $1 30 | f: $mf=o..szapc 31 | end 32 | 33 | [or] 1 34 | o: $rw = $0, $r = $1 35 | f: $rf=........ 
36 | f: $mf=o..sz.pc 37 | end 38 | 39 | [and] 1 40 | o: $rw = $0, $r = $1 41 | f: $mf=o..szapc 42 | end 43 | 44 | [xor] 1 45 | o: $rw = $0, $r = $1 46 | f: $mf=o..szapc 47 | end 48 | 49 | [jb] 4 50 | o: $r = $0 51 | f: $rf=.......c 52 | end 53 | 54 | [jnb] 4 55 | o: $r = $0 56 | f: $rf=.......c 57 | end 58 | 59 | [jz] 4 60 | o: $r = $0 61 | f: $rf=....z... 62 | end 63 | 64 | [jnz] 4 65 | o: $r = $0 66 | f: $rf=....z... 67 | end 68 | 69 | [jl] 4 70 | o: $r = $0 71 | f: $rf=o..s.... 72 | end 73 | 74 | [jle] 4 75 | o: $r = $0 76 | f: $rf=o..sz... 77 | end 78 | 79 | [jge] 4 80 | o: $r = $0 81 | f: $rf=o..s.... 82 | end 83 | 84 | [jg] 4 85 | o: $r = $0 86 | f: $rf=o..sz... 87 | end 88 | 89 | [mov] 2 90 | o: $w = $0, $r = $1 91 | end 92 | 93 | [push] 3 94 | i: $rw = "esp" 95 | o: $r = $0 96 | end 97 | 98 | [pop] 3 99 | i: $rw = "esp" 100 | o: $w = $0 101 | end 102 | -------------------------------------------------------------------------------- /src/arch/arm/aload.c: -------------------------------------------------------------------------------- 1 | #include "aload.h" 2 | 3 | void arm_parse(struct trie_node *root, struct hash_table *table, int mode) 4 | { 5 | (void)mode; //Disregard mode for now 6 | FILE *fp = NULL; 7 | fp = fopen("./src/arch/arm/arm.ins", "r"); 8 | if (!fp) { 9 | printf("Error opening arm instruction file\n"); 10 | return; 11 | } 12 | char buf[64]; 13 | 14 | char *bytes = NULL, *mnem = NULL, *type, *sflags = NULL; 15 | unsigned char flags = 0; 16 | while (!get_line(fp, buf, 64)) { 17 | flags = 0; 18 | bytes = strtok(buf, " "); 19 | mnem = strtok(NULL, " "); 20 | type = strtok(NULL, " \n"); 21 | 22 | while ((sflags = strtok(NULL, " \n"))) { 23 | if (!strncmp(sflags, "f:", 2)) { 24 | unsigned char n = 25 | strtol(sflags + 2, NULL, 10); 26 | flags |= n; 27 | } 28 | } 29 | if (!bytes || !mnem || !type) continue; 30 | 31 | struct arm_instr_entry *entry = 32 | malloc(sizeof(struct arm_instr_entry)); 33 | strncpy(entry->mnemonic, mnem, MAX_MNEM_SIZE_ARM - 
1); 34 | entry->instr_type = *type; 35 | unsigned char buffer[32]; 36 | long blen = ascii_to_hex(buffer, bytes, strlen(bytes)); 37 | 38 | struct trie_node *leaf = root ? trie_insert(root, buffer, blen, entry, flags) : NULL; 39 | if (table && leaf) { 40 | /*Insert the trie node into the hash table*/ 41 | hash_table_insert(table, hash_entry_init(entry->mnemonic, leaf)); 42 | } 43 | if (!leaf) { 44 | printf("Error duplicate instructions near %s\n", entry->mnemonic); 45 | for (int i = 0; i < blen; i++) 46 | printf("%02x ", buffer[i]); 47 | free(entry); 48 | } 49 | } 50 | fclose(fp); 51 | } 52 | -------------------------------------------------------------------------------- /src/arch/mips/mload.c: -------------------------------------------------------------------------------- 1 | #include "mload.h" 2 | 3 | void mips_parse(struct trie_node *root, struct hash_table *table, int mode) 4 | { 5 | FILE *fp = NULL; 6 | fp = fopen("./src/arch/mips/mips.ins", "r"); 7 | (void) mode; /*32 and 64 mode have the same file, so mode isnt needed */ 8 | if (!fp) { 9 | printf("Error opening mips instruction file\n"); 10 | return; 11 | } 12 | char buf[64]; 13 | char *bytes = NULL, *mnem = NULL, *type = NULL, *sflags = NULL; 14 | unsigned char flags = 0; 15 | while (!get_line(fp, buf, 64)) { 16 | flags = 0; 17 | bytes = strtok(buf, " "); 18 | mnem = strtok(NULL, " "); 19 | type = strtok(NULL, " "); 20 | while ((sflags = strtok(NULL, " \n"))) { 21 | if (!strncmp(sflags, "f:", 2)) { 22 | unsigned char n = 23 | strtol(sflags + 2, NULL, 10); 24 | flags |= n; 25 | } 26 | } 27 | if (!bytes || !mnem || !type) 28 | continue; 29 | /*Create instruction entry from line info: bytes mnem type func(optional) */ 30 | struct mips_instr_entry *entry = 31 | malloc(sizeof(struct mips_instr_entry)); 32 | strncpy(entry->mnemonic, mnem, MAX_MNEM_SIZE_MIPS - 1); 33 | entry->instr_type = *type; 34 | unsigned char buffer[32]; 35 | long blen = ascii_to_hex(buffer, bytes, strlen(bytes)); 36 | 37 | struct trie_node 
*leaf = root ? trie_insert(root, buffer, blen, entry, flags) : NULL; 38 | if (table && leaf) { 39 | /*Insert the trie node into the hash table*/ 40 | hash_table_insert(table, hash_entry_init(entry->mnemonic, leaf)); 41 | } 42 | if (!leaf) { 43 | printf("Error duplicate instructions near %s\n", entry->mnemonic); 44 | for (int i = 0; i < blen; i++) 45 | printf("%02x ", buffer[i]); 46 | free(entry); 47 | } 48 | } 49 | 50 | fclose(fp); 51 | } 52 | -------------------------------------------------------------------------------- /src/arch/x86/x86asm.h: -------------------------------------------------------------------------------- 1 | #ifndef X86_ASM_H 2 | #define X86_ASM_H 3 | 4 | #include 5 | #include 6 | #include "../../common/table.h" 7 | #include "../../common/trie.h" 8 | #include "x86.h" 9 | #include "x86load.h" 10 | #include "x86strings.h" 11 | 12 | #define X86_SIZE_COMPAT(csize, size) ((csize=='q'&&size==4)\ 13 | ||(csize=='v'&&(size==3||size==4||size==2))\ 14 | ||(csize=='d'&&size==3)\ 15 | ||(csize=='w'&&size==2)\ 16 | ||(csize=='b'&&size==1)\ 17 | ||(csize==0)) 18 | 19 | #define X86_SIZE_IMM(size)\ 20 | ((size=='q'?8:(size=='v'||size=='d')?4:(size=='w'?2:(size=='b')))) 21 | 22 | #define X86_SIZE_MIN(csize, size) ((csize=='q'&&size<=4)\ 23 | ||(csize=='v'&&size<=4)\ 24 | ||(csize=='d'&&size<=3)\ 25 | ||(csize=='w'&&size<=2)\ 26 | ||(csize=='b'&&size==1)) 27 | 28 | #define MAX(bits) (((unsigned long long)1<<(bits))-1) 29 | #define X64_SCALE(s) (s==8?3:(s==4?2:(s==2?1:0))) 30 | 31 | u8 *x86_assemble(char **tokens, int num_tokens, int mode, struct hash_entry *instr_head, int *len); 32 | int x86_classify_operand(char **tokens, int num_tokens, char operands[][MAX_OPER_LEN], int num_operands); 33 | int x86_match_operand(char **tokens, int num_tokens, char *op_type); 34 | 35 | int x86_size(char *tok); 36 | int x86_valid_modrm(char **tokens, int num_tokens, int size); 37 | 38 | u8 *x86_encode(char **tokens,int num_tokens,int mode,struct trie_node *n,struct 
x86_instr_entry *e,int*len); 39 | int x86_encode_modrm(char **tokens, int num_tokens, u8 **barr, int *blen, int os, int as, u8 *flags); 40 | int x86_get_indir(char **tokens, int nt, char **b, char **i, int*scale, uint64_t*d, int*ds); 41 | 42 | int x86_next_operand(char ** tokens, int num_tokens, int oidx, int *len); 43 | 44 | void x86_add_byte(u8 **barr, int *len, u8 b); 45 | void x86_add_pbyte(u8 **barr, int *len, u8 b); 46 | 47 | #endif 48 | -------------------------------------------------------------------------------- /src/lex.c: -------------------------------------------------------------------------------- 1 | #include "lex.h" 2 | 3 | char **lex(char *string, int *start, char *delim, int *num_tokens, int type) 4 | { 5 | char **tokens = NULL; 6 | int nt = 0, ptype = -1, sidx = *start; 7 | char *iter = string + sidx, *max = string + sidx + strlen(string+sidx); 8 | char *end = strchr(string+sidx, '\n'); 9 | if (end < max) max = end; 10 | while (iter < max) { 11 | char * token = iter; 12 | int ctype=0, clen=0, tlen = 0; 13 | while (token < max && !strchr(delim, *iter) && !(ctype=token_type(iter, &clen, type,ptype))) { 14 | tlen += clen, iter += clen; 15 | ptype = ctype; 16 | } 17 | if (ctype && !tlen) { 18 | tlen += clen; 19 | } else if (!ctype && !tlen) { 20 | iter++; 21 | continue; 22 | } 23 | nt++; 24 | if (!tokens) tokens = malloc(sizeof(char*)); 25 | else tokens = realloc(tokens, sizeof(char*)*nt); 26 | 27 | char *atok = malloc(tlen+1); 28 | memcpy(atok, token, tlen); 29 | /*Convert to lower case letters only*/ 30 | for (int i = 0; i < tlen; i++) 31 | atok[i] = tolower(atok[i]); 32 | atok[tlen] = 0; 33 | tokens[nt-1] = atok; 34 | iter = token + (tlen?tlen:1); 35 | ptype = ctype; 36 | } 37 | *start = (iter - string) + 1; 38 | *num_tokens = nt; 39 | return tokens; 40 | } 41 | 42 | int token_type(char *str, int *len, int type, int ptype) 43 | { 44 | assert(type < (int)(sizeof(symbol_tt)/sizeof(char **))); 45 | const char ** symbols = SYMTS(type); 46 | const 
int * symbols_type = SYMTT(type); 47 | for (int i=0; symbols[i] && symbols_type[i]; i++) { 48 | int slen = strlen(symbols[i]); 49 | if (!strncmp(str, symbols[i], slen)) { 50 | *len = slen; 51 | return symbols_type[i]; 52 | } 53 | } 54 | 55 | if (ptype != t_notype && (!strncmp(str,"0X",2)||!strncmp(str,"0x",2)) && isxdigit(str[2])) { 56 | int l = 2; 57 | while (isxdigit(str[++l])); 58 | *len = l; 59 | return t_number; 60 | } else if (ptype != t_notype && isdigit(str[0])) { 61 | int l = 1; 62 | while (isdigit(str[l])) l++; 63 | *len = l; 64 | return t_number; 65 | } 66 | 67 | *len = 1; 68 | return t_notype; 69 | } 70 | -------------------------------------------------------------------------------- /src/arch/x86/x86load.c: -------------------------------------------------------------------------------- 1 | #include "x86load.h" 2 | 3 | void x86_parse(struct trie_node *root, struct hash_table *table, int mode) 4 | { 5 | FILE *fp = NULL; 6 | if (mode == MODE_32B) 7 | fp = fopen("./src/arch/x86/x86.ins", "r"); 8 | else 9 | fp = fopen("./src/arch/x86/x64.ins", "r"); 10 | if (!fp) { 11 | printf("Error opening x86 instruction file\n"); 12 | return; 13 | } 14 | 15 | char buf[64]; 16 | char *bytes = NULL, *mnem = NULL, *op[3] = { NULL, NULL, NULL }; 17 | int num_op = 0; 18 | unsigned char flags = 0; 19 | /*Loop through lines in the files */ 20 | while (!(get_line(fp, buf, 64))) { 21 | bytes = strtok(buf, " "); 22 | mnem = strtok(NULL, " "); 23 | num_op = 0; 24 | flags = 0; 25 | /*Set the operands. 
If "f:" prefix then its a flag to set */ 26 | while ((op[num_op++] = strtok(NULL, " "))) { 27 | //Set flags 28 | if (!strncmp(op[num_op - 1], "f:", 2)) { 29 | unsigned char n = 30 | strtol(op[num_op - 1] + 2, NULL, 10); 31 | op[num_op - 1] = NULL; 32 | num_op--; 33 | flags |= n; 34 | } 35 | } 36 | num_op--; 37 | if (!mnem || !bytes) 38 | continue; 39 | 40 | /*Construct instruction entry from the lines strings */ 41 | struct x86_instr_entry *entry = 42 | malloc(sizeof(struct x86_instr_entry)); 43 | strncpy(entry->mnemonic, mnem, MAX_MNEM_SIZE_X86 - 1); 44 | for (int i = 0; i < num_op; i++) { 45 | strncpy(entry->operand[i], op[i], 46 | MAX_OPER_LEN - 1); 47 | } 48 | entry->num_op = num_op; 49 | 50 | /*Convert the key string to raw bytes and insert into trie */ 51 | unsigned char buffer[32]; 52 | long blen = ascii_to_hex(buffer, bytes, strlen(bytes)); 53 | 54 | struct trie_node *leaf = root ? trie_insert(root, buffer, blen, entry, flags) : NULL; 55 | if (table && leaf) { 56 | /*Insert the trie node into the hash table*/ 57 | hash_table_insert(table, hash_entry_init(entry->mnemonic, leaf)); 58 | } 59 | if (!leaf) { 60 | printf("Error duplicate instructions near %s\n", entry->mnemonic); 61 | for (int i = 0; i < blen; i++) 62 | printf("%02x ", buffer[i]); 63 | free(entry); 64 | } 65 | } 66 | 67 | fclose(fp); 68 | } 69 | -------------------------------------------------------------------------------- /src/arch/x86/x86.h: -------------------------------------------------------------------------------- 1 | #ifndef X86_H 2 | #define X86_H 3 | 4 | #include 5 | #include "../../dis.h" 6 | #include "../../common/trie.h" 7 | #include "../../common/common.h" 8 | #include "x86strings.h" 9 | #include "x86load.h" 10 | 11 | /*Disassembler Mode*/ 12 | #define MODE_X64 2 13 | #define MODE_X86 1 14 | 15 | /*Trie Node Flags*/ 16 | #define REG_EXT_FLAG 2 17 | #define PREFIX_FLAG 4 18 | #define OFFSET_FLAG 8 19 | #define MOD_EXT_FLAG 16 20 | 21 | /*Instruction Flags*/ 22 | #define 
/*Instruction Flags (bit mask)*/
#define OPER_SIZE_OVERRIDE 1	//01
#define ADDR_SIZE_OVERRIDE 2	//10
#define REX_B 4			//100
#define REX_X 8			//1000
#define REX_R 16		//10000
#define REX_W 32		//100000

/*Defaults: size index per mode (both modes currently default to operand size index 3)*/
#define DEF_OPER_SIZE(mode) ((mode) == MODE_X64 ? 3 : 3)
#define DEF_ADDR_SIZE(mode) ((mode) == MODE_X64 ? 4 : 3)

/*Encoding constants*/
#define SIB_RM 4
#define MODRM_INDIR 0
#define MODRM_1DISP 1
#define MODRM_4DISP 2
#define MODRM_REG 3
/*Predicates on decoded ModRM/SIB fields.  Arguments are parenthesized so
 * that masked expressions such as (byte & 7) can be passed directly.*/
#define MODRM_DISPONLY(mod, rm) (((mod) == 0) && ((rm) == 5))
#define SIB_NO_BASE(mod, base) (((base) == 5) && ((mod) == 0))
#define SIB_NO_INDEX(idx) ((idx) == 4)
"arch/x86/x86load.h" 9 | #include "arch/x86/x86.h" 10 | #include "arch/x86/x86asm.h" 11 | #include "arch/mips/mload.h" 12 | #include "arch/mips/mips.h" 13 | #include "arch/arm/aload.h" 14 | #include "arch/arm/arm.h" 15 | #include "dis.h" 16 | #include "dss.h" 17 | #include "lex.h" 18 | #include "common/trie.h" 19 | #include "common/table.h" 20 | 21 | #define DS_FOREACH(ds, cur)\ 22 | for (int i = 0; i < ds->num_instr && (cur=ds->instr[i]); i++) 23 | 24 | #define X86_ARCH 1 25 | #define MIPS_ARCH 2 26 | #define ARM_ARCH 3 27 | 28 | #ifndef MODE_64B 29 | #define MODE_64B MODE_X64 30 | #endif 31 | #ifndef MODE_32B 32 | #define MODE_32B MODE_X86 33 | #endif 34 | 35 | 36 | /*Disassembler structure. Holds disassembly and information needed to disassemble*/ 37 | struct disassembler { 38 | int arch, mode; 39 | 40 | struct dis **instr; 41 | int num_instr; 42 | 43 | struct trie_node *root; 44 | struct hash_table *sem_table; 45 | struct hash_table *asm_table; 46 | }; 47 | 48 | /*Returns new disassembler struct given arch and mode*/ 49 | struct disassembler *ds_init(int isa, int mode); 50 | /*Decodes a stream starting at address entry and stopping at end of stream*/ 51 | void ds_decode(struct disassembler *ds, unsigned char *stream, int size, 52 | uint64_t entry); 53 | /*Disassembles a single instruction starting at addr from the stream*/ 54 | struct dis *ds_disas(struct disassembler *ds, unsigned char *stream, int size, 55 | uint64_t addr); 56 | 57 | void ds_asm(struct disassembler *ds, char *instr); 58 | /*Appends disassembly to the end of the array*/ 59 | void ds_addinstr(struct disassembler *ds, struct dis *dis); 60 | /*Frees memory used by disassembler struct*/ 61 | void ds_destroy(struct disassembler *ds); 62 | 63 | /*Function's that give extra semantic information*/ 64 | 65 | unsigned char ds_modified_flags(struct disassembler *ds, struct dis *dis); 66 | unsigned char ds_read_flags(struct disassembler *ds, struct dis *dis); 67 | 68 | char ** ds_used_registers(struct 
/*
 * General purpose register names.  Each register occupies one row of four
 * entries ordered byte, word, dword, qword; rows 8-15 hold r8-r15.
 */
const char *general_registers[64] = {
	"al", "ax", "eax", "rax",
	"cl", "cx", "ecx", "rcx",
	"dl", "dx", "edx", "rdx",
	"bl", "bx", "ebx", "rbx",
	"ah", "sp", "esp", "rsp",
	"ch", "bp", "ebp", "rbp",
	"dh", "si", "esi", "rsi",
	"bh", "di", "edi", "rdi",
	"r8b", "r8w", "r8d", "r8",
	"r9b", "r9w", "r9d", "r9",
	"r10b", "r10w", "r10d", "r10",
	"r11b", "r11w", "r11d", "r11",
	"r12b", "r12w", "r12d", "r12",
	"r13b", "r13w", "r13d", "r13",
	"r14b", "r14w", "r14d", "r14",
	"r15b", "r15w", "r15d", "r15"
};

/*x87 floating point stack registers*/
const char *x87_registers[8] = {
	"st0", "st1", "st2", "st3", "st4", "st5", "st6", "st7"
};

/*SSE registers*/
const char *xmm_registers[8] = {
	"xmm0", "xmm1", "xmm2", "xmm3", "xmm4", "xmm5", "xmm6", "xmm7"
};

/*MMX registers*/
const char *mm_registers[8] = {
	"mm0", "mm1", "mm2", "mm3", "mm4", "mm5", "mm6", "mm7"
};

/*Memory operand size keywords, indexed by size - 1*/
const char *operand_size_prefix[4] = {
	"byte", "word", "dword", "qword"
};

/*
 * Returns the name of general purpose register `reg` at operand size `size`
 * (1 = byte ... 4 = qword; 0 selects the byte name).  `rexb` shifts the
 * lookup by eight registers per unit, selecting r8-r15 when it is 1.
 * Returns NULL when the computed table slot is out of range.
 */
const char *get_register(int reg, int size, int rexb)
{
	const int count = (int) (sizeof(general_registers) / sizeof(char *));
	int slot = 4 * reg;

	if (size)
		slot += size - 1;
	slot += 32 * rexb;

	return (slot >= 0 && slot < count) ? general_registers[slot] : NULL;
}
-1; 61 | } 62 | 63 | int get_x87_index(const char *reg) 64 | { 65 | for (int i = 0; 66 | i < (signed int) (sizeof(x87_registers) / sizeof(char *)); 67 | i++) { 68 | if (!strcmp(reg, x87_registers[i])) 69 | return i; 70 | } 71 | return -1; 72 | } 73 | 74 | int get_xmm_index(const char *reg) 75 | { 76 | for (int i = 0; 77 | i < (signed int) (sizeof(xmm_registers) / sizeof(char *)); 78 | i++) { 79 | if (!strcmp(reg, xmm_registers[i])) 80 | return i; 81 | } 82 | return -1; 83 | } 84 | 85 | int get_mm_index(const char *reg) 86 | { 87 | for (int i = 0; 88 | i < (signed int) (sizeof(mm_registers) / sizeof(char *)); 89 | i++) { 90 | if (!strcmp(reg, mm_registers[i])) 91 | return i; 92 | } 93 | return -1; 94 | } 95 | -------------------------------------------------------------------------------- /src/common/table.c: -------------------------------------------------------------------------------- 1 | #include "table.h" 2 | 3 | struct hash_table *hash_table_init(int num_buckets) 4 | { 5 | if (num_buckets < 0) return NULL; 6 | struct hash_table *table = malloc(sizeof(struct hash_table)); 7 | table->num_buckets = num_buckets; 8 | table->buckets = calloc(num_buckets, sizeof(struct hash_entry*)); 9 | return table; 10 | } 11 | 12 | void hash_table_destroy(struct hash_table *table, void(destroy)(void*)) 13 | { 14 | if (!table) return; 15 | for (int i = 0; i < table->num_buckets; i++) { 16 | struct hash_entry *cur = table->buckets[i], *next = NULL; 17 | while (cur) { 18 | next = cur->next; 19 | if (cur->value && destroy) 20 | destroy(cur->value); 21 | cur->value = NULL; 22 | hash_entry_destroy(cur); 23 | cur = next; 24 | } 25 | } 26 | free(table->buckets); 27 | free(table); 28 | } 29 | 30 | struct hash_entry *hash_entry_init(char *mnemonic, void *value) 31 | { 32 | struct hash_entry *entry = malloc(sizeof(struct hash_entry)); 33 | entry->mnemonic = mnemonic, entry->value = value; 34 | entry->hash = hash_str(mnemonic); 35 | entry->next = NULL; 36 | return entry; 37 | } 38 | 39 | 
void hash_entry_destroy(struct hash_entry *entry) 40 | { 41 | if (!entry) return; 42 | free(entry); 43 | } 44 | 45 | void hash_table_insert(struct hash_table *table, struct hash_entry *entry) 46 | { 47 | if (!table->buckets) return; 48 | hash_entry_insert(&table->buckets[entry->hash % table->num_buckets], entry); 49 | } 50 | 51 | struct hash_entry *hash_table_lookup(struct hash_table *table, const char *mnem) 52 | { 53 | if (!table->buckets) return NULL; 54 | unsigned long hv = hash_str(mnem); 55 | return hash_entry_lookup(table->buckets[hv % table->num_buckets], mnem, hv); 56 | } 57 | 58 | void hash_entry_insert(struct hash_entry **head, struct hash_entry *entry) 59 | { 60 | if (!(*head)) { 61 | (*head) = entry; 62 | return; 63 | } 64 | struct hash_entry *cur = *head; 65 | while (cur->next && !!strcmp(cur->mnemonic, entry->mnemonic)) cur = cur->next; 66 | if (!cur->next) { 67 | cur->next = entry; 68 | } else { 69 | entry->next = cur->next; 70 | cur->next = entry; 71 | } 72 | } 73 | 74 | struct hash_entry *hash_entry_lookup(struct hash_entry *head, const char *mnem, unsigned long hash) 75 | { 76 | while (head && (head->hash!=hash || !!strcmp(head->mnemonic, mnem))) head=head->next; 77 | return head; 78 | } 79 | 80 | unsigned long hash_str(const char *str) 81 | { 82 | unsigned long hash = 5381; 83 | int c; 84 | while ((c = *str++)) hash = ((hash << 5) + hash) + c; 85 | return hash; 86 | } 87 | -------------------------------------------------------------------------------- /src/dis.h: -------------------------------------------------------------------------------- 1 | #ifndef DIS_H 2 | #define DIS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | #include "common/common.h" 11 | 12 | #define DIS_ADDR 3 13 | #define DIS_REG 2 14 | #define DIS_IMM 1 15 | #define DIS_UNSET 0 16 | 17 | #define DIS_OPER 2 18 | #define DIS_BRANCH 1 19 | 20 | #define MNEM_SIZE 32 21 | #define SQUASH_SIZE 128 22 | #define GROUP_SIZE 8 23 | #define 
/*Buffer sizes for register names and operand tree format strings*/
#define REG_SIZE 8
#define FMT_SIZE 32

/*
 * Macros for easy tree access.  `t` is a pointer to a struct operand_tree.
 * The argument is parenthesized so that expressions such as &node or
 * nodes[i] can be passed; every macro still expands to an lvalue.
 */
#define TREE_TYPE(t) ((t)->type)
#define TREE_OPTYPE(t) ((t)->body.operand.operand_type)
#define TREE_REG(t) ((t)->body.operand.operand_val.reg)
#define TREE_ADDR(t) ((t)->body.operand.operand_val.addr)
#define TREE_IMM(t) ((t)->body.operand.operand_val.imm)
#define TREE_CHILD(t, idx) ((t)->body.op_tree.operands[(idx)])
#define TREE_NCHILD(t) ((t)->body.op_tree.num_operands)
#define TREE_FORMAT(t) ((t)->body.op_tree.format)
| int operand_squash(char *buf, long max, struct operand_tree *tree); 90 | 91 | #endif 92 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Dynzasm 2 | [![Build Status](https://travis-ci.org/Mithreindeir/Dynzasm.svg?branch=master)](https://travis-ci.org/Mithreindeir/Dynzasm) 3 | [![Coverity Scan Build Status](https://scan.coverity.com/projects/15646/badge.svg)](https://scan.coverity.com/projects/mithreindeir-dynzasm) 4 | 5 | Dynzasm is a fast, lightweight disassembly library written in C99 with no external dependencies. Disassembly is structured as trees with arbitrary formatting strings, which provides detailed disassembly information and makes it easy to support custom syntaxes. 6 | 7 | | Arch | Disassembler Support | Assembler Support | 8 | |-----|----------|-----------| 9 | |X86| Most (excluding extensions) | Partial (No fp, or isa ext)| 10 | |X64| Most (excluding extensions)| Partial (No fp, or isa ext)| 11 | |ARM| Partial| None (WIP)| 12 | |MIPS| Most | None (WIP)| 13 | 14 | A sample command-line utility is included: 15 | ```bash 16 | ./dynzasm --help 17 | Usage: ./dynzasm options filename 18 | --arch= Set architecture to be disassembled (x86, arm, or mips 19 | --mode= Set the architecture mode (32 or 64) 20 | --entry= Set a starting address 21 | -a convert ascii to hex 22 | -A Assemble 23 | If no file is specified stdin will be used 24 | Must specify architecture and mode 25 | ``` 26 | ```bash 27 | echo "55 48 89 e5 48 83 ec 70" | ./dynzasm --arch=x86 --mode=64 -a --addr=0x2172 28 | 0x002172: 55 push rbp 29 | 0x002173: 48 89 e5 mov rbp, rsp 30 | 0x002176: 48 83 ec 70 sub rsp, 0x70 31 | 32 | ``` 33 | 34 | An example of feeding the assembler from stdin and piping its output into the disassembler:
35 | 36 | ```bash 37 | ./dynzasm --arch=x86 --mode=64 -A | ./dynzasm --arch=x86 --mode=64 -a 38 | push rbp 39 | mov rbp, rsp 40 | mov eax, 0 41 | ret 42 | 00000000: 55 push rbp 43 | 0x000001: 48 8b ec mov rbp, rsp 44 | 0x000004: b8 00 00 00 00 mov eax, 0 45 | 0x000009: c3 ret 46 | ``` 47 | 48 | It is also very easy to use as a library. Detailed semantics from disassembly for easy analysis coming soon. 49 | 50 | ```C 51 | #include "disas.h" 52 | 53 | int main() 54 | { 55 | 56 | struct disassembler *ds = ds_init(X86_ARCH, MODE_64B); 57 | unsigned char bytes[] = "\x55\x48\x89\xe5\xb8\x00\x00\x00\x00\xc3"; 58 | 59 | ds_decode(ds, bytes, sizeof(bytes)-1, 0x0); 60 | struct dis *dis = NULL; 61 | 62 | DS_FOREACH(ds, dis) { 63 | printf("%#08lx:\t%s\t%s\n", dis->address, dis->mnemonic, dis->op_squash); 64 | } 65 | 66 | ds_destroy(ds); 67 | return 0; 68 | } 69 | 70 | ``` 71 | -------------------------------------------------------------------------------- /src/arch/arm/arm.h: -------------------------------------------------------------------------------- 1 | #ifndef ARM_H 2 | #define ARM_H 3 | 4 | #include 5 | #include 6 | #include "aload.h" 7 | #include "astrings.h" 8 | #include "../../common/trie.h" 9 | #include "../../common/common.h" 10 | #include "../../dis.h" 11 | 12 | /*Trie node flags*/ 13 | #define D_CROSS 1 14 | #define ARM_NORN 2 15 | #define LDSTC 4 16 | #define LDM_STM 8 17 | 18 | /*Constants*/ 19 | #define ALWAYS_EXECUTE 0xe 20 | #define ARM_LSLI 0 21 | #define ARM_LSLR 1 22 | #define ARM_LSRI 2 23 | #define ARM_LSRR 3 24 | #define ARM_ASR 4 25 | #define ARM_ROR_RRX 6 26 | 27 | /*Bitfield Extraction Macros*/ 28 | #define STKM(ins) (ARM_RN(ins)==13&&!(ins&(1<<13))&&LDST_W_FIELD(ins)) 29 | #define LDMSTM_BITS(ins) (((STKM(ins))<<2)+(LD_P_FIELD(ins)<<1)+LD_U_FIELD(ins)) 30 | #define DATA_OPCODE(ins) (BITS(ins, 21, 25)) 31 | #define S_FIELD(ins) (BITS(ins, 20, 21)) 32 | #define B_L_FIELD(ins) (BITS(ins, 24, 25)) 33 | #define LD_B_FIELD(ins) (BITS(ins, 22, 23)) 34 | 
#define LDST_W_FIELD(ins) (BITS(ins, 21, 22)) 35 | #define LD_L_FIELD(ins) (BITS(ins, 20, 21)) 36 | #define LD_P_FIELD(ins) (BITS(ins, 24, 25)) 37 | #define LD_U_FIELD(ins) (BITS(ins, 23, 24)) 38 | #define THREE_OBITS(ins) (BITS(ins, 25, 28)) 39 | #define COND(ins) (BITS(ins, 28, 32)) 40 | #define RLIST(ins) (BITS(ins, 0, 16)) 41 | 42 | #define ARM_ADDSUB(ins) (BITS(ins, 23, 24)) 43 | #define ARM_PREINDEX(ins) (BITS(ins, 24, 25)) 44 | #define ARM_IMM8(ins) (BITS(ins, 0, 8)) 45 | #define ARM_ROTATE_IMM(ins) (BITS(ins, 8, 12)) 46 | #define ARM_OFFSET24(ins) (BITS(ins, 0, 24)) 47 | #define ARM_OFFSET12(ins) (BITS(ins, 0, 12)) 48 | #define ARM_RN(ins) (BITS(ins, 16, 20)) 49 | #define ARM_RS(ins) (BITS(ins, 8, 12)) 50 | #define ARM_RD(ins) (BITS(ins, 12, 16)) 51 | #define ARM_RM(ins) (BITS(ins, 0, 4)) 52 | #define ARM_SHIFT_AMOUNT(ins) (BITS(ins, 7, 12)) 53 | #define ARM_SHIFT(ins) (BITS(ins, 4, 7)) 54 | 55 | /*Instruction Type Macros*/ 56 | #define VALID_DPROC(instr) (!(!(THREE_OBITS(instr)<=1) || IS_MULT(instr) || ((DATA_OPCODE(instr)>>2==0x2) && !S_FIELD(instr)))) 57 | #define IS_MULT(ins) ((BITS(ins, 25, 28)==0&&BITS(ins, 4,8)==0x9)) 58 | 59 | /*Instruction Encoding Types*/ 60 | #define DATA_PROCESS 'D' 61 | #define MULT 'M' 62 | #define UMULT 'U' 63 | #define AMULT 'A' 64 | #define LD_ST_OFF 'O' 65 | #define LD_ST_REG 'R' 66 | #define LD_ST_MUL 'L' 67 | #define BRANCH 'B' 68 | 69 | 70 | struct dis *arm_disassemble(int mode, struct trie_node *node, u8 * stream, 71 | long max, uint64_t addr); 72 | void arm_decode_operands(struct dis *disas, struct arm_instr_entry *e, 73 | uint64_t addr, uint32_t instruction, u8 flags); 74 | void arm_shifter_operand(struct dis *disas, struct operand_tree *opr, uint32_t instruction, int type); 75 | 76 | #endif 77 | -------------------------------------------------------------------------------- /src/common/trie.c: -------------------------------------------------------------------------------- 1 | #include "trie.h" 2 | 3 | struct 
trie_node *trie_init(unsigned char key, void *value) 4 | { 5 | struct trie_node *node = malloc(sizeof(struct trie_node)); 6 | 7 | node->dist = 0; 8 | node->key = key; 9 | node->value = value; 10 | node->parent = NULL; 11 | node->children = NULL; 12 | node->num_children = 0; 13 | node->flags = 0; 14 | 15 | return node; 16 | } 17 | 18 | void trie_destroy(struct trie_node *node) 19 | { 20 | if (!node) 21 | return; 22 | 23 | for (int i = 0; i < node->num_children; i++) { 24 | trie_destroy(node->children[i]); 25 | } 26 | 27 | free(node->children); 28 | free(node->value); 29 | free(node); 30 | } 31 | 32 | struct trie_node *trie_lookup(struct trie_node *node, 33 | unsigned char *stream, long max) 34 | { 35 | struct trie_node *far = node; 36 | struct trie_node *tmp = NULL; 37 | while (max > 0) { 38 | tmp = trie_node_search(far, stream[0]); 39 | if (!tmp) 40 | break; 41 | far = tmp; 42 | stream++; 43 | max--; 44 | } 45 | return far; 46 | } 47 | 48 | struct trie_node *trie_insert(struct trie_node *root, unsigned char *stream, long max, 49 | void *value, unsigned char flags) 50 | { 51 | struct trie_node *far = root; 52 | struct trie_node *tmp = NULL; 53 | while (max > 0) { 54 | tmp = trie_node_search(far, stream[0]); 55 | if (!tmp) 56 | break; 57 | far = tmp; 58 | stream++; 59 | max--; 60 | } 61 | struct trie_node *child = NULL; 62 | while (max > 0) { 63 | child = trie_init(stream[0], NULL); 64 | child->dist = far->dist + 1; 65 | trie_node_insert(far, child); 66 | far = child; 67 | stream++; 68 | max--; 69 | } 70 | if (far->value) return NULL; 71 | far->value = value; 72 | far->flags = flags; 73 | return far; 74 | } 75 | 76 | struct trie_node *trie_node_search(struct trie_node *node, 77 | unsigned char key) 78 | { 79 | long start = 0, end = node->num_children - 1; 80 | long mid; 81 | while (start <= end) { 82 | mid = (end + start) / 2; 83 | if (node->children[mid]->key > key) { 84 | end = mid - 1; 85 | } else if (node->children[mid]->key < key) { 86 | start = mid + 1; 87 | } 
else { 88 | return node->children[mid]; 89 | } 90 | } 91 | return NULL; 92 | } 93 | 94 | void trie_node_insert(struct trie_node *node, struct trie_node *child) 95 | { 96 | int idx = 0; 97 | for (; idx < node->num_children; idx++) { 98 | if (child->key < node->children[idx]->key) 99 | break; 100 | } 101 | node->num_children++; 102 | if (!node->children) 103 | node->children = malloc(sizeof(struct trie_node *)); 104 | else 105 | node->children = 106 | realloc(node->children, 107 | sizeof(struct trie_node *) * 108 | node->num_children); 109 | if (node->num_children > 1 && idx < (node->num_children - 1)) { 110 | long size = 111 | (node->num_children - 112 | (idx + 1)) * sizeof(struct trie_node *); 113 | memmove(node->children + idx + 1, node->children + idx, 114 | size); 115 | } 116 | child->parent = node; 117 | node->children[idx] = child; 118 | } 119 | -------------------------------------------------------------------------------- /src/dss.h: -------------------------------------------------------------------------------- 1 | #ifndef DSS_H 2 | #define DSS_H 3 | 4 | #include 5 | #include 6 | #include 7 | #include "common/table.h" 8 | #include "dis.h" 9 | 10 | #define DSEM_READ 1 11 | #define DSEM_WRITE 2 12 | #define DSEM_RW 3 13 | 14 | #define O 128 15 | #define D 64 16 | #define I 32 17 | #define S 16 18 | #define Z 8 19 | #define A 4 20 | #define P 2 21 | #define C 1 22 | 23 | #define FVAL(c) (c=='o'?O:(c=='d'?D:(c=='i'?I:(c=='s'?S:(c=='z'?Z:(c=='a'?A:(c=='p'?P:(c=='c'?C:0)))))))) 24 | #define VALF(c) (c&O?'o':(c&D?'d':(c&I?'i':(c&S?'s':(c&Z?'z':(c&A?'a':(c&P?'p':(c&C?'c':'.')))))))) 25 | 26 | enum dss_instr_type { 27 | dss_arithmetic, 28 | dss_logical, 29 | dss_dataproccess, 30 | dss_stack, 31 | dss_cbranch, 32 | dss_ubranch 33 | }; 34 | 35 | extern const char *dss_instr_type_str[6]; 36 | 37 | /* Disassembly Semantic Specification 38 | * Group Numbers: 39 | * - arithmetic 0 40 | * - data processing 1 41 | * - stack instruction 2 42 | * - branch instruction 3 43 | 
* 44 | * 45 | * Flags (odiszapc): 46 | * - overflow 47 | * - direction 48 | * - interrupt 49 | * - sign 50 | * - zero 51 | * - acarry 52 | * - parity 53 | * - carry 54 | * 55 | * FORMAT: 56 | * [mnemonic] group-id 57 | * i: $r = "implicitly_read_register", $w = "implicitly_written_register" 58 | * f: $w = $instruction-idx... 59 | * f: $r = $instruction-idx... 60 | * f: $rf = $read_flags 61 | * f: $mf = $modified_flags 62 | * 63 | * 64 | * [add] 0 65 | * o: $rw = $0, $r = $1 66 | * f: $rf=........ 67 | * f: $mf=o..szapc 68 | * 69 | * [or] 0 70 | * o: $rw = $0, $r = $1 71 | * f: $rf=........ 72 | * f: $mf=o..sz.pc 73 | * 74 | * [adc] 0 75 | * o: $rw = $0, $r = 1 76 | * f: $rf=.......c 77 | * f: $mf=o..szapc 78 | * 79 | * */ 80 | 81 | /*Disassembly Semantic*/ 82 | struct dsem { 83 | char *mnemonic; 84 | int *read, nread; 85 | int *write, nwrite; 86 | char **implr, **implw; 87 | int nimplr, nimplw; 88 | unsigned char rflags; 89 | unsigned char mflags; 90 | unsigned int group; 91 | }; 92 | 93 | /*Reads semantics from a file and inserts them into a hash table*/ 94 | void parse_sem_file(const char *file, struct hash_table *stable); 95 | /*Returns populated dis semantic or null on error. 
Increments the file pointer to the end of the semantic*/ 96 | struct dsem *parse_semantic(char **buffer); 97 | /*Set operand */ 98 | void parse_rwoperands(struct dsem *sem, char *line); 99 | /*Set flag bits*/ 100 | void parse_rwflags(struct dsem *sem, char *line); 101 | /*Set implied registers*/ 102 | void parse_rwimplied(struct dsem *sem, char *line); 103 | /*Allocate and initialize a semantic struct*/ 104 | struct dsem *dsem_init(char *mnemonic, int group); 105 | /*Free semantic struct*/ 106 | void dsem_destroy(struct dsem *sem); 107 | /*Add a operand to read/write arrays*/ 108 | void dsem_add(struct dsem *sem, int val, int rw); 109 | /*Add register to implied read/write array*/ 110 | void dsem_addi(struct dsem *sem, char *val, int rw); 111 | /*Debugging print function*/ 112 | void dsem_print(struct dsem *sem); 113 | /*Information with disassembly combined with semantics*/ 114 | void print_semantics(struct dis *dis, struct dsem *sem); 115 | 116 | #endif 117 | -------------------------------------------------------------------------------- /src/disas.c: -------------------------------------------------------------------------------- 1 | #include "disas.h" 2 | 3 | struct disassembler *ds_init(int arch, int mode) 4 | { 5 | struct disassembler *ds = malloc(sizeof(struct disassembler)); 6 | 7 | ds->arch = arch, ds->mode = mode;; 8 | ds->root = NULL; 9 | ds->instr = NULL, ds->num_instr = 0; 10 | ds->root = trie_init(0, NULL); 11 | ds->sem_table = hash_table_init(101); 12 | ds->asm_table = hash_table_init(101); 13 | if (arch == X86_ARCH) { 14 | x86_parse(ds->root, ds->asm_table, mode); 15 | parse_sem_file("src/spec/x86.spec", ds->sem_table); 16 | } else if (arch == MIPS_ARCH) { 17 | mips_parse(ds->root, ds->asm_table, mode); 18 | parse_sem_file("src/spec/mips.spec", ds->sem_table); 19 | } else if (arch == ARM_ARCH) { 20 | arm_parse(ds->root, ds->asm_table, mode); 21 | } 22 | 23 | return ds; 24 | } 25 | 26 | void ds_destroy(struct disassembler *ds) 27 | { 28 | if (!ds) 
29 | return; 30 | 31 | for (int i = 0; i < ds->num_instr; i++) { 32 | dis_destroy(ds->instr[i]); 33 | } 34 | 35 | trie_destroy(ds->root); 36 | hash_table_destroy(ds->asm_table, NULL); 37 | hash_table_destroy(ds->sem_table, (void(*)(void*))&dsem_destroy); 38 | free(ds->instr); 39 | free(ds); 40 | } 41 | 42 | void ds_decode(struct disassembler *ds, unsigned char *stream, int size, 43 | uint64_t entry) 44 | { 45 | int iter = 0; 46 | int addr = entry; 47 | struct dis *disas = NULL; 48 | while (iter < size) { 49 | disas = ds_disas(ds, stream+iter, size-iter, addr); 50 | if (!disas) { 51 | iter++; 52 | addr++; 53 | continue; 54 | } 55 | iter += disas->used_bytes; 56 | addr += disas->used_bytes; 57 | } 58 | } 59 | 60 | struct dis *ds_disas(struct disassembler *ds, unsigned char *stream, int size, 61 | uint64_t addr) 62 | { 63 | struct dis *disas = NULL; 64 | switch (ds->arch) { 65 | case ARM_ARCH: 66 | disas = arm_disassemble(ds->mode, ds->root, stream, size, addr); 67 | break; 68 | case MIPS_ARCH: 69 | disas = mips_disassemble(ds->mode, ds->root, stream, size, addr); 70 | break; 71 | case X86_ARCH: 72 | disas = x86_disassemble(ds->mode, ds->root, stream, size, addr); 73 | break; 74 | } 75 | 76 | if (!disas) return NULL; 77 | disas->address = addr; 78 | dis_squash(disas); 79 | ds_addinstr(ds, disas); 80 | return disas; 81 | } 82 | 83 | void ds_asm(struct disassembler *ds, char *instr) 84 | { 85 | if (ds->arch != X86_ARCH) { 86 | printf("Assembler support for x86/x64 currently\n"); 87 | return; 88 | } 89 | int num_tokens = 0, idx = 0, len = strlen(instr); 90 | while (idx < len) { 91 | char ** tokens = lex(instr, &idx, " \t\n", &num_tokens, X86_IDX); 92 | struct hash_entry * e = NULL; 93 | if (num_tokens) 94 | e = hash_table_lookup(ds->asm_table, tokens[0]); 95 | if (e) { 96 | int alen = 0; 97 | u8 *arr=x86_assemble(tokens, num_tokens, ds->mode, e, &alen); 98 | for (int i = 0; i < alen; i++) { 99 | printf("%02x ", arr[i]); 100 | } 101 | printf("\n"); 102 | free(arr); 103 | 
} 104 | for (int i = 0; i < num_tokens; i++) 105 | free(tokens[i]); 106 | free(tokens); 107 | } 108 | } 109 | 110 | void ds_addinstr(struct disassembler *ds, struct dis *dis) 111 | { 112 | ds->num_instr++; 113 | if (!ds->instr) 114 | ds->instr = malloc(sizeof(struct dis *)); 115 | else 116 | ds->instr = 117 | realloc(ds->instr, 118 | sizeof(struct dis *) * ds->num_instr); 119 | ds->instr[ds->num_instr - 1] = dis; 120 | } 121 | -------------------------------------------------------------------------------- /src/arch/mips/mips.c: -------------------------------------------------------------------------------- 1 | #include "mips.h" 2 | 3 | struct dis *mips_disassemble(int mode, struct trie_node *node, u8 * stream, 4 | long max, uint64_t addr) 5 | { 6 | if (max < 4) { 7 | return NULL; 8 | } 9 | (void) mode; 10 | (void) addr; 11 | uint32_t instruction = *((uint32_t *) stream); 12 | 13 | unsigned char opcode = OPCODE(instruction); 14 | struct trie_node *n = trie_lookup(node, &opcode, 1); 15 | /*Floating point instructions may have the rs field as an opcode extension */ 16 | if (CHECK_FLAG(n->flags, INSTR_RSEXT)) { 17 | opcode = RS(instruction); 18 | n = trie_lookup(n, &opcode, 1); 19 | } 20 | /*Some instructions have a func field that specifies the mnemonic */ 21 | if (CHECK_FLAG(n->flags, INSTR_FUNC)) { 22 | opcode = FUNC(instruction); 23 | n = trie_lookup(n, &opcode, 1); 24 | } 25 | 26 | if (!n || !n->value) { 27 | return NULL; 28 | } 29 | 30 | struct mips_instr_entry *e = n->value; 31 | struct dis *disas = dis_init(); 32 | 33 | memcpy(disas->mnemonic, e->mnemonic, strlen(e->mnemonic)); 34 | mips_decode_operands(disas, e, instruction, n->flags); 35 | 36 | disas->used_bytes = 4; 37 | return disas; 38 | } 39 | 40 | void mips_decode_operands(struct dis *disas, struct mips_instr_entry *e, 41 | uint32_t instruction, u8 flags) 42 | { 43 | switch (e->instr_type) { 44 | /*Register Type */ 45 | case 'R': 46 | if (!CHECK_FLAG(flags, INSTR_NORS)) 47 | dis_add_operand(disas, 48 | 
/*
 * Read one line from `f` into `buf` (capacity `max` bytes).
 *
 * The buffer is zero-filled first and always ends up NUL-terminated; the
 * newline itself is not stored. Characters that do not fit in max - 1
 * bytes are read and discarded so the next call starts on the next line.
 *
 * Returns 0 when a newline ended the line, non-zero when end-of-file or a
 * read error was hit first (any characters read before that point are
 * still left in `buf`). Invalid arguments also return non-zero.
 */
int get_line(FILE *f, char *buf, long max)
{
	if (!f || !buf || max <= 0)
		return 1;

	memset(buf, 0, (size_t)max);

	long used = 0;
	int ch;			/* int, so EOF is distinguishable from byte 0xFF */
	while ((ch = fgetc(f)) != EOF) {
		if (ch == '\n')
			return 0;
		/* keep the final byte free for the terminator */
		if (used < max - 1)
			buf[used++] = (char)ch;
	}
	/* EOF or a stream error: either way there is nothing more to read */
	return 1;
}
/*
 * Copy the part of `name` that precedes its first '.' into `dst`
 * (capacity `cap`), upper-casing it when `upper` is non-zero. The result
 * is truncated to fit and always NUL-terminated right after the last
 * copied character.
 */
static void sym_stem(char *dst, size_t cap, const char *name, int upper)
{
	size_t n = 0;
	while (name[n] && name[n] != '.' && n + 1 < cap) {
		unsigned char ch = (unsigned char)name[n];
		dst[n] = (char)(upper ? toupper(ch) : ch);
		n++;
	}
	dst[n] = 0;
}

/*
 * Generator entry point: every argument is a .sym file. Writes ../sym.h
 * and ../sym.c containing one <STEM>_IDX define per file, the tables
 * emitted by parse_symfile(), and the symbol_tt / symtype_tt lookup
 * arrays that reference the per-file tables by stem name.
 *
 * Returns 1 when no argument is given or an output file cannot be opened,
 * 0 otherwise.
 */
int main(int argc, char **argv)
{
	if (argc < 2) return 1;

	FILE *header = fopen("../sym.h", "w");
	FILE *cfile = fopen("../sym.c", "w");
	if (!header || !cfile) {
		printf("ERROR OPENING FILES\n");
		if (header) fclose(header);
		if (cfile) fclose(cfile);
		return 1;
	}
	fprintf(header, "#ifndef SYM_H\n#define SYM_H\n\n");
	fprintf(header, "/*Automatically Generated By gen.h&gen.c using all the .sym files*/\n\n");
	fprintf(cfile, "#include \"sym.h\"\n\n");

	/*
	 * The stem must be terminated where copying stopped (at the '.'),
	 * not at the full argument length, otherwise stale bytes end up
	 * inside the generated identifiers.
	 */
	char cbuf[64];
	for (int i = 1; i < argc; i++) {
		sym_stem(cbuf, sizeof(cbuf), argv[i], 1);
		fprintf(header, "#define %s_IDX %d\n", cbuf, i-1);
	}
	for (int i = 1; i < argc; i++)
		parse_symfile(argv[i], header, cfile);

	fprintf(header, "extern const char **symbol_tt[%d];\n", argc-1);
	fprintf(cfile, "const char **symbol_tt[] = {\n");
	for (int i = 1; i < argc; i++) {
		sym_stem(cbuf, sizeof(cbuf), argv[i], 0);
		fprintf(cfile, "\t(const char**)&%s_sym%s", cbuf, (i+1)==argc?"\n":",\n");
	}
	fprintf(cfile, "};");

	fprintf(header, "extern const int *symtype_tt[%d];\n", argc-1);
	fprintf(cfile, "const int *symtype_tt[] = {\n");
	for (int i = 1; i < argc; i++) {
		sym_stem(cbuf, sizeof(cbuf), argv[i], 0);
		fprintf(cfile, "\t(const int*)&%s_type%s", cbuf, (i+1)==argc?"\n":",\n");
	}
	fprintf(cfile, "};");

	fprintf(header, "\n#endif\n");
	fclose(header);
	fclose(cfile);

	return 0;
}
/*
 * Command-line front end.
 *
 * Parses --arch=, --mode=, --addr=, --help and the short flags -a (input
 * is ASCII hex) and -A (assemble instead of disassemble); the first
 * non-option argument is the input file, otherwise stdin is read. The
 * whole input is loaded into memory, optionally converted with
 * ascii_to_hex(), and handed to ds_asm() or disas().
 *
 * Returns 0 on success and after --help; exits with status 1 on usage,
 * open or allocation errors.
 */
int main(int argc, char **argv)
{
	if (argc < 2) {
		printf("%s: No target specified\n%s: Use --help for more information.\n", argv[0], argv[0]);
		return 1;
	}
	char *file = NULL, *archs = NULL, *modes = NULL, *addrs = NULL;
	int ascii = 0, disassemble = 1;
	for (int i = 1; i < argc; i++) {
		if (!strncmp(argv[i], "--", 2)) {
			if (!strcmp(argv[i]+2, "help")) {
				/* help is a complete request: do not go on to complain about missing options */
				print_help(argv[0]);
				return 0;
			} else if (!strncmp(argv[i]+2, "arch=", 5)) {
				archs = argv[i]+7;
			} else if (!strncmp(argv[i]+2, "mode=", 5)) {
				modes = argv[i]+7;
			} else if (!strncmp(argv[i]+2, "addr=", 5)) {
				addrs = argv[i]+7;
			} else {
				printf("%s not a valid argument. Use --help to find valid arguments\n", argv[i]);
			}
		} else if (argv[i][0] == '-') {
			for (int j = 1; argv[i][j]; j++) {
				if (argv[i][j] == 'a') {
					ascii = 1;
				} else if (argv[i][j] == 'A') {
					disassemble = 0;
				} else {
					printf("-%c not a valid argument. Use --help to find valid arguments\n", argv[i][j]);
				}
			}
		} else if (!file){
			file = argv[i];
		}
	}
	if (!archs || !modes) {
		printf("Must specify architecture and mode\n");
		exit(1);
	}
	int arch = get_arch(archs), mode = get_mode(modes);
	/* unsigned 64-bit parse: strtol would clamp addresses above LONG_MAX */
	uint64_t addr = 0;
	if (addrs)
		addr = strtoull(addrs, NULL, 0);

	/* The input is only ever read, so do not demand write permission */
	int fd = STDIN_FILENO;
	if (file)
		fd = open(file, O_RDONLY);
	if (fd == -1) {
		printf("Error opening file\n");
		exit(1);
	}

	unsigned char byte = 0;
	long iter = 0;
	long blen = 100;
	unsigned char *bbuf = calloc(1, (size_t)blen);
	if (!bbuf) {
		printf("Out of memory\n");
		exit(1);
	}
	while (read(fd, &byte, 1) > 0) {
		/* always keep at least one zero byte after the data */
		if ((iter+1) >= blen) {
			long grown = blen + 100;
			unsigned char *tmp = realloc(bbuf, (size_t)grown);
			if (!tmp) {
				printf("Out of memory\n");
				free(bbuf);
				exit(1);
			}
			memset(tmp+iter, 0, (size_t)(grown-iter));
			bbuf = tmp;
			blen = grown;
		}
		if (ascii) {
			/* in ASCII mode only hex digits are kept; separators are dropped */
			if ((byte>='a'&&byte<='f') || (byte>='A'&&byte<='F')|| (byte >= 0x30 && byte <= 0x39))
				bbuf[iter++] = byte;
		} else {
			bbuf[iter++] = byte;
		}
	}
	if (ascii) {
		unsigned char *abuf = calloc(1, (size_t)(iter/2+1));
		if (!abuf) {
			printf("Out of memory\n");
			free(bbuf);
			exit(1);
		}
		iter = ascii_to_hex(abuf, (char*)bbuf, iter);
		free(bbuf);
		bbuf = abuf;
	}
	if (!disassemble) {
		struct disassembler *ds = ds_init(arch, mode);
		ds_asm(ds, (char*)bbuf);

		ds_destroy(ds);
	} else {
		disas(arch, mode, (unsigned char *)bbuf, iter, addr);
	}

	free(bbuf);
	/* only close what this function opened; leave stdin alone */
	if (file)
		close(fd);
	return 0;
}
-------------------------------------------------------------------------------- 1 | #include "dis.h" 2 | 3 | struct dis *dis_init() 4 | { 5 | struct dis *dis = malloc(sizeof(struct dis)); 6 | 7 | dis->id = 0; 8 | dis->address = 0; 9 | dis->operands = NULL; 10 | dis->num_operands = 0; 11 | memset(dis->mnemonic, 0, MNEM_SIZE); 12 | memset(dis->op_squash, 0, SQUASH_SIZE); 13 | memset(dis->group, 0, GROUP_SIZE); 14 | 15 | return dis; 16 | } 17 | 18 | void dis_destroy(struct dis *disasm) 19 | { 20 | if (!disasm) 21 | return; 22 | 23 | for (int i = 0; i < disasm->num_operands; i++) { 24 | operand_tree_destroy(disasm->operands[i]); 25 | } 26 | 27 | free(disasm->operands); 28 | free(disasm); 29 | } 30 | 31 | void dis_add_operand(struct dis *dis, struct operand_tree *tree) 32 | { 33 | if (!dis) 34 | return; 35 | dis->num_operands++; 36 | if (!dis->operands) 37 | dis->operands = malloc(sizeof(struct operand_tree *)); 38 | else 39 | dis->operands = 40 | realloc(dis->operands, 41 | sizeof(struct operand_tree *) * 42 | dis->num_operands); 43 | dis->operands[dis->num_operands - 1] = tree; 44 | } 45 | 46 | struct operand_tree *operand_tree_init(int type) 47 | { 48 | struct operand_tree *tree = malloc(sizeof(struct operand_tree)); 49 | 50 | tree->type = type; 51 | if (type == DIS_OPER) { 52 | tree->body.operand.operand_type = DIS_UNSET; 53 | } else if (type == DIS_BRANCH) { 54 | tree->body.op_tree.num_operands = 0; 55 | tree->body.op_tree.operands = NULL; 56 | memset(tree->body.op_tree.format, 0, FMT_SIZE); 57 | } else { 58 | //error 59 | } 60 | 61 | return tree; 62 | } 63 | 64 | void operand_tree_destroy(struct operand_tree *node) 65 | { 66 | if (!node) 67 | return; 68 | 69 | if (node->type == DIS_BRANCH) { 70 | for (int i = 0; i < TREE_NCHILD(node); i++) { 71 | operand_tree_destroy(TREE_CHILD(node, i)); 72 | } 73 | free(node->body.op_tree.operands); 74 | } 75 | free(node); 76 | } 77 | 78 | void operand_tree_fmt(struct operand_tree *node, const char *fmt, ...) 
79 | { 80 | if (TREE_TYPE(node) != DIS_BRANCH) return; 81 | int iter = strlen(TREE_FORMAT(node)); 82 | va_list args; 83 | va_start(args, fmt); 84 | vsnprintf(TREE_FORMAT(node)+iter, FMT_SIZE-iter, fmt, args); 85 | va_end(args); 86 | } 87 | 88 | void operand_tree_add(struct operand_tree *node, 89 | struct operand_tree *child) 90 | { 91 | if (!node || node->type != DIS_BRANCH) 92 | return; 93 | TREE_NCHILD(node)++; 94 | if (!node->body.op_tree.operands) 95 | node->body.op_tree.operands = 96 | malloc(sizeof(struct operand_tree *)); 97 | else 98 | node->body.op_tree.operands = 99 | realloc(node->body.op_tree.operands, 100 | sizeof(struct operand_tree *) * 101 | TREE_NCHILD(node)); 102 | TREE_CHILD(node, TREE_NCHILD(node) - 1) = child; 103 | } 104 | 105 | struct operand_tree *operand_reg(const char *reg) 106 | { 107 | struct operand_tree *tree = operand_tree_init(DIS_OPER); 108 | 109 | tree->body.operand.operand_type = DIS_REG; 110 | if (!reg) 111 | return tree; 112 | long len = strlen(reg); 113 | len = len >= REG_SIZE ? 
(REG_SIZE - 2) : len; 114 | memcpy(TREE_REG(tree), reg, len); 115 | TREE_REG(tree)[len] = 0; 116 | 117 | return tree; 118 | } 119 | 120 | struct operand_tree *operand_imm(const u64 imm) 121 | { 122 | struct operand_tree *tree = operand_tree_init(DIS_OPER); 123 | 124 | tree->body.operand.operand_type = DIS_IMM; 125 | TREE_IMM(tree) = imm; 126 | 127 | return tree; 128 | } 129 | 130 | struct operand_tree *operand_addr(const u64 addr) 131 | { 132 | struct operand_tree *tree = operand_tree_init(DIS_OPER); 133 | 134 | tree->body.operand.operand_type = DIS_ADDR; 135 | TREE_ADDR(tree) = addr; 136 | 137 | return tree; 138 | } 139 | 140 | void dis_squash(struct dis *dis) 141 | { 142 | int iter = 0; 143 | for (int i = 0; i < dis->num_operands; i++) { 144 | iter += 145 | operand_squash(dis->op_squash + iter, 146 | SQUASH_SIZE - iter, dis->operands[i]); 147 | if ((i + 1) < dis->num_operands) 148 | iter += 149 | snprintf(dis->op_squash + iter, 150 | SQUASH_SIZE - iter, ", "); 151 | } 152 | } 153 | 154 | int operand_squash(char *buf, long max, struct operand_tree *tree) 155 | { 156 | if (!tree) 157 | return 0; 158 | 159 | long iter = 0; 160 | if (tree->type == DIS_OPER) { 161 | if (tree->body.operand.operand_type == DIS_ADDR) { 162 | iter += 163 | snprintf(buf + iter, max - iter, "%#"PRIx64, 164 | TREE_ADDR(tree)); 165 | } else if (tree->body.operand.operand_type == DIS_IMM) { 166 | int sign = SIGNED(TREE_IMM(tree)); 167 | if (sign) 168 | iter += 169 | snprintf(buf + iter, max - iter, "-"); 170 | iter += 171 | snprintf(buf + iter, max - iter, "%#"PRIX64, 172 | SIGN(TREE_IMM(tree))); 173 | } else if (tree->body.operand.operand_type == DIS_REG) { 174 | iter += 175 | snprintf(buf + iter, max - iter, "%s", 176 | TREE_REG(tree)); 177 | } 178 | } else if (tree->type == DIS_BRANCH) { 179 | if (!TREE_FORMAT(tree)) { 180 | for (int i = 0; i < TREE_NCHILD(tree); i++) { 181 | iter += 182 | operand_squash(buf + iter, max - iter, 183 | TREE_CHILD(tree, i)); 184 | } 185 | return iter; 186 | } 
187 | char *format = TREE_FORMAT(tree); 188 | int flen = strlen(format), flast = 0; 189 | for (int i = 0; i < flen; i++) { 190 | if (format[i] == '$') { 191 | if (i - flast) { 192 | iter += 193 | snprintf(buf + iter, 194 | max - iter, "%*.*s", 195 | i - flast, i - flast, 196 | format + flast); 197 | } 198 | int num = 199 | (i + 1) < 200 | flen ? (signed int) format[i + 1] - 201 | 0x30 : -1; 202 | if (num >= 0 && num < 10 203 | && num < TREE_NCHILD(tree)) { 204 | iter += 205 | operand_squash(buf + iter, 206 | max - iter, 207 | TREE_CHILD(tree, 208 | num)); 209 | } 210 | i++; 211 | flast = i + 1; 212 | continue; 213 | } 214 | if ((i + 1) == flen) 215 | iter += 216 | snprintf(buf + iter, max - iter, "%s", 217 | format + flast); 218 | } 219 | } 220 | return iter; 221 | } 222 | -------------------------------------------------------------------------------- /src/dss.c: -------------------------------------------------------------------------------- 1 | #include "dss.h" 2 | 3 | const char *dss_instr_type_str[6] = { 4 | "arithmetic", "logical", "data processing", "stack", "conditional branch", "unconditional branch" 5 | }; 6 | 7 | void parse_sem_file(const char *file, struct hash_table *stable) 8 | { 9 | FILE *fp = fopen(file, "r"); 10 | if (!fp) { 11 | printf("Error opening semantic file %s\n", file); 12 | return; 13 | } 14 | fseek(fp, 0, SEEK_END); 15 | int len = ftell(fp); 16 | if (len < 0) { 17 | printf("Error seeking in file\n"); 18 | fclose(fp); 19 | return; 20 | } 21 | rewind(fp); 22 | char *buffer = malloc(len+1); 23 | memset(buffer, 0, len); 24 | if (fread(buffer, 1, len, fp) != (size_t)len) { 25 | printf("Error reading from file\n"); 26 | fclose(fp); 27 | free(buffer); 28 | return; 29 | } 30 | buffer[len] = 0; 31 | char *tbuf = buffer; /*Malleable ptr for parsing function*/ 32 | struct dsem *sem = NULL; 33 | while ((sem=parse_semantic(&tbuf))) { 34 | hash_table_insert(stable, hash_entry_init(sem->mnemonic, sem)); 35 | } 36 | free(buffer); 37 | fclose(fp); 38 | } 
39 | 40 | struct dsem *parse_semantic(char **buffer) 41 | { 42 | char *buf = *buffer; 43 | if (!buf) return NULL; 44 | char *save_ptr = strchr(buf, '\n'); 45 | char *line = buf; 46 | if (!line || !save_ptr) return NULL; 47 | *save_ptr = 0; 48 | save_ptr++; 49 | struct dsem *sem = NULL; 50 | do { 51 | if (!sem) {/*If semantic is NULL, try and the starting line of "[mnem] grp-id"*/ 52 | char *dstr = strchr(line, ']'); 53 | if (!dstr) continue; 54 | dstr[0] = 0; 55 | dstr++; 56 | char *mstr = strchr(line, '['); 57 | if (!mstr) continue; 58 | mstr++; 59 | int v = strtol(dstr, NULL, 10); 60 | sem = dsem_init(mstr, v); 61 | /*Otherwise try and read the read/write flag/operand lines*/ 62 | } else if (!strncmp(line, "o:", 2)) { 63 | parse_rwoperands(sem, line+2); 64 | } else if (!strncmp(line, "i:", 2)) { 65 | parse_rwimplied(sem, line+2); 66 | } else if (!strncmp(line, "f:", 2)) { 67 | parse_rwflags(sem, line+2); 68 | } 69 | } while ((line=save_ptr) && (save_ptr=strchr(save_ptr, '\n')) && (*save_ptr=0,save_ptr++) && !!strncmp(line, "end", 3)); 70 | *buffer = save_ptr; 71 | return sem; 72 | } 73 | 74 | void parse_rwoperands(struct dsem *sem, char *line) 75 | { 76 | int len = strlen(line); 77 | char *val = line; 78 | while (val!=(line+len)) { 79 | char *rw = strchr(val, '$'); 80 | if (!rw) break; 81 | rw++; 82 | val = strchr(val, '='); 83 | if (!val) break; 84 | val++; 85 | val = strchr(val, '$'); 86 | if (!val) break; 87 | val++; 88 | int amask = 0; 89 | while (*rw=='r'||*rw=='w') { 90 | amask |= *rw=='r'?DSEM_READ:DSEM_WRITE; 91 | rw++; 92 | } 93 | dsem_add(sem, strtol(val, NULL, 0), amask); 94 | } 95 | } 96 | 97 | void parse_rwimplied(struct dsem *sem, char *line) 98 | { 99 | int len = strlen(line); 100 | char *val = line; 101 | while (val != (line+len)) { 102 | char *rw = strchr(val, '$'); 103 | if (!rw) break; 104 | rw++; 105 | val = strchr(val, '='); 106 | if (!val) break; 107 | val++; 108 | val = strchr(val, '\"'); 109 | if (!val) break; 110 | val++; 111 | int amask 
= 0; 112 | while (*rw == 'r' || *rw == 'w') { 113 | amask |= *rw == 'r'?DSEM_READ:DSEM_WRITE; 114 | rw++; 115 | } 116 | char *vend = strchr(val, '\"'); 117 | if (!vend) break; 118 | *vend = '\0'; 119 | dsem_addi(sem, val, amask); 120 | } 121 | } 122 | 123 | void parse_rwflags(struct dsem *sem, char *line) 124 | { 125 | int len = strlen(line); 126 | char *val = line; 127 | while (val && val!=(line+len) && *val) { 128 | char *ft = strchr(val, '$'); 129 | if (!ft) return; 130 | val = strchr(val, '='); 131 | if (!val) return; 132 | val++; 133 | unsigned char flags = 0; 134 | int fv = 0; 135 | while ((fv=val[0])) { 136 | flags |= FVAL(fv); 137 | val++; 138 | } 139 | if (!strncmp(ft, "$mf", 3)) 140 | sem->mflags |= flags; 141 | if (!strncmp(ft, "$rf", 3)) 142 | sem->rflags |= flags; 143 | } 144 | } 145 | 146 | struct dsem *dsem_init(char *mnemonic, int group) 147 | { 148 | struct dsem *sem = malloc(sizeof(struct dsem)); 149 | sem->group = group, sem->rflags = 0, sem->mflags = 0; 150 | int len = strlen(mnemonic); 151 | sem->mnemonic = malloc(len+1); 152 | strncpy(sem->mnemonic, mnemonic, len); 153 | sem->mnemonic[len] = 0; 154 | sem->read = NULL, sem->nread = 0; 155 | sem->write = NULL, sem->nwrite = 0; 156 | sem->implw = NULL, sem->nimplw = 0; 157 | sem->implr = NULL, sem->nimplr = 0; 158 | return sem; 159 | } 160 | 161 | void dsem_destroy(struct dsem *sem) 162 | { 163 | if (!sem) return; 164 | for (int i = 0; i < sem->nimplr; i++) 165 | free(sem->implr[i]); 166 | for (int i = 0; i < sem->nimplw; i++) 167 | free(sem->implw[i]); 168 | free(sem->implr); 169 | free(sem->implw); 170 | free(sem->read); 171 | free(sem->write); 172 | free(sem->mnemonic); 173 | free(sem); 174 | } 175 | 176 | void dsem_add(struct dsem *sem, int val, int rw) 177 | { 178 | if (rw & DSEM_READ) { 179 | sem->nread++; 180 | if (!sem->read) 181 | sem->read=malloc(sizeof(int)); 182 | else 183 | sem->read=realloc(sem->read, sizeof(int)*sem->nread); 184 | sem->read[sem->nread-1] = val; 185 | } 186 | if (rw 
& DSEM_WRITE) { 187 | sem->nwrite++; 188 | if (!sem->write) 189 | sem->write=malloc(sizeof(int)); 190 | else 191 | sem->write=realloc(sem->write, sizeof(int)*sem->nwrite); 192 | sem->write[sem->nwrite-1] = val; 193 | } 194 | } 195 | 196 | void dsem_addi(struct dsem *sem, char *val, int rw) 197 | { 198 | int len = strlen(val); 199 | if (rw & DSEM_READ) { 200 | sem->nimplr++; 201 | if (!sem->implr) 202 | sem->implr=malloc(sizeof(char*)); 203 | else 204 | sem->implr=realloc(sem->implr, sizeof(int)*sem->nimplr); 205 | char *vc = malloc(len+1); 206 | strncpy(vc, val, len); 207 | vc[len] = 0; 208 | sem->implr[sem->nimplr-1] = vc; 209 | } 210 | if (rw & DSEM_WRITE) { 211 | sem->nimplw++; 212 | if (!sem->implw) 213 | sem->implw=malloc(sizeof(char*)); 214 | else 215 | sem->implw=realloc(sem->implw, sizeof(char*)*sem->nimplw); 216 | char *vc = malloc(len+1); 217 | strncpy(vc, val, len); 218 | vc[len] = 0; 219 | sem->implw[sem->nimplw-1] = vc; 220 | } 221 | } 222 | 223 | void dsem_print(struct dsem *sem) 224 | { 225 | if (!sem) return; 226 | printf("MNEMONIC: %s\tGROUP: %d : %s\n", sem->mnemonic, sem->group, dss_instr_type_str[sem->group]); 227 | printf("READ: "); 228 | for (int i = 0; i < sem->nread; i++) { 229 | printf("opr_%d%c", sem->read[i], (i+1)==sem->nread?'\n':','); 230 | } 231 | printf("WRITE: "); 232 | for (int i = 0; i < sem->nwrite; i++) { 233 | printf("opr_%d%c", sem->write[i], (i+1)==sem->nwrite?'\n':','); 234 | } 235 | printf("Modified Flags: "); 236 | for (int i = 7; sem->mflags && i >= 0; i--) { 237 | int val = sem->mflags & (1 << i); 238 | printf("%c", VALF(val)); 239 | } 240 | printf("\n"); 241 | printf("Read Flags: "); 242 | for (int i = 7; sem->rflags && i >= 0; i--) { 243 | int val = sem->rflags & (1 << i); 244 | printf("%c", VALF(val)); 245 | } 246 | printf("\n"); 247 | } 248 | 249 | void print_semantics(struct dis *dis, struct dsem *sem) 250 | { 251 | if (!dis || !sem) return; 252 | printf("Disassembly: \"%s %s\"\n", dis->mnemonic, dis->op_squash); 
253 | printf("Mnemonic: %s\nSemantic Group: %s\n", sem->mnemonic, dss_instr_type_str[sem->group]); 254 | char buf[64]; 255 | if (sem->nread) 256 | printf("READ: "); 257 | for (int i = 0; i < sem->nread; i++) { 258 | if (sem->read[i] >= dis->num_operands) continue; 259 | operand_squash(buf, 64, dis->operands[sem->read[i]]); 260 | printf("%s%c", buf, (i+1)==sem->nread?'\n':','); 261 | } 262 | if (sem->nwrite) 263 | printf("WRITE: "); 264 | for (int i = 0; i < sem->nwrite; i++) { 265 | if (sem->write[i] >= dis->num_operands) continue; 266 | operand_squash(buf, 64, dis->operands[sem->write[i]]); 267 | printf("%s%c", buf, (i+1)==sem->nwrite?'\n':','); 268 | } 269 | if (sem->nimplr) 270 | printf("IMPLIED READ: "); 271 | for (int i = 0; i < sem->nimplr; i++) { 272 | printf("%s%c", sem->implr[i], (i+1)==sem->nimplr?'\n':','); 273 | } 274 | if (sem->nimplw) 275 | printf("IMPLIED WRITE: "); 276 | for (int i = 0; i < sem->nimplw; i++) { 277 | printf("%s%c", sem->implw[i], (i+1)==sem->nimplw?'\n':','); 278 | } 279 | if (sem->mflags) printf("Modified Flags: "); 280 | for (int i = 7; sem->mflags && i >= 0; i--) { 281 | int val = sem->mflags & (1 << i); 282 | printf("%c", VALF(val)); 283 | } 284 | if (sem->mflags) printf("\n"); 285 | if (sem->rflags) printf("Read Flags: "); 286 | for (int i = 7; sem->rflags && i >= 0; i--) { 287 | int val = sem->rflags & (1 << i); 288 | printf("%c", VALF(val)); 289 | } 290 | if (sem->rflags) printf("\n"); 291 | printf("\n"); 292 | } 293 | -------------------------------------------------------------------------------- /src/arch/arm/arm.c: -------------------------------------------------------------------------------- 1 | #include "arm.h" 2 | 3 | struct dis *arm_disassemble(int mode, struct trie_node *node, u8 * stream, 4 | long max, uint64_t addr) 5 | { 6 | (void) mode; 7 | if (max < 4) { 8 | return NULL; 9 | } 10 | 11 | uint32_t instruction = *((uint32_t *) stream); 12 | unsigned char cond = COND(instruction); 13 | 14 | unsigned char tbits = 
THREE_OBITS(instruction); 15 | struct trie_node *n = trie_lookup(node, &tbits, 1); 16 | /*Invalid Data processing commands avoid the ambiguous instruction encoding */ 17 | if (CHECK_FLAG(n->flags, D_CROSS) && VALID_DPROC(instruction)) { 18 | unsigned char opcode = DATA_OPCODE(instruction); 19 | n = trie_lookup(n, &opcode, 1); 20 | } else if (CHECK_FLAG(n->flags, D_CROSS) && IS_MULT(instruction)) { 21 | unsigned char opcode = ((DATA_OPCODE(instruction)&7)+1)<<4; 22 | n = trie_lookup(n, &opcode, 1); 23 | /*Differentiate between load and store instructions*/ 24 | } else if (CHECK_FLAG(n->flags, LDSTC)) { 25 | unsigned char sto = LD_L_FIELD(instruction); 26 | n = trie_lookup(n, &sto, 1); 27 | } 28 | if (CHECK_FLAG(n->flags, LDM_STM)) { 29 | unsigned char spu = LDMSTM_BITS(instruction); 30 | n = trie_lookup(n, &spu, 1); 31 | } 32 | 33 | if (!n || !n->value) { 34 | return NULL; 35 | } 36 | struct arm_instr_entry *e = n->value; 37 | 38 | struct dis *disas = dis_init(); 39 | /*The end mnemonic contains several fields*/ 40 | int miter = 0; 41 | miter += snprintf(disas->mnemonic, MNEM_SIZE, "%s", e->mnemonic); 42 | if (S_FIELD(instruction) && e->instr_type == DATA_PROCESS) 43 | miter += snprintf(disas->mnemonic+miter, MNEM_SIZE-miter, "s"); 44 | if (B_L_FIELD(instruction) && e->instr_type == BRANCH) 45 | miter += snprintf(disas->mnemonic+miter, MNEM_SIZE-miter, "l"); 46 | if (cond != ALWAYS_EXECUTE) 47 | snprintf(disas->mnemonic+miter, MNEM_SIZE-miter, "%s", arm_conditions[cond]); 48 | 49 | arm_decode_operands(disas, e, addr, instruction, n->flags); 50 | 51 | /*Apply aliases (eg: stmfd to push)*/ 52 | if (!strncmp(disas->mnemonic, "stmfd", 5) && disas->num_operands) { 53 | snprintf(disas->mnemonic, MNEM_SIZE, "push"); 54 | operand_tree_destroy(disas->operands[0]); 55 | disas->num_operands--; 56 | memmove(disas->operands,disas->operands+1,sizeof(struct operand_tree*)*disas->num_operands); 57 | disas->operands=realloc(disas->operands, sizeof(struct 
operand_tree*)*disas->num_operands); 58 | } if (!strncmp(disas->mnemonic, "ldmfd", 5) && disas->num_operands) { 59 | snprintf(disas->mnemonic, MNEM_SIZE, "pop"); 60 | operand_tree_destroy(disas->operands[0]); 61 | disas->num_operands--; 62 | memmove(disas->operands,disas->operands+1,sizeof(struct operand_tree*)*disas->num_operands); 63 | disas->operands=realloc(disas->operands, sizeof(struct operand_tree*)*disas->num_operands); 64 | } 65 | 66 | disas->used_bytes = 4; 67 | return disas; 68 | } 69 | 70 | void arm_decode_operands(struct dis *disas, struct arm_instr_entry *e, 71 | uint64_t addr, uint32_t instr, u8 flags) 72 | { 73 | (void)flags; 74 | switch (e->instr_type) { 75 | /*Data processing Instruction*/ 76 | case 'D': 77 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 78 | if (!CHECK_FLAG(flags, ARM_NORN)) 79 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RN(instr)])); 80 | arm_shifter_operand(disas, NULL, instr, e->instr_type); 81 | break; 82 | case 'I': 83 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 84 | if (!CHECK_FLAG(flags, ARM_NORN)) 85 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RN(instr)])); 86 | uint32_t rot = ARM_ROTATE_IMM(instr); 87 | uint32_t rimm = ARM_IMM8(instr); 88 | rimm = (rimm >> rot) | (rimm>>(32-rot)); 89 | dis_add_operand(disas, operand_imm(rimm)); 90 | break; 91 | /*Branch instruction (word aligned so botton 2 bits are always 0*/ 92 | case 'B':; 93 | dis_add_operand(disas, operand_addr((ARM_OFFSET24(instr)<<2)+(addr+8))); 94 | break; 95 | /*Load/Store Immediate Offset*/ 96 | case 'O': 97 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 98 | struct operand_tree *ioff = operand_tree_init(DIS_BRANCH); 99 | operand_tree_add(ioff, operand_reg(arm_registers[ARM_RN(instr)])); 100 | operand_tree_add(ioff, operand_imm(ARM_OFFSET12(instr))); 101 | operand_tree_fmt(ioff, "[$0"); 102 | int ineg = !ARM_ADDSUB(instr); 103 | if (ARM_PREINDEX(instr)) 
operand_tree_fmt(ioff, ",%s$1]", ineg?" -":" "); 104 | else operand_tree_fmt(ioff, "],%s$1", ineg?" -":" "); 105 | dis_add_operand(disas, ioff); 106 | break; 107 | /*Load/Store Register Offset*/ 108 | case 'R': 109 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 110 | struct operand_tree *ireg = operand_tree_init(DIS_BRANCH); 111 | operand_tree_add(ireg, operand_reg(arm_registers[ARM_RN(instr)])); 112 | operand_tree_add(ireg, operand_reg(arm_registers[ARM_RM(instr)])); 113 | operand_tree_fmt(ireg, "[$0"); 114 | int rneg = !ARM_ADDSUB(instr); 115 | if (ARM_PREINDEX(instr)) operand_tree_fmt(ireg, ",%s$1]", rneg?" -":" "); 116 | else operand_tree_fmt(ireg, "],%s$1", rneg?" -":" "); 117 | arm_shifter_operand(disas, ireg, instr, e->instr_type); 118 | dis_add_operand(disas, ireg); 119 | break; 120 | /*Load/Store Multiple*/ 121 | case 'L':; 122 | struct operand_tree *breg = operand_tree_init(DIS_BRANCH); 123 | if (LDST_W_FIELD(instr)) operand_tree_fmt(breg, "$0!"); 124 | else operand_tree_fmt(breg, "$0"); 125 | operand_tree_add(breg, operand_reg(arm_registers[ARM_RN(instr)])); 126 | dis_add_operand(disas, breg); 127 | struct operand_tree *rlist = operand_tree_init(DIS_BRANCH); 128 | uint16_t rl = RLIST(instr); 129 | operand_tree_fmt(rlist, "{"); 130 | for (int i = 0; i < 16; i++) { 131 | if (rl & (1<1) 134 | operand_tree_fmt(rlist, ","); 135 | operand_tree_fmt(rlist, "$%d", TREE_NCHILD(rlist)-1); 136 | } 137 | } 138 | operand_tree_fmt(rlist, "}"); 139 | dis_add_operand(disas, rlist); 140 | break; 141 | /*Multiply*/ 142 | case 'M': 143 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RN(instr)])); 144 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RM(instr)])); 145 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RS(instr)])); 146 | break; 147 | /*Long multiply*/ 148 | case 'U': 149 | /*RdHi==Rn Rdlo==Rd */ 150 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 151 | dis_add_operand(disas, 
operand_reg(arm_registers[ARM_RN(instr)])); 152 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RM(instr)])); 153 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RS(instr)])); 154 | break; 155 | /*Multiply Accumulate*/ 156 | case 'A': 157 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RN(instr)])); 158 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RM(instr)])); 159 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RS(instr)])); 160 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RD(instr)])); 161 | break; 162 | } 163 | } 164 | 165 | void arm_shifter_operand(struct dis *disas, struct operand_tree *opr, uint32_t instr, int type) 166 | { 167 | switch (type) { 168 | case 'D': 169 | dis_add_operand(disas, operand_reg(arm_registers[ARM_RM(instr)])); 170 | if (ARM_SHIFT_AMOUNT(instr)) { 171 | struct operand_tree *shift = operand_tree_init(DIS_BRANCH); 172 | unsigned char st = ARM_SHIFT(instr); 173 | unsigned char sa = ARM_SHIFT_AMOUNT(instr); 174 | unsigned char rm = ARM_RS(instr); 175 | if (st==ARM_LSLI) { 176 | operand_tree_add(shift, operand_imm(sa)); 177 | operand_tree_fmt(shift, "lsl #$0"); 178 | } else if (st == ARM_LSLR) { 179 | operand_tree_add(shift, operand_reg(arm_registers[rm])); 180 | operand_tree_fmt(shift, "lsl $0"); 181 | } else if (st==ARM_LSRI) { 182 | operand_tree_add(shift, operand_imm(sa)); 183 | operand_tree_fmt(shift, "lsr #$0"); 184 | } 185 | dis_add_operand(disas, shift); 186 | } 187 | break; 188 | case 'R': 189 | if (ARM_SHIFT_AMOUNT(instr)||ARM_SHIFT(instr)) { 190 | unsigned char st = ARM_SHIFT(instr); 191 | unsigned char si = ARM_SHIFT_AMOUNT(instr); 192 | (void)strtok(TREE_FORMAT(opr), "]"); 193 | if (st==ARM_LSLI) { 194 | operand_tree_add(opr, operand_imm(si)); 195 | operand_tree_fmt(opr, ", lsl #$2]"); 196 | //strcpy(fmt, ", lsl #$2]"); 197 | } else if (st==ARM_LSRI) { 198 | operand_tree_add(opr, operand_imm(!si?32:si)); 199 | operand_tree_fmt(opr, ", lsr #$2]"); 200 | //strcpy(fmt, ", lsr #$2]"); 
201 | } else if (st==ARM_ASR) { 202 | operand_tree_add(opr, operand_imm(!si?32:si)); 203 | operand_tree_fmt(opr, ", asr #$2]"); 204 | //strcpy(fmt, ", asr #$2]"); 205 | } else if (st==ARM_ROR_RRX) { 206 | if (!si) { 207 | operand_tree_add(opr, operand_imm(si)); 208 | operand_tree_fmt(opr, ", ror #$2]"); 209 | //strcpy(fmt, ", ror #$2]"); 210 | } else { 211 | operand_tree_fmt(opr, ", rrx]"); 212 | //strcpy(fmt, ", rrx]"); 213 | } 214 | } 215 | } 216 | break; 217 | } 218 | } 219 | -------------------------------------------------------------------------------- /src/arch/x86/x86.ins: -------------------------------------------------------------------------------- 1 | 00 add Eb Gb 2 | 01 add Ev Gv 3 | 02 add Gb Eb 4 | 03 add Gv Ev 5 | 04 add al Ib 6 | 05 add eax Iv 7 | 06 push es 8 | 07 pop es 9 | 08 or Eb Gb 10 | 09 or Ev Gv 11 | 0a or Gb Eb 12 | 0b or Gv Ev 13 | 0c or al Ib 14 | 0d or eax Iv 15 | 0e push cs 16 | 10 adc Eb Gb 17 | 11 adc Ev Gv 18 | 12 adc Gb Eb 19 | 13 adc Gv Ev 20 | 14 adc al Ib 21 | 15 adc eax Iv 22 | 16 push ss 23 | 17 pop ss 24 | 18 sbb Eb Gb 25 | 19 sbb Ev Gv 26 | 1a sbb Gb Eb 27 | 1b sbb Gv Ev 28 | 1c sbb al Ib 29 | 1d sbb eax Iv 30 | 1e push ds 31 | 1f pop ds 32 | 20 and Eb Gb 33 | 21 and Ev Gv 34 | 22 and Gb Eb 35 | 23 and Gv Ev 36 | 24 and al Ib 37 | 25 and eax Iv 38 | 26 es f:4 f:8 39 | 27 daa 40 | 28 sub Eb Gb 41 | 29 sub Ev Gv 42 | 2a sub Gb Eb 43 | 2b sub Gv Ev 44 | 2c sub al Ib 45 | 2d sub eax Iv 46 | 2e cs f:4 f:8 47 | 2f das 48 | 30 xor Eb Gb 49 | 31 xor Ev Gv 50 | 32 xor Gb Eb 51 | 33 xor Gv Ev 52 | 34 xor al Ib 53 | 35 xor eax Iv 54 | 36 invalid 55 | 37 aaa 56 | 38 cmp Eb Gb 57 | 39 cmp Ev Gv 58 | 3a cmp Gb Eb 59 | 3b cmp Gv Ev 60 | 3c cmp al Ib 61 | 3d cmp eax Iv 62 | 3e 63 | 3f aas 64 | 40 inc eax 65 | 41 inc ecx 66 | 42 inc edx 67 | 43 inc ebx 68 | 44 inc esp 69 | 45 inc ebp 70 | 46 inc esi 71 | 47 inc edi 72 | 48 dec eax 73 | 49 dec ecx 74 | 4a dec edx 75 | 4b dec ebx 76 | 4c dec esp 77 | 4d dec ebp 78 | 4e dec esi 79 | 4f dec 
edi 80 | 50 push eax 81 | 51 push ecx 82 | 52 push edx 83 | 53 push ebx 84 | 54 push esp 85 | 55 push ebp 86 | 56 push esi 87 | 57 push edi 88 | 58 pop eax 89 | 59 pop ecx 90 | 5a pop edx 91 | 5b pop ebx 92 | 5c pop esp 93 | 5d pop ebp 94 | 5e pop esi 95 | 5f pop edi 96 | 60 pusha 97 | 61 popa 98 | 62 bound Gv Ma 99 | 63 arpl Ew Rw 100 | 64 101 | 65 102 | 66 ov f:4 103 | 67 av f:4 104 | 68 push Iv 105 | 69 imul Gv Ev Iv 106 | 6a push Ib 107 | 6b imul Gv Ev Iv 108 | 6c insb Yb dx 109 | 6d insw Yb dx 110 | 6e outsb Dx Xb 111 | 6f outsw dx Xv 112 | 70 jo Jb 113 | 71 jno Jb 114 | 72 jb Jb 115 | 73 jnb Jb 116 | 74 jz Jb 117 | 75 jnz Jb 118 | 76 jbe Jb 119 | 77 jnbe Jb 120 | 78 js Jb 121 | 79 jns Jb 122 | 7a jp Jb 123 | 7b jnp Jb 124 | 7c jl Jb 125 | 7d jge Jb 126 | 7e jle Jb 127 | 7f jg Jb 128 | 80 invalid f:2 129 | 8000 and Eb Ib 130 | 8008 or Eb Ib 131 | 8010 adc Eb Ib 132 | 8018 sbb Eb Ib 133 | 8020 and Eb Ib 134 | 8028 sub Eb Ib 135 | 8030 xor Eb Ib 136 | 8038 cmp Eb Ib 137 | 138 | 81 invalid f:2 139 | 8100 add Eb Iv 140 | 8108 or Eb Iv 141 | 8110 adc Eb Iv 142 | 8118 sbb Eb Iv 143 | 8120 and Eb Iv 144 | 8128 sub Eb Iv 145 | 8130 xor Eb Iv 146 | 8138 cmp Eb Iv 147 | 82 invalid f:2 148 | 8200 add Eb Ib 149 | 8208 or Eb Ib 150 | 8210 adc Eb Ib 151 | 8218 sbb Eb Ib 152 | 8220 and Eb Ib 153 | 8228 sub Eb Ib 154 | 8230 xor Eb Ib 155 | 8238 cmp Eb Ib 156 | 83 invalid f:2 157 | 8300 add Ev Ib 158 | 8308 or Ev Ib 159 | 8310 adc Ev Ib 160 | 8318 sbb Ev Ib 161 | 8320 and Ev Ib 162 | 8328 sub Ev Ib 163 | 8330 xor Ev Ib 164 | 8338 cmp Ev Ib 165 | 84 test Eb Gb 166 | 85 test Ev Gv 167 | 86 xchg Eb Gb 168 | 87 xchg Ev Gv 169 | 88 mov Eb Gb 170 | 89 mov Ev Gv 171 | 8a mov Gb Eb 172 | 8b mov Gv Ev 173 | 8c mov Ew Sw 174 | 8d lea Gv M 175 | 8e mov Sw Ew 176 | 8f pop Ev 177 | 90 nop 178 | 91 xchg ecx eax 179 | 92 xchg edx eax 180 | 93 xchg ebx eax 181 | 94 xchg esp eax 182 | 95 xchg ebp eax 183 | 96 xchg esi eax 184 | 97 xchg edi eax 185 | 98 cbw 186 | 99 cwd 187 | 9a call Ap 188 | 
9b wait 189 | 9c pushf Fv 190 | 9d popf Fv 191 | 9e sahf 192 | 9f lahf 193 | a0 mov al Ob 194 | a1 mov eax Ov 195 | a2 mov Ob al 196 | a3 mov Ov eax 197 | a4 movsb Xb Yb 198 | a5 movsw Xv Yv 199 | a6 cmpsb Xb Yb 200 | a7 cmpsw Xv Yv 201 | a8 test al Ib 202 | a9 test eax Iv 203 | aa stosb Yb al 204 | ab stosw Yv eax 205 | ac lodsb al Xb 206 | ad lodsw eax Xv 207 | ae scasb al Xb 208 | af scasw eax Xv 209 | b0 mov al Ib 210 | b1 mov cl Ib 211 | b2 mov dl Ib 212 | b3 mov bl Ib 213 | b4 mov ah Ib 214 | b5 mov ch Ib 215 | b6 mov dh Ib 216 | b7 mov bh Ib 217 | b8 mov eax Iv 218 | b9 mov ecx Iv 219 | ba mov edx Iv 220 | bb mov ebx Iv 221 | bc mov esp Iv 222 | bd mov ebp Iv 223 | be mov esi Iv 224 | bf mov edi Iv 225 | c0 invalid f:2 226 | c000 rol Eb Ib 227 | c008 ror Eb Ib 228 | c010 rcl Eb Ib 229 | c018 rcr Eb Ib 230 | c020 shl Eb Ib 231 | c028 shr Eb Ib 232 | c030 sal Eb Ib 233 | c038 sar Eb Ib 234 | c1 invalid f:2 235 | c100 rol Ev Ib 236 | c108 ror Ev Ib 237 | c110 rcl Ev Ib 238 | c118 rcr Ev Ib 239 | c120 shl Ev Ib 240 | c128 shr Ev Ib 241 | c130 sal Ev Ib 242 | c138 sar Ev Ib 243 | c2 ret Iw 244 | c3 ret 245 | c4 les Gv Mp 246 | c5 lds Gv Mp 247 | c6 mov Eb Ib 248 | c7 mov Ev Iv 249 | c8 enter Iw Ib 250 | c9 leave 251 | ca retf Iw 252 | cb retf 253 | cc int 254 | cd int Ib 255 | ce into 256 | cf iret 257 | d0 invalid f:2 258 | d000 rol Eb 1 259 | d008 ror Eb 1 260 | d010 rcl Eb 1 261 | d018 rcr Eb 1 262 | d020 shl Eb 1 263 | d028 shr Eb 1 264 | d030 sal Eb 1 265 | d038 sar Eb 1 266 | d1 invalid f:2 267 | d100 rol Ev 1 268 | d108 ror Ev 1 269 | d110 rcl Ev 1 270 | d118 rcr Ev 1 271 | d120 shl Ev 1 272 | d128 shr Ev 1 273 | d130 sal Ev 1 274 | d138 sar Ev 1 275 | d2 invalid f:2 276 | d200 rol Eb cl 277 | d208 ror Eb cl 278 | d210 rcl Eb cl 279 | d218 rcr Eb cl 280 | d220 shl Eb cl 281 | d228 shr Eb cl 282 | d230 sal Eb cl 283 | d238 sar Eb cl 284 | d3 invalid f:2 285 | d300 rol Ev cl 286 | d308 ror Ev cl 287 | d310 rcl Ev cl 288 | d318 rcr Ev cl 289 | d320 shl Ev cl 
290 | d328 shr Ev cl 291 | d330 sal Ev cl 292 | d338 sar Ev cl 293 | d4 aam 294 | d5 aad 295 | d6 296 | d7 xlat 297 | d8 298 | d9 299 | da 300 | db 301 | dc 302 | dd 303 | de 304 | df 305 | e0 loopne Jb 306 | e1 loope Jb 307 | e2 loop Jb 308 | e3 jcxz Jb 309 | e4 in al Ib 310 | e5 in eaxa Ib 311 | e6 out Ib al 312 | e7 out Ib eax 313 | e8 call Av 314 | e9 jnp Jv 315 | ea jnp Ap 316 | eb jmp Jb 317 | ec in al dx 318 | ed in eax dx 319 | ee out dx al 320 | ef out dx eax 321 | f0 322 | f1 323 | f2 324 | f3 325 | f4 hlt 326 | f5 cmc 327 | f6 invalid f:2 328 | f600 test Eb Ib 329 | f601 test Eb Ib 330 | f602 not Eb 331 | f603 neg Eb 332 | f604 mul Eb 333 | f605 imul Eb 334 | f606 div Eb 335 | f607 idiv Eb 336 | f7 invalid f:2 337 | f700 test Ev Iv 338 | f708 test Ev Iv 339 | f710 not Ev 340 | f718 neg Ev 341 | f720 mul Ev 342 | f728 imul Ev 343 | f730 div Ev 344 | f738 idiv Ev 345 | f8 clc 346 | f9 stc 347 | fa cli 348 | fb sti 349 | fc cld 350 | fd std 351 | fe invalid f:2 352 | fe00 inc Eb 353 | fe08 dec Eb 354 | ff invalid f:2 355 | ff00 inc Ev 356 | ff08 dec Ev 357 | ff10 call Ev 358 | ff18 callf 359 | ff20 jmp Ev 360 | ff28 jmp Ep 361 | ff30 push Ev 362 | 0f00 grp6d Ew 363 | 0f01 grp7a 364 | 0f02 lar Gw Ew 365 | 0f03 lsl Gv Ew 366 | 0f04 367 | 0f05 368 | 0f06 ctls 369 | 0f07 370 | 0f08 371 | 0f09 372 | 0f0a 373 | 0f0b 374 | 0f0c 375 | 0f0d 376 | 0f0e 377 | 0f0f 378 | 0f10 379 | 0f11 380 | 0f12 381 | 0f13 382 | 0f14 383 | 0f15 384 | 0f16 385 | 0f17 386 | 0f18 387 | 0f19 388 | 0f1a 389 | 0f1b 390 | 0f1c 391 | 0f1d 392 | 0f1e 393 | 0f1f 394 | 0f20 mov Rd Cd 395 | 0f21 mov Rd Dd 396 | 0f22 mov Cd Rd 397 | 0f23 mov Dd Rd 398 | 0f24 mov Rd Td 399 | 0f25 400 | 0f26 mov Td Rd 401 | 0f27 402 | 0f28 403 | 0f29 404 | 0f2a 405 | 0f2b 406 | 0f2c 407 | 0f2d 408 | 0f2e 409 | 0f2f 410 | 0f30 411 | 0f31 412 | 0f32 413 | 0f33 414 | 0f34 415 | 0f35 416 | 0f36 417 | 0f37 418 | 0f38 419 | 0f39 420 | 0f3a 421 | 0f3b 422 | 0f3c 423 | 0f3d 424 | 0f3e 425 | 0f3f 426 | 0f40 427 | 0f41 428 | 
0f42 429 | 0f43 430 | 0f44 431 | 0f45 432 | 0f46 433 | 0f47 434 | 0f48 435 | 0f49 436 | 0f4a 437 | 0f4b 438 | 0f4c 439 | 0f4d 440 | 0f4e 441 | 0f4f 442 | 0f50 443 | 0f51 444 | 0f52 445 | 0f53 446 | 0f54 447 | 0f55 448 | 0f56 449 | 0f57 450 | 0f58 451 | 0f59 452 | 0f5a 453 | 0f5b 454 | 0f5c 455 | 0f5d 456 | 0f5e 457 | 0f5f 458 | 0f60 459 | 0f61 460 | 0f62 461 | 0f63 462 | 0f64 463 | 0f65 464 | 0f66 465 | 0f67 466 | 0f68 467 | 0f69 468 | 0f6a 469 | 0f6b 470 | 0f6c 471 | 0f6d 472 | 0f6e 473 | 0f6f 474 | 0f70 475 | 0f71 476 | 0f72 477 | 0f73 478 | 0f74 479 | 0f75 480 | 0f76 481 | 0f77 482 | 0f78 483 | 0f79 484 | 0f7a 485 | 0f7b 486 | 0f7c 487 | 0f7d 488 | 0f7e 489 | 0f7f 490 | 0f80 jo Jv 491 | 0f81 jno Jv 492 | 0f82 jb Jv 493 | 0f83 jnb Jv 494 | 0f84 jz Jv 495 | 0f85 jnz Jv 496 | 0f86 jbe Jv 497 | 0f87 jnbe Jv 498 | 0f88 js Jv 499 | 0f89 jns Jv 500 | 0f8a jp Jv 501 | 0f8b jnp Jv 502 | 0f8c jl Jv 503 | 0f8d jge Jv 504 | 0f8e jle Jv 505 | 0f8f jg Jv 506 | 0f90 seto Eb 507 | 0f91 setno Eb 508 | 0f92 setb Eb 509 | 0f93 setnb Eb 510 | 0f94 setz Eb 511 | 0f95 setnz Eb 512 | 0f96 setbe Eb 513 | 0f97 setnbe Eb 514 | 0f98 sets Eb 515 | 0f99 setns Eb 516 | 0f9a setp Eb 517 | 0f9b setnp Eb 518 | 0f9c setl Eb 519 | 0f9d setnl Eb 520 | 0f9e setle Eb 521 | 0f9f setnle Eb 522 | 0fa0 push fs 523 | 0fa1 pop fs 524 | 0fa2 525 | 0fa3 bt Ev Gv 526 | 0fa4 shld Ev Gv Ib 527 | 0fa5 shld Ev Gv cl 528 | 0fa6 529 | 0fa7 530 | 0fa8 push gs 531 | 0fa9 pop gs 532 | 0faa 533 | 0fab bts Ev Gv 534 | 0fac shrd Ev Gv Ib 535 | 0fad shrd Ev Gv cl 536 | 0fae 537 | 0faf imul Gv Ev 538 | 0fb0 539 | 0fb1 540 | 0fb2 lss Mp 541 | 0fb3 btr Ev Gv 542 | 0fb4 lfs Mp 543 | 0fb5 lgs Mp 544 | 0fb6 movzx Gv Eb 545 | 0fb7 movzx Gv Ew 546 | 0fb8 547 | 0fb9 548 | 0fba grp8d Ev Ib 549 | 0fbb btc Ev Gv 550 | 0fbc bsf Gv Ev 551 | 0fbd bsr Gv Ev 552 | 0fbe movsx Gv Eb 553 | 0fbf movsx Gv Ew 554 | 0fc0 555 | 0fc1 556 | 0fc2 557 | 0fc3 558 | 0fc4 559 | 0fc5 560 | 0fc6 561 | 0fc7 562 | 0fc8 563 | 0fc9 564 | 0fca 565 | 0fcb 566 
| 0fcc 567 | 0fcd 568 | 0fce 569 | 0fcf 570 | 0fd0 571 | 0fd1 572 | 0fd2 573 | 0fd3 574 | 0fd4 575 | 0fd5 576 | 0fd6 577 | 0fd7 578 | 0fd8 579 | 0fd9 580 | 0fda 581 | 0fdb 582 | 0fdc 583 | 0fdd 584 | 0fde 585 | 0fdf 586 | 0fe0 587 | 0fe1 588 | 0fe2 589 | 0fe3 590 | 0fe4 591 | 0fe5 592 | 0fe6 593 | 0fe7 594 | 0fe8 595 | 0fe9 596 | 0fea 597 | 0feb 598 | 0fec 599 | 0fed 600 | 0fee 601 | 0fef 602 | 0ff0 603 | 0ff1 604 | 0ff2 605 | 0ff3 606 | 0ff4 607 | 0ff5 608 | 0ff6 609 | 0ff7 610 | 0ff8 611 | 0ff9 612 | 0ffa 613 | 0ffb 614 | 0ffc 615 | 0ffd 616 | 0ffe 617 | 0fff 618 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 
25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. 
For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. 
If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. 
You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. 
You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. 
(Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /src/arch/x86/x86asm.c: -------------------------------------------------------------------------------- 1 | #include "x86asm.h" 2 | 3 | /*Loop through possible instructions and remove the ones with 4 | * mismatches addressing modes. Then attempt to encode using the 5 | * instructions encoding. Print errors if any. Resolve flags backwards 6 | * from leaf in trie node. 
Returns the shortest encoding 7 | * */ 8 | u8 *x86_assemble(char **tokens, int num_tokens, int mode, struct hash_entry *instr_head, int *len) 9 | { 10 | struct hash_entry *cur = instr_head; 11 | u8 ** enc = NULL, * barr = NULL; 12 | int * nsize = 0, nenc = 0, narr = 0; 13 | while (cur && !strcmp(cur->mnemonic, instr_head->mnemonic)) { 14 | struct trie_node *node = cur->value; 15 | struct x86_instr_entry *e = node->value; 16 | /*If token array matches the classification of the current instruction node then encode it*/ 17 | if (x86_classify_operand(tokens+1, num_tokens-1, e->operand, e->num_op)) { 18 | u8 *barr=NULL; 19 | int narr=0; 20 | barr=x86_encode(tokens+1, num_tokens-1, mode, node, e, &narr); 21 | if (narr) { 22 | nenc++; 23 | if (!enc) enc = malloc(sizeof(u8*)); 24 | else enc = realloc(enc, sizeof(u8*)*nenc); 25 | if (!nsize) nsize = malloc(sizeof(int)); 26 | else nsize = realloc(nsize, sizeof(int)*nenc); 27 | enc[nenc-1] = barr, nsize[nenc-1] = narr; 28 | } else { 29 | free(barr); 30 | } 31 | } 32 | cur = cur->next; 33 | } 34 | /*Return the shortest encoding*/ 35 | if (!enc||!nenc) { 36 | printf("Error encoding "); 37 | for (int i = 0; i < num_tokens; i++) 38 | printf("%s%s", tokens[i], (i+1)==num_tokens?"\n":" "); 39 | return NULL; 40 | } 41 | barr = enc[0], narr = nsize[0]; 42 | for (int i = 0; i < nenc; i++) { 43 | if (nsize[i] < narr) 44 | barr = enc[i], narr = nsize[i]; 45 | } 46 | for (int i = 0; i < nenc; i++) { 47 | if (enc[i] != barr) 48 | free(enc[i]); 49 | } 50 | free(enc); 51 | free(nsize); 52 | *len = narr; 53 | return barr; 54 | } 55 | 56 | /*Loops through operands and checks if the operands are using the same addressing mode as the instruction*/ 57 | int x86_classify_operand(char **tokens, int num_tokens, char operands[][MAX_OPER_LEN], int num_operands) 58 | { 59 | if (!num_operands && !num_tokens) return 1; 60 | int idx = 0, len = 0, operand = 0; 61 | while ((idx=x86_next_operand(tokens, num_tokens, operand, &len)) != -1) { 62 | if (operand 
>= num_operands) return 0; 63 | if (!x86_match_operand(tokens+idx, len, operands[operand])) 64 | return 0; 65 | operand++; 66 | } 67 | return operand == num_operands; 68 | } 69 | 70 | /*Returns the token idx for a operand (delimiters are commas)*/ 71 | int x86_next_operand(char ** tokens, int nt, int op, int *len) 72 | { 73 | int idx = 0, lidx=0; 74 | int opr = 0; 75 | while (idx <= nt) { 76 | if((idx= 'A' && op_type[0] <= 'Z') { 94 | int idx = 0; 95 | int size = op_type[1]; 96 | int nsize = 0; 97 | uint64_t num= 0; 98 | char ot = op_type[0]; 99 | if (ot=='G') { /*Register*/ 100 | idx = get_register_index(tokens[0]); 101 | if (idx != -1 && X86_SIZE_COMPAT(size, REG_SIZE_IDX(idx)) && num_tokens == 1) 102 | return 1; 103 | } else if (ot == 'E' || ot == 'M' || ot == 'O') {/*Modrm or mem-only modrm*/ 104 | if (x86_valid_modrm(tokens, num_tokens, op_type[1])) 105 | return 1; 106 | } else if (ot == 'I' || ot == 'J' || ot == 'A') {/*Immediate, relative, or call relative*/ 107 | if (**tokens == '+') tokens++; 108 | if (**tokens == '-') tokens++; 109 | if (!isdigit(**tokens)) return 0; 110 | num = strtol(*tokens, NULL, 0); 111 | if (num < MAX(8)) nsize = 1; 112 | else if (num < MAX(16)) nsize = 2; 113 | else if (num < MAX(32)) nsize = 3; 114 | else nsize = 4; 115 | if (X86_SIZE_MIN(op_type[1], nsize)) return 1; 116 | } 117 | } else { 118 | if (!strcmp(tokens[0], op_type)) {/*String hardcoded operands*/ 119 | return 1; 120 | } 121 | int tridx = get_register_index(tokens[0]); 122 | int ridx = get_register_index(op_type); 123 | if (ridx != -1 && tridx != -1) { 124 | if (REG_BIN_IDX(tridx)==REG_BIN_IDX(ridx)) return 1; 125 | if (REG_BIN_IDX(tridx)>7 && REG_BIN_IDX(ridx)==(REG_BIN_IDX(tridx)-8)) return 1; 126 | } 127 | } 128 | return 0; 129 | } 130 | 131 | /*Doesn't check for illegal combinations but just checks that in format of size [ addr ]*/ 132 | int x86_valid_modrm(char **tokens, int num_tokens, int size) 133 | { 134 | if (!tokens || !num_tokens) return 0; 135 | 136 | int 
ridx = get_register_index(tokens[0]); 137 | if (ridx != -1 && X86_SIZE_COMPAT(size, REG_SIZE_IDX(ridx)) && num_tokens == 1) 138 | return 1; 139 | if (X86_SIZE_COMPAT(size, x86_size(*tokens))) 140 | return 1; 141 | return 0; 142 | } 143 | 144 | /*Returns operand size given the operand width prefix*/ 145 | int x86_size(char *tok) 146 | { 147 | if (!tok) return 0; 148 | if (!strcmp(tok,"byte")) return 1; 149 | if (!strcmp(tok, "word")) return 2; 150 | if (!strcmp(tok, "dword")) return 3; 151 | if (!strcmp(tok, "qword")) return 4; 152 | return 0; 153 | } 154 | 155 | /*Encodes the operands then walks backwards from the trie leaf to the root and resolves all flags*/ 156 | u8 *x86_encode(char**tokens,int num_tokens,int mode,struct trie_node *n,struct x86_instr_entry *e,int*alen) 157 | { 158 | *alen = 0; 159 | /*Figure out the default size, so that the encoder can set override bits*/ 160 | int os = DEF_OPER_SIZE(mode), as = DEF_ADDR_SIZE(mode); 161 | u8 *barr = NULL; 162 | u8 flags = 0; 163 | int blen = 0, idx = 0, len = 0; 164 | for (int i = 0; i < e->num_op; i++) { 165 | int cos = e->operand[i][1]; 166 | cos = (cos=='v'||cos=='d')&&(flags&1)?'w':cos; 167 | if (!isupper(*e->operand[i])) { 168 | idx = x86_next_operand(tokens, num_tokens, i, &len); 169 | if (!strcmp(e->operand[i], tokens[idx])) continue; 170 | int r1 = get_register_index(e->operand[i]), r2 = get_register_index(tokens[idx]); 171 | if (REG_BIN_IDX(r1)==REG_BIN_IDX(r2)&®_SIZE_IDX(r1)==3&®_SIZE_IDX(r2)==2) 172 | SET_FLAG(flags, OPER_SIZE_OVERRIDE); 173 | else if (REG_BIN_IDX(r2)>7&&(REG_BIN_IDX(r2)-8)==REG_BIN_IDX(r1)&&(REG_SIZE_IDX(r1)==REG_SIZE_IDX(r2))) 174 | SET_FLAG(flags, REX_B); 175 | else return barr; 176 | continue; 177 | } 178 | idx = x86_next_operand(tokens, num_tokens, i, &len); 179 | if (*e->operand[i]=='E' || *e->operand[i]=='M') { 180 | if (!x86_encode_modrm(tokens+idx, len, &barr, &blen, os, as, &flags)) 181 | return 0; 182 | } else if (*e->operand[i] == 'O') { 183 | char *base=NULL,*index=NULL; 
184 | int s=0, ds = 0; 185 | uint64_t disp = 0; 186 | int sz=x86_get_indir(tokens, num_tokens, &base,&index,&s,&disp,&ds); 187 | (void)sz; 188 | if (base || index || s || (as!=ds)) return barr; 189 | for (int i = 0; i < (ds==4?8:4); i++) 190 | x86_add_byte(&barr, &blen, ((u8*)&disp)[i]); 191 | } else if (*e->operand[i] == 'G') { 192 | int reg = get_register_index(tokens[idx]); 193 | if (reg == -1) continue; 194 | if (REG_SIZE_IDX(reg) == 4 && os == 3) 195 | SET_FLAG(flags, REX_W); 196 | if (REG_SIZE_IDX(reg) == 2 && os == 3) 197 | SET_FLAG(flags, OPER_SIZE_OVERRIDE); 198 | reg = REG_BIN_IDX(reg); 199 | if (reg>7) { 200 | reg -= 8; 201 | SET_FLAG(flags, REX_R); 202 | } 203 | if (blen) barr[0] |= (reg<<3); 204 | else x86_add_byte(&barr, &blen, reg<<3); 205 | } else if (*e->operand[i]=='I'||*e->operand[i]=='J'||*e->operand[i]=='A') { 206 | int neg = 1; 207 | if (*tokens[idx] == '-') neg=-1, idx++; 208 | uint64_t val = neg*strtol(tokens[idx], NULL, 0); 209 | if (*e->operand[i]=='J'||*e->operand[i]=='A') 210 | val -= n->dist + X86_SIZE_IMM(cos); 211 | for (int i = 0; i < X86_SIZE_IMM(cos); i++) 212 | x86_add_byte(&barr, &blen, ((u8*)&val)[i]); 213 | } 214 | } 215 | int ops = 0; 216 | struct trie_node *p = n; 217 | u8 tflags = 0; 218 | for (int i = 0; p && (p->value || p->parent); p=p->parent, i++) { 219 | if (p->parent) tflags = p->parent->flags; 220 | else tflags = 0; 221 | if (!tflags) { 222 | x86_add_pbyte(&barr, &blen, p->key); 223 | ops++; 224 | } else if (CHECK_FLAG(tflags, REG_EXT_FLAG)) { 225 | if (blen > ops) barr[ops] |= p->key; 226 | } 227 | } 228 | /*If rex prefix*/ 229 | if ((flags>>2)) { 230 | if (mode != MODE_X64) return barr; 231 | x86_add_pbyte(&barr, &blen, 0x40 + (flags>>2)); 232 | } if ((flags&2)) { 233 | x86_add_pbyte(&barr, &blen, 0x67); 234 | } if ((flags&1)) { 235 | x86_add_pbyte(&barr, &blen, 0x66); 236 | } 237 | *alen = blen; 238 | return barr; 239 | } 240 | 241 | /*Encodes a operand in the mod/rm addressing mode, using sib if necessary*/ 242 | 
/*Encodes one operand in ModRM form into the byte array *barr (length *blen). A register token is encoded directly (mod=11); anything else is parsed by x86_get_indir() and encoded as a memory operand, with a SIB byte when a scale or index is present, followed by the displacement bytes. The ModRM bits are OR-ed into (*barr)[0] when the array already holds bytes, otherwise a new byte is appended. REX and size-override bits are set in *flags. Returns 1 on success, 0 when base and index registers differ in size*/ int x86_encode_modrm(char **tokens, int num_tokens, u8 **barr, int *blen, int os, int as, u8 * flags) 243 | { 244 | u8 modrm=0; 245 | int reg; 246 | if ((reg=get_register_index(*tokens)) != -1) {/*Direct Encoding: Mod=11*/ 247 | modrm |= (3<<6); 248 | if (REG_SIZE_IDX(reg) == 4 && os == 3) 249 | SET_FLAG(*flags, REX_W); 250 | if (REG_SIZE_IDX(reg) == 2 && os == 3) 251 | SET_FLAG(*flags, OPER_SIZE_OVERRIDE); 252 | reg = REG_BIN_IDX(reg); 253 | if (reg > 7) { 254 | reg -= 8; 255 | SET_FLAG(*flags, REX_B); 256 | } 257 | modrm |= reg; 258 | if (!(*blen)) x86_add_byte(barr, blen, modrm); 259 | else (*barr)[0] |= modrm; 260 | } else {/*Indirect encoding: mod=0,01,10*/ 261 | char *base=NULL,*index=NULL; 262 | int s=0, ds = 0; 263 | uint64_t disp = 0; 264 | int sz=x86_get_indir(tokens, num_tokens, &base,&index,&s,&disp,&ds); 265 | if (sz == 4 && os == 3) { 266 | SET_FLAG(*flags, REX_W); 267 | } 268 | u8 idxr = 0, bsr = 0; 269 | int idxs = 0, bss = 0; 270 | if (index) { 271 | idxr = REG_BIN_IDX(get_register_index(index)); 272 | idxs = REG_SIZE_IDX(get_register_index(index)); 273 | if (idxr > 7) { 274 | idxr -= 8; 275 | SET_FLAG(*flags, REX_X); 276 | } 277 | } 278 | if (base) { 279 | bsr = REG_BIN_IDX(get_register_index(base)); 280 | bss = REG_SIZE_IDX(get_register_index(base)); 281 | if (bsr > 7) { 282 | bsr -= 8; 283 | SET_FLAG(*flags, REX_B); 284 | } 285 | } 286 | if (base && index && bss != idxs) { 287 | return 0; 288 | } 289 | if (bss == (as-1) || idxs == (as-1)) { 290 | SET_FLAG(*flags, ADDR_SIZE_OVERRIDE); 291 | } 292 | if (ds>1) ds = 4; /*Displacements wider than one byte are always emitted as four bytes*/ 293 | if (s || index) {/*If scale or index, then using SIB encoding*/ 294 | modrm |= 4; 295 | if (ds==1) modrm |= 1<<6; 296 | else if (ds==4) modrm |= 2<<6; 297 | u8 sib = 0; 298 | if (s) sib |= X64_SCALE(s)<<6; 299 | if (index) sib |= idxr<<3; 300 | else sib |= 4<<3; 301 | if (base) { 302 | sib |= bsr; 303 | } else { /*No base register: base field is 5; with no displacement a zero 4-byte one is added, otherwise the mod bits are cleared*/ 304 | sib |= 5; 305 | if (!ds) disp=0,ds=4; 306 | else modrm ^= modrm&0xc0; 307 | } 308 | if (!(*blen)) 
x86_add_byte(barr, blen, modrm); 309 | else (*barr)[0] |= modrm; 310 | x86_add_byte(barr, blen, sib); 311 | } else { 312 | if (base && ds==1) modrm |= 1<<6; 313 | else if (base && ds > 1) modrm |= 2 << 6; 314 | if ((!base && ds) || (base && !strcmp(base, "rip"))) { 315 | ds = 4; 316 | modrm |= 5; 317 | } else if (base) modrm |= bsr; 318 | if (!(*blen)) x86_add_byte(barr, blen, modrm); 319 | else (*barr)[0] |= modrm; 320 | } /*Emit the displacement, lowest-addressed byte of disp first*/ 321 | for (int i = 0; i < ds; i++) 322 | x86_add_byte(barr, blen, ((u8*)&disp)[i]); 323 | } 324 | return 1; 325 | } 326 | 327 | /*Parses the tokens of an indirect memory operand and stores the base register, index register, scale, displacement and displacement size class through the out-parameters (all are reset first). Parsing stops at a "]" token. Returns the last non-zero x86_size() result seen among the tokens, or 0 if there was none or the token list is empty*/ 328 | int x86_get_indir(char **tokens, int nt, char **b, char **i, int*s, uint64_t*d, int*ds) 329 | { 330 | *b=NULL,*i=NULL,*s=0,*d=0,*ds=0; 331 | if (!nt||!tokens) return 0; 332 | int idx = 0, io=-1; 333 | char *iter=tokens[0],*l=NULL; 334 | int size=0, sz = 0, riter = 0; /*l is the token seen on the previous pass (tokens[0] on the first pass), so *l is the operator in front of the current token*/ 335 | while (idx < nt && (l=iter) && (iter=tokens[idx++])) { 336 | if ((size=x86_size(iter)) && (sz=size)) continue; 337 | if (*iter=='[') { 338 | io=0; 339 | continue; 340 | } 341 | if (*iter==']') break; 342 | 343 | riter = get_register_index(iter); 344 | if (io==0&&riter!=-1) io++, *b = iter; 345 | if ((*l=='+') &&io==1&&riter!=-1) *i = iter; 346 | if ((*l=='*')&&io>0&&riter==-1) io++, *s = strtol(iter, NULL, 0); 347 | if (((*l=='+'||*l=='-')&&riter==-1)||(io==0&&riter==-1)) { 348 | uint64_t num = strtol(iter, NULL, 0); 349 | int nsize = 0; /*nsize is a size class from 1 to 4 chosen by comparing against MAX(8), MAX(16) and MAX(32)*/ 350 | if (num < MAX(8)) nsize = 1; 351 | else if (num < MAX(16)) nsize = 2; 352 | else if (num < MAX(32)) nsize = 3; 353 | else nsize = 4; 354 | num = *l=='-'?-num:num; 355 | *d = num, *ds = nsize; 356 | io++; 357 | } 358 | } 359 | return sz; 360 | } 361 | 362 | /*Grows *barr by one byte (malloc when *barr is NULL, realloc otherwise), stores b in the new last slot and updates *len. NOTE(review): the malloc/realloc result is not checked before it is written to*/ 363 | void x86_add_byte(u8 **barr, int *len, u8 b) 364 | { 365 | int l = *len; 366 | l++; 367 | u8 *arr = *barr; 368 | if (!arr) arr = malloc(l); 369 | else arr = realloc(arr, l); 370 | arr[l-1] = b; 371 | *len = l; 
372 | *barr = arr; 373 | } 374 | 375 | /*Grows *barr by one byte, shifts the existing bytes up by one position and stores b at index 0, then updates *len. NOTE(review): the malloc/realloc result is not checked before it is written to*/ 376 | void x86_add_pbyte(u8 **barr, int *len, u8 b) 377 | { 378 | int l = *len; 379 | l++; 380 | u8 *arr = *barr; 381 | if (!arr) arr = malloc(l); 382 | else arr = realloc(arr, l); 383 | arr[l-1] = 0; 384 | memmove(arr+1, arr, l-1); 385 | arr[0] = b; 386 | *len = l; 387 | *barr = arr; 388 | } 389 | -------------------------------------------------------------------------------- /src/arch/x86/x64.ins: -------------------------------------------------------------------------------- 1 | 00 add Eb Gb 2 | 01 add Ev Gv 3 | 02 add Gb Eb 4 | 03 add Gv Ev 5 | 04 add al Ib 6 | 05 add rax Id 7 | 06 invalid 8 | 07 invalid 9 | 08 or Eb Gb 10 | 09 or Ev Gv 11 | 0a or Gb Eb 12 | 0b or Gv Ev 13 | 0c or al Ib 14 | 0d or rax Id 15 | 0e invalid 16 | 10 adc Eb Gb 17 | 11 adc Ev Gv 18 | 12 adc Gb Eb 19 | 13 adc Gv Ev 20 | 14 adc al Ib 21 | 15 adc rax Id 22 | 16 invalid 23 | 17 invalid 24 | 18 sbb Eb Gb 25 | 19 sbb Ev Gv 26 | 1a sbb Gb Eb 27 | 1b sbb Gv Ev 28 | 1c sbb al Ib 29 | 1d sbb rax Id 30 | 1e invalid 31 | 1f invalid 32 | 20 and Eb Gb 33 | 21 and Ev Gv 34 | 22 and Gb Eb 35 | 23 and Gv Ev 36 | 24 and al Ib 37 | 25 and eax Id 38 | 26 null 39 | 27 invalid 40 | 28 sub Eb Gb 41 | 29 sub Ev Gv 42 | 2a sub Gb Eb 43 | 2b sub Gv Ev 44 | 2c sub al Ib 45 | 2d sub rax Id 46 | 2e null 47 | 2f invalid 48 | 30 xor Eb Gb 49 | 31 xor Ev Gv 50 | 32 xor Gb Eb 51 | 33 xor Gv Ev 52 | 34 xor al Ib 53 | 35 xor eax Id 54 | 36 null 55 | 37 invalid 56 | 38 cmp Eb Gb 57 | 39 cmp Ev Gv 58 | 3a cmp Gb Eb 59 | 3b cmp Gv Ev 60 | 3c cmp al Ib 61 | 3d cmp rax Id 62 | 3e null 63 | 3f invalid 64 | 40 rex f:4 65 | 41 rex.b f:4 66 | 42 rex.x f:4 67 | 43 rex.xb f:4 68 | 44 rex.r f:4 69 | 45 rex.rb f:4 70 | 46 rex.rx f:4 71 | 47 rex.rxb f:4 72 | 48 rex.w f:4 73 | 49 rex.wb f:4 74 | 4a rex.wx f:4 75 | 4b rex.wxb f:4 76 | 4c rex.wr f:4 77 | 4d rex.wrb f:4 78 | 4e rex.wrx f:4 79 | 4f rex.wrxb f:4 80 | 50 push rax 81 | 51 push rcx 82 | 52 push rdx 
83 | 53 push rbx 84 | 54 push rsp 85 | 55 push rbp 86 | 56 push rsi 87 | 57 push rdi 88 | 58 pop rax 89 | 59 pop rcx 90 | 5a pop rdx 91 | 5b pop rbx 92 | 5c pop rsp 93 | 5d pop rbp 94 | 5e pop rsi 95 | 5f pop rdi 96 | 60 invalid 97 | 61 invalid 98 | 62 invalid 99 | 63 movsxd Gv Ed 100 | 64 fs f:4 f:8 101 | 65 gs f:4 f:8 102 | 66 ov f:4 103 | 67 av f:4 104 | 68 push Id 105 | 69 imul Gv Ev Id 106 | 6a push Ib 107 | 6b imul Gv Ev Ib 108 | 6c insb Yb dx 109 | 6d insw Yb dx 110 | 6e outsb Dx Xb 111 | 6f outsw dx Xv 112 | 70 jo Jb 113 | 71 jno Jb 114 | 72 jb Jb 115 | 73 jnb Jb 116 | 74 jz Jb 117 | 75 jnz Jb 118 | 76 jbe Jb 119 | 77 jnbe Jb 120 | 78 js Jb 121 | 79 jns Jb 122 | 7a jp Jb 123 | 7b jnp Jb 124 | 7c jl Jb 125 | 7d jge Jb 126 | 7e jle Jb 127 | 7f jg Jb 128 | 80 invalid f:2 129 | 8000 and Eb Ib 130 | 8008 or Eb Ib 131 | 8010 adc Eb Ib 132 | 8018 sbb Eb Ib 133 | 8020 and Eb Ib 134 | 8028 sub Eb Ib 135 | 8030 xor Eb Ib 136 | 8038 cmp Eb Ib 137 | 81 invalid f:2 138 | 8100 add Eb Id 139 | 8108 or Eb Id 140 | 8110 adc Ev Id 141 | 8118 sbb Ev Id 142 | 8120 and Ev Id 143 | 8128 sub Ev Id 144 | 8130 xor Ev Id 145 | 8138 cmp Ev Id 146 | 82 invalid 147 | 83 invalid f:2 148 | 8300 add Ev Ib 149 | 8308 or Ev Ib 150 | 8310 adc Ev Ib 151 | 8318 sbb Ev Ib 152 | 8320 and Ev Ib 153 | 8328 sub Ev Ib 154 | 8330 xor Ev Ib 155 | 8338 cmp Ev Ib 156 | 84 test Eb Gb 157 | 85 test Ev Gv 158 | 86 xchg Eb Gb 159 | 87 xchg Ev Gv 160 | 88 mov Eb Gb 161 | 89 mov Ev Gv 162 | 8a mov Gb Eb 163 | 8b mov Gv Ev 164 | 8c mov Ew Sw 165 | 8d lea Gv M 166 | 8e mov Sw Ew 167 | 8f pop Ev 168 | 90 nop 169 | 91 xchg rcx rax 170 | 92 xchg rdx rax 171 | 93 xchg rbx rax 172 | 94 xchg rsp rax 173 | 95 xchg rbp rax 174 | 96 xchg rsi rax 175 | 97 xchg rdi rax 176 | 98 cbw 177 | 99 cwd 178 | 9a invalid 179 | 9b wait 180 | 9c pushf Fv 181 | 9d popf Fv 182 | 9e sahf 183 | 9f lahf 184 | a0 mov al Ob 185 | a1 mov eax Ov 186 | a2 mov Ob al 187 | a3 mov Ov eax 188 | a4 movsb Xb Yb 189 | a5 movsw Xv Yv 190 | a6 cmpsb Xb 
Yb 191 | a7 cmpsw Xv Yv 192 | a8 test al Ib 193 | a9 test eax Id 194 | aa stosb Yb al 195 | ab stosw Yv eax 196 | ac lodsb al Xb 197 | ad lodsw eax Xv 198 | ae scasb al Xb 199 | af scasw eax Xv 200 | b0 mov al Ib 201 | b1 mov cl Ib 202 | b2 mov dl Ib 203 | b3 mov bl Ib 204 | b4 mov ah Ib 205 | b5 mov ch Ib 206 | b6 mov dh Ib 207 | b7 mov bh Ib 208 | b8 mov eax Iv 209 | b9 mov ecx Iv 210 | ba mov edx Iv 211 | bb mov ebx Iv 212 | bc mov esp Iv 213 | bd mov ebp Iv 214 | be mov esi Iv 215 | bf mov edi Iv 216 | c0 invalid f:2 217 | c000 rol Eb Ib 218 | c008 ror Eb Ib 219 | c010 rcl Eb Ib 220 | c018 rcr Eb Ib 221 | c020 shl Eb Ib 222 | c028 shr Eb Ib 223 | c030 sal Eb Ib 224 | c038 sar Eb Ib 225 | c1 invalid f:2 226 | c100 rol Ev Ib 227 | c108 ror Ev Ib 228 | c110 rcl Ev Ib 229 | c118 rcr Ev Ib 230 | c120 shl Ev Ib 231 | c128 shr Ev Ib 232 | c130 sal Ev Ib 233 | c138 sar Ev Ib 234 | c2 ret Iw 235 | c3 ret 236 | c4 invalid 237 | c5 invalid 238 | c6 invext f:2 239 | c600 mov Eb Ib 240 | c7 invalid f:2 241 | c700 mov Ev Id 242 | c8 enter Iw Ib 243 | c9 leave 244 | ca retf Iw 245 | cb retf 246 | cc int 247 | cd int Ib 248 | ce into 249 | cf iret 250 | d0 invalid f:2 251 | d000 rol Eb 1 252 | d008 ror Eb 1 253 | d010 rcl Eb 1 254 | d018 rcr Eb 1 255 | d020 shl Eb 1 256 | d028 shr Eb 1 257 | d030 sal Eb 1 258 | d038 sar Eb 1 259 | d1 invalid f:2 260 | d100 rol Ev 1 261 | d108 ror Ev 1 262 | d110 rcl Ev 1 263 | d118 rcr Ev 1 264 | d120 shl Ev 1 265 | d128 shr Ev 1 266 | d130 sal Ev 1 267 | d138 sar Ev 1 268 | d2 invalid f:2 269 | d200 rol Eb cl 270 | d208 ror Eb cl 271 | d210 rcl Eb cl 272 | d218 rcr Eb cl 273 | d220 shl Eb cl 274 | d228 shr Eb cl 275 | d230 sal Eb cl 276 | d238 sar Eb cl 277 | d3 invalid f:2 278 | d300 rol Ev cl 279 | d308 ror Ev cl 280 | d310 rcl Ev cl 281 | d318 rcr Ev cl 282 | d320 shl Ev cl 283 | d328 shr Ev cl 284 | d330 sal Ev cl 285 | d338 sar Ev cl 286 | d4 invalid 287 | d5 invalid 288 | d6 invalid 289 | d7 xlat 290 | d8 invalid f:2 291 | d800 fadd M 
f:16 292 | d80003 fadd B 293 | d808 fmul M f:16 294 | d80803 fmul B 295 | d810 fcom B 296 | d8d1 fcom st1 297 | d818 fcomp B 298 | d8d9 fcomp st1 299 | d820 fsub M f:16 300 | d82003 fsub B 301 | d828 fsubr M f:16 302 | d82803 fsubr B 303 | d830 fdiv M f:16 304 | d83003 fdiv B 305 | d838 fdivr M f:16 306 | d83803 fdivr B f:16 307 | d9 invalid f:2 308 | d900 fld B 309 | d908 fxch B 310 | d9c9 fxch st1 311 | d910 fst M 312 | d9d0 fnop 313 | d918 fstp M 314 | d91803 fstp1 B 315 | d920 fldenv M 316 | d9e0 fchs 317 | d9e1 fabs 318 | d9e4 ftst 319 | d9e5 fxam 320 | d928 fldcw M 321 | d9e8 fld1 322 | d9e9 fld2t 323 | d9ea fldl2e 324 | d9eb fldpi 325 | d9ec fldlg2 326 | d9ed fldln2 327 | d9ee fldz 328 | d930 fnstenv M 329 | 9bd930 fstenv M 330 | d9f0 f2xm1 331 | d9f1 fyl2x st1 332 | d9f2 fptan 333 | d9f3 fpatan st1 334 | d9f4 fxtract 335 | d9f5 fprem1 st1 336 | d9f6 fdecstp 337 | d9f7 fincstp 338 | d938 fnstcw M 339 | 9bd938 fstcw M 340 | d9f8 fprem st1 341 | d9f9 fyl2xp1 st1 342 | d9fa fsqrt 343 | d9fb fsincos 344 | d9fc frndint 345 | d9fd fscale 346 | d9fe fsin 347 | d9ff fcos 348 | da invalid f:2 349 | da00 fiadd M f:16 350 | da0003 fcmovb B 351 | da08 fimul M f:16 352 | da0803 fcmove B 353 | da10 ficom M f:16 354 | da1003 fcmovbe B 355 | da18 ficomp M f:16 356 | da1803 fcmovu B 357 | da20 fisub M 358 | da28 fisubr M 359 | dae8 fucompp st1 360 | da30 fidiv M 361 | da38 fidivr M 362 | db invalid f:2 363 | db00 fild M f:16 364 | db0003 fcmovnb B 365 | db08 fisttp M f:16 366 | db0803 fcmovne 367 | db10 fistp M 368 | db1003 fcmovnu B 369 | dbe0 fneni 370 | 9bdbe0 feni 371 | dbe1 fndisi 372 | 9bdbe1 dsisi 373 | dbe2 fnclex 374 | 9bdbe2 fclex 375 | dbe3 fninit 376 | 9bdbe3 finit 377 | dbe4 fnsetpm 378 | 9bdbe4 fsetpm 379 | db20 fucomi B 380 | db28 fcomi B 381 | db30 fstp M 382 | dc invalid f:2 383 | dc00 fadd M f:16 384 | dc0003 fadd B 385 | dc08 fmul M f:16 386 | dc0803 fmul B 387 | dc10 fcom M f:16 388 | dc1003 fcom2 B 389 | dc18 fcomp M f:16 390 | dc1803 fcomp3 B 391 | dc20 
fsub M f:16 392 | dc2003 fsubr B 393 | dc28 fsubr M f:16 394 | dc2803 fsub B 395 | dc30 fdiv M f:16 396 | dc3003 fdivr B 397 | dc38 fdivr M f:16 398 | dc3803 fdiv B 399 | dd invalid f:2 400 | dd00 fld M f:16 401 | dd0003 ffree B 402 | dd08 fisttp M f:16 403 | dd0803 fxch4 B 404 | dd10 fst M f:16 405 | dd1003 fstp B 406 | dd18 fstp M f:16 407 | dd1803 fstp B 408 | dd20 frstor 409 | dde1 fucom st1 410 | dd28 fucomp B 411 | dde9 fucomp st1 412 | dd30 fnsave M 413 | 9bdd30 fsave M 414 | dd38 fnstsw M 415 | 9bdd38 fstsw M 416 | de invalid f:2 417 | de00 fiadd M f:16 418 | de0003 faddp B 419 | dec1 faddp st1 420 | de08 fimul M f:16 421 | de0803 fmulp B 422 | dec9 fmulp st1 423 | de10 ficom M f:16 424 | de1003 fcomp5 B 425 | de18 ficomp M f:16 426 | de1803 ficompp st1 427 | de20 fisub M f:16 428 | de2003 fsubrp B 429 | dee1 fsubrp st1 430 | de28 fisubr M f:16 431 | de2803 fsubp B 432 | dee9 fsubp st1 433 | de30 fidiv M f:16 434 | de3003 fdivrp B 435 | def1 fdivp st1 436 | de38 fdivr M f:16 437 | de3803 fdivp B 438 | def9 fdivp st1 439 | df invalid f:2 440 | df00 fild M f:16 441 | df0003 ffreep B 442 | df08 fisttp M f:16 443 | df0803 fxch7 B 444 | df10 fist M f:16 445 | df1003 fstp8 B 446 | df18 fistp M f:16 447 | df1803 fstp9 B 448 | df20 fbld M 449 | dfe0 fnstsw ax 450 | 9bdfe0 fstsw ax 451 | df28 fild M 452 | df30 fbstp M 453 | df38 fistp 454 | e0 loopne Jb 455 | e1 loope Jb 456 | e2 loop Jb 457 | e3 jcxz Jb 458 | e4 in al Ib 459 | e5 in eax Ib 460 | e6 out Ib al 461 | e7 out Ib eax 462 | e8 call Av 463 | e9 jmp Jv 464 | ea invalid 465 | eb jmp Jb 466 | ec in al dx 467 | ed in eax dx 468 | ee out dx al 469 | ef out dx eax 470 | f0 lock f:4 471 | f1 icebp 472 | f2 repne f:4 473 | f3 repe f:4 474 | f4 hlt 475 | f5 cmc 476 | f6 invalid f:2 477 | f600 test Eb Ib 478 | f601 test Eb Ib 479 | f602 not Eb 480 | f603 neg Eb 481 | f604 mul Eb 482 | f605 imul Eb 483 | f606 div Eb 484 | f607 idiv Eb 485 | f7 invalid f:2 486 | f700 test Ev Iv 487 | f708 test Ev Iv 488 | f710 not Ev 
489 | f718 neg Ev 490 | f720 mul Ev 491 | f728 imul Ev 492 | f730 div Ev 493 | f738 idiv Ev 494 | f8 clc 495 | f9 stc 496 | fa cli 497 | fb sti 498 | fc cld 499 | fd std 500 | fe invalid f:2 501 | fe00 inc Eb 502 | fe08 dec Eb 503 | ff invalid f:2 504 | ff00 inc Ev 505 | ff08 dec Ev 506 | ff10 call Ev 507 | ff18 callf 508 | ff20 jmp Eq 509 | ff28 jmp Ep 510 | ff30 push Eq 511 | 0f00 invalid f:2 512 | 0f0000 sldt Ew 513 | 0f0008 str Ew 514 | 0f0010 lldt Ew 515 | 0f0018 ltr Ew 516 | 0f0020 verr Ew 517 | 0f0028 verw Ew 518 | 0f01 invalid f:2 519 | 0f0100 sgdt Ms 520 | 0f0108 sidt Ms 521 | 0f0110 lgdt Ms 522 | 0f0118 lidt Ms 523 | 0f0120 smsw Ew 524 | 0f0128 lmsw Ew 525 | 0f02 lar Gw Ew 526 | 0f03 lsl Gv Ew 527 | 0f04 invalid 528 | 0f05 syscall 529 | 0f06 clts 530 | 0f07 sysret //operands 531 | 0f08 invd 532 | 0f09 wbinvd 533 | 0f0b invalid 534 | 0f0d nop Ev 535 | 0f10 movups Vd Wd 536 | f30f10 movss Vq Wq 537 | 660f10 movupd Vq Wq 538 | f20f10 movsd Vq Wq 539 | 0f11 movups Wq Vq 540 | f30f11 movss Wq Vq 541 | 660f11 movupd Wq Vq 542 | f20f11 movsd Wq Vq 543 | 0f12 544 | 0f13 545 | 0f14 546 | 0f15 547 | 0f16 548 | 0f17 549 | 0f18 550 | 0f19 551 | 0f1a 552 | 0f1b 553 | 0f1c 554 | 0f1d 555 | 0f1e 556 | 0f1f grp9 f:2 557 | 0f1f00 nop Ev 558 | 0f1f08 nop Ev 559 | 0f1f10 nop Ev 560 | 0f1f18 nop Ev 561 | 0f1f20 nop Ev 562 | 0f1f28 nop Ev 563 | 0f1f30 nop Ev 564 | 0f1f38 nop Ev 565 | 0f20 mov Rd Cd 566 | 0f21 mov Rd Dd 567 | 0f22 mov Cd Rd 568 | 0f23 mov Dd Rd 569 | 0f24 mov Rd Td 570 | 0f25 571 | 0f26 mov Td Rd 572 | 0f27 573 | 0f28 574 | 0f29 575 | 0f2a cvt 576 | 0f2b 577 | 0f2c cvtt 578 | 0f2d 579 | 0f2e 580 | 0f2f 581 | 0f30 582 | 0f31 583 | 0f32 584 | 0f33 585 | 0f34 586 | 0f35 587 | 0f36 588 | 0f37 589 | 0f38 590 | 0f39 591 | 0f3a 592 | 0f3b 593 | 0f3c 594 | 0f3d 595 | 0f3e 596 | 0f3f 597 | 0f40 cmovo Gv Ev 598 | 0f41 cmovno Gv Ev 599 | 0f42 cmovb Gv Ev 600 | 0f43 cmovnb Gv Ev 601 | 0f44 cmovz Gv Ev 602 | 0f45 cmovnz Gv Ev 603 | 0f46 cmovbe Gv Ev 604 | 0f47 cmova Gv Ev 
605 | 0f48 cmovs Gv Ev 606 | 0f49 cmovns Gv Ev 607 | 0f4a cmovp Gv Ev 608 | 0f4b cmovnp Gv Ev 609 | 0f4c cmovl Gv Ev 610 | 0f4d cmovge Gv Ev 611 | 0f4e cmovng Gv Ev 612 | 0f4f cmovg Gv Ev 613 | 0f50 614 | 0f51 615 | 0f52 616 | 0f53 617 | 0f54 618 | 0f55 619 | 0f56 620 | 0f57 621 | 0f58 addps Vq Wq 622 | f30f58 addss Vq Wq 623 | 660f58 addpd Vq Wq 624 | f20f58 addsd Vq Wq 625 | 0f59 mulps Vd Wd 626 | f30f59 mulss Vq Wq 627 | 660f59 mulpd Vq Wq 628 | f20f59 mulsd Vq Wq 629 | 0f5a cvtps2pd Vd Wd 630 | 660f5a cvtpd2ps Vq Wq 631 | f30f5a cvtss2sd Vq Wq 632 | f20f5a cvtsd2ss Vq Wq 633 | 0f5b 634 | 0f5c 635 | 0f5d 636 | 0f5e divps Vq Wq 637 | f30f5e divss Vq Wq 638 | 660f5e divpd Vq Wq 639 | f20f5e divsd Vq Wq 640 | 0f5f 641 | 0f60 642 | 0f61 643 | 0f62 644 | 0f63 645 | 0f64 646 | 0f65 647 | 0f66 648 | 0f67 649 | 0f68 650 | 0f69 651 | 0f6a 652 | 0f6b 653 | 0f6c 654 | 0f6d 655 | 0f6e 656 | 0f6f 657 | 0f70 658 | 0f71 659 | 0f72 660 | 0f73 661 | 0f74 662 | 0f75 663 | 0f76 664 | 0f77 665 | 0f78 666 | 0f79 667 | 0f7a 668 | 0f7b 669 | 0f7c 670 | 0f7d 671 | 0f7e 672 | 0f7f 673 | 0f80 jo Jv 674 | 0f81 jno Jv 675 | 0f82 jb Jv 676 | 0f83 jnb Jv 677 | 0f84 jz Jv 678 | 0f85 jnz Jv 679 | 0f86 jbe Jv 680 | 0f87 jnbe Jv 681 | 0f88 js Jv 682 | 0f89 jns Jv 683 | 0f8a jp Jv 684 | 0f8b jnp Jv 685 | 0f8c jl Jv 686 | 0f8d jge Jv 687 | 0f8e jle Jv 688 | 0f8f jg Jv 689 | 0f90 seto Eb 690 | 0f91 setno Eb 691 | 0f92 setb Eb 692 | 0f93 setnb Eb 693 | 0f94 setz Eb 694 | 0f95 setnz Eb 695 | 0f96 setbe Eb 696 | 0f97 setnbe Eb 697 | 0f98 sets Eb 698 | 0f99 setns Eb 699 | 0f9a setp Eb 700 | 0f9b setnp Eb 701 | 0f9c setl Eb 702 | 0f9d setnl Eb 703 | 0f9e setle Eb 704 | 0f9f setnle Eb 705 | 0fa0 push fs 706 | 0fa1 pop fs 707 | 0fa2 708 | 0fa3 bt Ev Gv 709 | 0fa4 shld Ev Gv Ib 710 | 0fa5 shld Ev Gv cl 711 | 0fa6 712 | 0fa7 713 | 0fa8 push gs 714 | 0fa9 pop gs 715 | 0faa 716 | 0fab bts Ev Gv 717 | 0fac shrd Ev Gv Ib 718 | 0fad shrd Ev Gv cl 719 | 0fae 720 | 0faf imul Gv Ev 721 | 0fb0 722 | 0fb1 723 | 0fb2 
lss Mp 724 | 0fb3 btr Ev Gv 725 | 0fb4 lfs Mp 726 | 0fb5 lgs Mp 727 | 0fb6 movzx Gv Eb 728 | 0fb7 movzx Gv Ew 729 | 0fb8 730 | 0fb9 731 | 0fba grp8d Ev Ib 732 | 0fbb btc Ev Gv 733 | 0fbc bsf Gv Ev 734 | 0fbd bsr Gv Ev 735 | 0fbe movsx Gv Eb 736 | 0fbf movsx Gv Ew 737 | 0fc0 738 | 0fc1 739 | 0fc2 740 | 0fc3 741 | 0fc4 742 | 0fc5 743 | 0fc6 744 | 0fc7 745 | 0fc8 746 | 0fc9 747 | 0fca 748 | 0fcb 749 | 0fcc 750 | 0fcd 751 | 0fce 752 | 0fcf 753 | 0fd0 754 | 0fd1 755 | 0fd2 756 | 0fd3 757 | 0fd4 758 | 0fd5 759 | 0fd6 760 | 0fd7 761 | 0fd8 762 | 0fd9 763 | 0fda 764 | 0fdb 765 | 0fdc 766 | 0fdd 767 | 0fde 768 | 0fdf 769 | 0fe0 770 | 0fe1 771 | 0fe2 772 | 0fe3 773 | 0fe4 774 | 0fe5 775 | 0fe6 776 | 0fe7 777 | 0fe8 778 | 0fe9 779 | 0fea 780 | 0feb 781 | 0fec 782 | 0fed 783 | 0fee 784 | 0fef pxor Pq Qq 785 | 660fef pxor Vq Wq 786 | 0ff0 787 | 0ff1 788 | 0ff2 789 | 0ff3 790 | 0ff4 791 | 0ff5 792 | 0ff6 793 | 0ff7 794 | 0ff8 psubb Pq Qq 795 | 660ff8 psubb Vq Wq 796 | 0ff9 psubw Pq Qq 797 | 660ff9 psubw Vq Wq 798 | 0ffa psubd Pq Qq 799 | 660ffa psubd Vq Wq 800 | 0ffb psubq Pq Qq 801 | 660ffb psubq Vq Wq 802 | 0ffc paddb Pq Qq 803 | 660ffc paddb Vq Wq 804 | 0ffd paddw Pq Qq 805 | 660ffd paddw Vq Wq 806 | 0ffe paddd Pq Qq 807 | 660ffe paddd Vq Wq 808 | -------------------------------------------------------------------------------- /src/arch/x86/x86.c: -------------------------------------------------------------------------------- 1 | #include "x86.h" 2 | 3 | /*Disassembles a single instruction from stream (max bytes available). The opcode is looked up in the trie rooted at root; entries flagged PREFIX_FLAG are consumed in a loop, recording REX bits and the 0x66/0x67 overrides in flags. Reg/mod opcode-extension nodes are then followed and every operand of the matched entry is decoded. addr plus the number of bytes used is added to relative ('J'/'A') address operands. Returns the new dis with used_bytes set, or 0/NULL when max is 0 or no entry matches*/ struct dis *x86_disassemble(int mode, struct trie_node *root, u8 * stream, 4 | long max, uint64_t addr) 5 | { 6 | if (!max) 7 | return 0; 8 | long iter = 0; 9 | /*Find instruction match in the trie */ 10 | struct trie_node *n = trie_lookup(root, stream + iter, max - iter); 11 | const char *offset = NULL; 12 | iter += n->dist; /*NOTE(review): n is dereferenced here and in the loop below, before the !n check further down - confirm trie_lookup never returns NULL*/ 13 | u8 flags = 0; 14 | while (n->flags & PREFIX_FLAG) { 15 | char prefix = n->key; 16 | if (mode == MODE_X64 && prefix >= 0x40 && prefix <= 0x50) { /*NOTE(review): 0x50 is inside this range although only the low four bits are decoded as REX bits below - confirm the upper bound*/ 17 | prefix = prefix & 0x0f; 18 | if (prefix & 0x1) 
19 | flags |= REX_B; 20 | if (prefix & 0x2) 21 | flags |= REX_X; 22 | if (prefix & 0x4) 23 | flags |= REX_R; 24 | if (prefix & 0x8) 25 | flags |= REX_W; 26 | } 27 | if (n->flags & OFFSET_FLAG && !offset) { 28 | offset = 29 | n->value ? ((struct x86_instr_entry *) n-> 30 | value)->mnemonic : NULL; 31 | } 32 | if (prefix == 0x66) 33 | flags |= OPER_SIZE_OVERRIDE; 34 | if (prefix == 0x67) 35 | flags |= ADDR_SIZE_OVERRIDE; 36 | n = trie_lookup(root, stream + iter, max - iter); 37 | iter += n->dist; 38 | } 39 | /*Some instructions have an opcode extension in the reg part of the modrm byte */ 40 | if (n->flags == REG_EXT_FLAG) { 41 | u8 sv = stream[iter]; 42 | //Zero out everything but the reg 3 bits 43 | sv = (sv & 0x38); 44 | n = trie_lookup(n, &sv, 1); 45 | } 46 | /*Some x87 instructions use the mod field as an opcode extension */ 47 | if (n->flags == MOD_EXT_FLAG) { 48 | u8 sv = stream[iter] >> 6; 49 | if (sv == MODRM_REG) 50 | n = trie_lookup(n, &sv, 1); 51 | } 52 | /*If the instruction is not found, then die */ 53 | if (!n || !n->value) { 54 | return NULL; 55 | } 56 | 57 | u8 *operand_stream = stream + iter; 58 | long operand_max = max - iter; 59 | 60 | struct x86_instr_entry *e = n->value; 61 | struct dis *disas = dis_init(); 62 | /*Set mnemonic (copied without its terminating NUL) */ 63 | memcpy(disas->mnemonic, e->mnemonic, strlen(e->mnemonic)); 64 | 65 | /*Used First Byte: starts at iter and is bumped once per G/V/P/B operand; iter is raised to it after the loop if it is larger */ 66 | int ufb = iter; 67 | /*Create operands based on addressing modes */ 68 | for (int i = 0; i < e->num_op; i++) { 69 | struct operand_tree *operand = NULL; 70 | /*The reg operand may need a byte previous in the stream, so pass in the first byte after the opcode and ignore the byte count it returns */ 71 | char fv = e->operand[i][0]; 72 | if (fv == 'G' || fv == 'V' || fv == 'P' || fv == 'B') { 73 | (void) x86_decode_operand(&operand, mode, 74 | e->operand[i], flags, 75 | operand_stream, 76 | operand_max); 77 | ufb++; 78 | } else { 79 | iter += 80 | x86_decode_operand(&operand, mode, 81 | e->operand[i], flags, 82 | stream + iter, max - iter); 83 | } 84 | /*If there is a 
segment offset, add it onto any modrm memory operand */ 85 | if (e->operand[i][0] == 'E' && offset) { 86 | fmt_offset_str(operand, offset); 87 | } 88 | if (operand) 89 | dis_add_operand(disas, operand); 90 | } 91 | if (ufb > iter) 92 | iter = ufb; 93 | /*Check if the operands type was relative, if so add the start address and the used bytes */ 94 | for (int i = 0; i < disas->num_operands && i < e->num_op; i++) { 95 | if (e->operand[i][0] == 'J' || e->operand[i][0] == 'A') { 96 | if (disas->operands[i]->type == DIS_OPER 97 | && TREE_OPTYPE(disas->operands[i]) == 98 | DIS_ADDR) { 99 | TREE_ADDR(disas->operands[i]) += 100 | addr + iter; 101 | } 102 | } 103 | } 104 | 105 | disas->used_bytes = iter; 106 | return disas; 107 | } 108 | 109 | /*Decodes one operand descriptor string. A descriptor starting with a capital letter is an addressing mode (optionally followed by a size letter) and is handed to x86_disassemble_operand(); anything else is treated as a literal register name or an immediate value. The resulting operand is stored in *opt. Returns the number of stream bytes used*/ 110 | long x86_decode_operand(struct operand_tree **opt, int mode, char *operand, 111 | u8 flags, u8 * stream, long max) 112 | { 113 | /*Stream iterator */ 114 | long iter = 0; 115 | /*Set initial size based on defaults and flags */ 116 | 117 | int operand_size = DEF_OPER_SIZE(mode) + CHECK_FLAG(flags, REX_W); 118 | operand_size = operand_size == 3 && CHECK_FLAG(flags, OPER_SIZE_OVERRIDE) ? 
2 : operand_size; 119 | int addr_size = 120 | DEF_ADDR_SIZE(mode) - CHECK_FLAG(flags, ADDR_SIZE_OVERRIDE); 121 | 122 | /*If the operand is an addressing mode, then it will be a capital letter, otherwise a value */ 123 | if (operand[0] >= 'A' && operand[0] <= 'Z') { 124 | /*Set operand size */ 125 | if (operand[1]) 126 | operand_size = 127 | x86_operand_size(mode, operand_size, 128 | operand[1], flags); 129 | iter += 130 | x86_disassemble_operand(opt, mode, operand[0], operand_size, 131 | addr_size, stream + iter, 132 | max - iter, flags); 133 | } else { 134 | /*Check if its a register */ 135 | int ridx = get_register_index(operand); 136 | int xidx = get_xmm_index(operand); 137 | int fidx = get_x87_index(operand); 138 | int midx = get_mm_index(operand); 139 | if (midx != -1 || fidx != -1 || xidx != -1) { 140 | *opt = operand_reg(operand); 141 | } /*NOTE(review): when only the mm/x87/xmm lookup matched, the else branch below still runs and replaces *opt with an immediate - confirm intent*/ 142 | if (ridx != -1) { 143 | int size = REG_SIZE_IDX(ridx); 144 | /*If the register is set as 4 and in 32 bit mode, scale it down */ 145 | size = size == 4 146 | && mode == MODE_X86 ? size - 1 : size; 147 | size = CHECK_FLAG(flags, OPER_SIZE_OVERRIDE) ? 
size - 1 : size; 148 | *opt = 149 | operand_reg(get_register 150 | (REG_BIN_IDX(ridx), size, 151 | CHECK_FLAG(flags, REX_B))); 152 | } else { 153 | long val = strtol(operand, NULL, 0); 154 | *opt = operand_imm(val); 155 | } 156 | } 157 | return iter; 158 | } 159 | 160 | /*Builds the operand for one addressing-mode letter (addr_mode) from the bytes at stream and stores it in *operand. op_size and addr_size are the size classes to use and flags carries the REX bits. Letters without a case leave *operand untouched. Returns the number of stream bytes used*/ 161 | long x86_disassemble_operand(struct operand_tree **operand, int mode, u8 addr_mode, 162 | int op_size, int addr_size, u8 * stream, 163 | long max, u8 flags) 164 | { 165 | long iter = 0; 166 | /*Construct operand based on addressing mode, size, and flags */ 167 | switch (addr_mode) { 168 | /*MODRM Encoding */ 169 | case 'E': /*Modrm encoding dword [ebx+4] <- this thing */ 170 | iter += 171 | x86_decode_modrm(operand, mode, op_size, addr_size, stream, 172 | max, flags); 173 | break; 174 | case 'G':; /*Register modrm encoding ->>> eax, <<-- dword [eax] */ 175 | u8 mrmreg = (stream[0] & 0x38) >> 3; 176 | *operand = 177 | operand_reg(get_register 178 | (mrmreg, op_size, 179 | CHECK_FLAG(flags, REX_R))); 180 | break; 181 | case 'I':; /*Immediate Value */ 182 | uint64_t imm = 0; 183 | iter += get_integer(&imm, op_size, stream, max); 184 | //if (op_size == 1 && imm > 0x80) imm = 0x100 - imm; 185 | *operand = operand_imm(imm); 186 | break; 187 | case 'J':; /*Relative address */ 188 | uint64_t addr = 0; 189 | iter += get_integer(&addr, op_size, stream, max); 190 | *operand = operand_addr(addr); 191 | break; 192 | case 'O':; /*Offset */ 193 | uint64_t offset = 0; 194 | iter += get_integer(&offset, addr_size, stream, max); 195 | *operand = 196 | x86_indir_operand_tree(op_size, NULL, NULL, 1, offset); 197 | break; 198 | case 'A':; /*Direct Addressing */ 199 | uint64_t daddr = 0; 200 | iter += 201 | get_integer(&daddr, op_size, stream + iter, 202 | max - iter); 203 | *operand = operand_addr(daddr); 204 | break; 205 | case 'M':; /*Memory address. 
(modrm but with an operand size of 0 */ 206 | iter += 207 | x86_decode_modrm(operand, mode, 0, addr_size, stream, max, 208 | flags); 209 | break; 210 | case 'P': /*Reg field of modrm selects an entry of mm_registers */ 211 | *operand = 212 | operand_reg(mm_registers[(stream[0] & 0x38) >> 3]); 213 | break; 214 | case 'Q': /*Entry of mm_registers or memory location. NOTE(review): the register form reads bits 3-5 here (as does case 'H') while case 'W' reads bits 0-2 - confirm which field is intended */ 215 | if ((stream[0] >> 6) != MODRM_REG) { 216 | iter += 217 | x86_decode_modrm(operand, mode, op_size, addr_size, 218 | stream, max, flags); 219 | } else { 220 | *operand = 221 | operand_reg(mm_registers 222 | [(stream[iter++] & 0x38) >> 3]); 223 | } 224 | break; 225 | case 'V': /*Selects XMM Register */ 226 | *operand = 227 | operand_reg(xmm_registers[(stream[0] & 0x38) >> 3]); 228 | break; 229 | case 'W': /*Selects XMM Register or memory location */ 230 | if ((stream[0] >> 6) != MODRM_REG) { 231 | iter += 232 | x86_decode_modrm(operand, mode, op_size, addr_size, 233 | stream, max, flags); 234 | } else { 235 | *operand = 236 | operand_reg(xmm_registers 237 | [(stream[iter++] & 0x7)]); 238 | } 239 | break; 240 | case 'B': /*Low three bits of the modrm byte select a x87 fpu stack register */ 241 | *operand = operand_reg(x87_registers[(stream[0] & 0x7)]); 242 | break; 243 | case 'H': /*The modrm byte specifies either a x87 fpu stack register or memory address */ 244 | if ((stream[0] >> 6) != MODRM_REG) { 245 | iter += 246 | x86_decode_modrm(operand, mode, op_size, addr_size, 247 | stream, max, flags); 248 | } else { 249 | *operand = 250 | operand_reg(x87_registers 251 | [(stream[iter++] & 0x38) >> 3]); 252 | } 253 | break; 254 | case 'X': /*DS:RSI offset addressing mode */ 255 | *operand = 256 | x86_indir_operand_tree(op_size, 257 | get_register(6, addr_size, 0), 258 | NULL, 1, 0); 259 | fmt_offset_str(*operand, "ds"); 260 | break; 261 | case 'Y': /*ES:RDI Offset addressing mode */ 262 | *operand = 263 | x86_indir_operand_tree(op_size, 264 | get_register(7, addr_size, 0), 265 | NULL, 1, 0); 266 | fmt_offset_str(*operand, "es"); 267 | break; 268 | } 269 | return iter; 270 | } 271 | 272 | /*Decodes modrm byte*/ 273 | 
long x86_decode_modrm(struct operand_tree **operand, int mode, int op_size, 274 | int addr_size, u8 * stream, long max, u8 flags) 275 | { 276 | long iter = 0; 277 | /*Get modrm byte */ 278 | u8 modrm = stream[iter++]; 279 | /*MODRM byte. mod = xx000000, reg = 00xxx000, rm = 00000xxx */ 280 | u8 mod = (modrm & 0xc0) >> 6; 281 | //u8 reg = (modrm & 0x38) >> 3; 282 | u8 rm = (modrm & 0x7); 283 | /*Scale Index Base Encoding when rm == 4 and mod != 3 */ 284 | if (rm == SIB_RM && mod != MODRM_REG) { 285 | iter += 286 | x86_decode_sib(operand, op_size, addr_size, 287 | stream + iter, max - iter, flags); 288 | return iter; 289 | } 290 | /*If op_size is 0 it means that it was a memory argument and its encoded wrong if it gets this far */ 291 | //op_size = op_size == 0 ? addr_size : op_size; 292 | /*MODRM: EBP is invalid rm byte in indirect mode, so it means disp only */ 293 | if (MODRM_DISPONLY(mod, rm)) { 294 | if (max < 4) 295 | return iter; 296 | *operand = 297 | x86_indir_operand_tree(op_size, mode == MODE_X64 ? 
"rip" : NULL, NULL, 1, 298 | *(uint32_t *) (stream + iter)); 299 | iter += 4; 300 | return iter; 301 | } 302 | 303 | const char *reg; 304 | /*Indirect registers are the address size, while the data size is the operand size */ 305 | switch (mod) { 306 | case MODRM_INDIR: 307 | reg = 308 | get_register(rm, addr_size, CHECK_FLAG(flags, REX_B)); 309 | *operand = 310 | x86_indir_operand_tree(op_size, reg, NULL, 1, 0); 311 | break; 312 | case MODRM_1DISP: 313 | reg = 314 | get_register(rm, addr_size, CHECK_FLAG(flags, REX_B)); 315 | uint64_t bdisp = (int64_t) (signed char) stream[iter++]; 316 | *operand = 317 | x86_indir_operand_tree(op_size, reg, NULL, 1, 318 | (signed long) bdisp); 319 | break; 320 | case MODRM_4DISP: 321 | reg = 322 | get_register(rm, addr_size, CHECK_FLAG(flags, REX_B)); 323 | uint64_t disp = *(int32_t *) (stream + iter); 324 | iter += 4; 325 | *operand = 326 | x86_indir_operand_tree(op_size, reg, NULL, 1, disp); 327 | break; 328 | case MODRM_REG:; 329 | *operand = 330 | operand_reg(get_register 331 | (rm, op_size, CHECK_FLAG(flags, REX_B))); 332 | break; 333 | } 334 | 335 | return iter; 336 | } 337 | 338 | /*Decodes sib byte*/ 339 | long x86_decode_sib(struct operand_tree **operand, int op_size, 340 | int addr_size, u8 * stream, long max, u8 flags) 341 | { 342 | long iter = 0; 343 | if (!max) 344 | return 0; 345 | /*This is safe because decode sib must be called from the decode modrm function */ 346 | u8 mod = ((stream[-1] & 0xc0) >> 6); 347 | u8 sib = stream[iter++]; 348 | int power = (sib & 0xc0) >> 6; 349 | int scale = 1; 350 | /*Scale is a power of two */ 351 | for (int i = 0; i < power; i++) 352 | scale *= 2; 353 | u8 idx = ((sib & 0x38) >> 3); 354 | u8 bse = sib & 0x7; 355 | const char *index = NULL, *base = NULL; 356 | if (!SIB_NO_INDEX(idx)) 357 | index = 358 | get_register(idx, addr_size, CHECK_FLAG(flags, REX_X)); 359 | if (!SIB_NO_BASE(mod, bse)) 360 | base = 361 | get_register(bse, addr_size, CHECK_FLAG(flags, REX_B)); 362 | 363 | 
uint64_t offset = 0; 364 | if (mod == 1) { 365 | if (!(max - iter)) 366 | return iter; 367 | offset = (signed char) stream[iter++]; 368 | //offset = offset > 0x80 ? 0x100 - offset : offset; 369 | } else if (mod == 2) { 370 | if ((max - iter) < 4) 371 | return iter; 372 | offset = *(int32_t *) (stream + iter); 373 | iter += 4; 374 | } else if (mod == 0 && bse == 5) { 375 | if ((max - iter) < 4) 376 | return iter; 377 | offset = *(int32_t *) (stream + iter); 378 | iter += 4; 379 | } 380 | *operand = 381 | x86_indir_operand_tree(op_size, base, index, scale, offset); 382 | 383 | return iter; 384 | } 385 | 386 | 387 | /*Create an operand tree given the possible parameters for an indirect memory address*/ 388 | struct operand_tree *x86_indir_operand_tree(int op_size, const char *base, 389 | const char *index, long scale, 390 | unsigned long offset) 391 | { 392 | struct operand_tree *indir = operand_tree_init(DIS_BRANCH); 393 | 394 | /*Create a format string based on possible parameters */ 395 | if (op_size >= 1 && op_size <= 4) 396 | operand_tree_fmt(indir, "%s ", operand_size_prefix[op_size-1]); 397 | 398 | operand_tree_fmt(indir, "["); 399 | /*Possible parameters: base op, index op, scale op, offset op */ 400 | struct operand_tree *bop = NULL, *iop = NULL, *sop = NULL, *oop = 401 | NULL; 402 | if (base) { 403 | bop = operand_reg(base); 404 | operand_tree_add(indir, bop); 405 | operand_tree_fmt(indir, "$%d", TREE_NCHILD(indir) - 1); 406 | } 407 | if (index) { 408 | iop = operand_reg(index); 409 | operand_tree_add(indir, iop); 410 | if (base) 411 | operand_tree_fmt(indir, "+$%d", TREE_NCHILD(indir) - 1); 412 | else 413 | operand_tree_fmt(indir, "$%d", TREE_NCHILD(indir) - 1); 414 | } 415 | if (scale != 1) { 416 | sop = operand_imm(scale); 417 | operand_tree_add(indir, sop); 418 | operand_tree_fmt(indir, "*$%d", TREE_NCHILD(indir) - 1); 419 | } 420 | 421 | /*Correct sign for offset */ 422 | char sign = '+'; 423 | if (offset > 0x80000000 && (bop || sop || iop)) { 424 | 
offset = -offset; 425 | sign = '-'; 426 | } 427 | if (offset != 0) { 428 | oop = operand_addr(offset); 429 | operand_tree_add(indir, oop); 430 | if (TREE_NCHILD(indir) == 1 && sign != '-') 431 | operand_tree_fmt(indir, "$%d", TREE_NCHILD(indir)-1); 432 | else 433 | operand_tree_fmt(indir, "%c$%d", sign, TREE_NCHILD(indir)-1); 434 | 435 | } 436 | operand_tree_fmt(indir, "]"); 437 | return indir; 438 | } 439 | 440 | int x86_operand_size(int mode, int op_size, char size_byte, u8 flags) 441 | { 442 | switch (size_byte) { 443 | /*Byte */ 444 | case 'b': 445 | return 1; 446 | /*Byte Or Word */ 447 | case 'c': 448 | return 1 + CHECK_FLAG(flags, OPER_SIZE_OVERRIDE); 449 | /*Double Word or Word */ 450 | case 'd': 451 | return 3 - CHECK_FLAG(flags, OPER_SIZE_OVERRIDE); 452 | /*Quad word, double word or word */ 453 | case 'v': 454 | return 3 - CHECK_FLAG(flags, 455 | OPER_SIZE_OVERRIDE) + 456 | CHECK_FLAG(flags, REX_W); 457 | /*Word */ 458 | case 'w': 459 | return 2; 460 | /*Quad word, unless in 32 bit mode */ 461 | case 'q': 462 | return mode == MODE_X64 ? 
4 : 3; 463 | } 464 | return op_size; 465 | } 466 | 467 | long get_integer(uint64_t * val, int size, u8 * stream, long max) 468 | { 469 | if (size == 4) { 470 | if (max < 8) 471 | return max; 472 | *val = (int64_t) * (int64_t *) stream; 473 | return 8; 474 | } else if (size == 3) { 475 | if (max < 4) 476 | return max; 477 | *val = (int64_t) * (int32_t *) stream; 478 | return 4; 479 | } else if (size == 2) { 480 | if (max < 2) 481 | return max; 482 | *val = (int64_t) * (int16_t *) stream; 483 | return 2; 484 | } else { 485 | if (!max) 486 | return 0; 487 | *val = (int64_t) (signed char) stream[0]; 488 | return 1; 489 | } 490 | } 491 | 492 | /*Inserts offset into a operand tree*/ 493 | void fmt_offset_str(struct operand_tree *operand, const char *offset) 494 | { 495 | if (!operand || operand->type != DIS_BRANCH) 496 | return; 497 | char buf[64]; 498 | memset(buf, 0, 64); 499 | char *format = TREE_FORMAT(operand); 500 | int mlen = strlen(format); 501 | memcpy(buf, format, mlen); 502 | int off = 0; 503 | while (buf[off] != '[' && off < mlen) 504 | off++; 505 | int iter = 0; 506 | iter += snprintf(format, FMT_SIZE, "%*.*s", off, off, buf); 507 | iter += snprintf(format + iter, FMT_SIZE - iter, "%s:", offset); 508 | iter += snprintf(format + iter, FMT_SIZE - iter, "%s", buf + off); 509 | } 510 | --------------------------------------------------------------------------------