├── .gitignore ├── LICENSE ├── Makefile ├── README ├── bb.cc ├── bb.h ├── cfg.cc ├── cfg.h ├── dataregion.cc ├── dataregion.h ├── disasm-aarch64.cc ├── disasm-aarch64.h ├── disasm-arm.cc ├── disasm-arm.h ├── disasm-mips.cc ├── disasm-mips.h ├── disasm-ppc.cc ├── disasm-ppc.h ├── disasm-x86.cc ├── disasm-x86.h ├── disasm.cc ├── disasm.h ├── edge.cc ├── edge.h ├── endian.cc ├── endian.h ├── exception.cc ├── exception.h ├── export.cc ├── export.h ├── function.cc ├── function.h ├── insn.cc ├── insn.h ├── loader.cc ├── loader.h ├── log.cc ├── log.h ├── nucleus.cc ├── nucleus.h ├── options.cc ├── options.h ├── strategy.cc ├── strategy.h ├── testout ├── util.cc └── util.h /.gitignore: -------------------------------------------------------------------------------- 1 | /nucleus 2 | /obj 3 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2016, 2017, 2018 Dennis Andriesse, Vrije Universiteit Amsterdam. 2 | All rights reserved. 3 | 4 | Redistribution and use in source and binary forms, with or without modification, 5 | are permitted provided that the following conditions are met: 6 | 7 | * Redistributions of source code must retain the above copyright notice, this 8 | list of conditions and the following disclaimer. 9 | 10 | * Redistributions in binary form must reproduce the above copyright notice, 11 | this list of conditions and the following disclaimer in the documentation 12 | and/or other materials provided with the distribution. 13 | 14 | * Neither the name of the copyright holders nor the names of its contributors 15 | may be used to endorse or promote products derived from this software 16 | without specific prior written permission. 
17 | 18 | THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 19 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 20 | WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 21 | DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE 22 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR 24 | SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER 25 | CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, 26 | OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 | OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 | 29 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | CXX=g++ 2 | CXXFLAGS=-Wall -std=c++11 -O2 -DNDEBUG -fpermissive 3 | LDFLAGS=-lcapstone -lbfd-multiarch 4 | 5 | SRC=$(wildcard *.cc) 6 | OBJ=$(patsubst %.cc, obj/%.o, $(SRC)) 7 | BIN=nucleus 8 | 9 | .PHONY: all clean setup 10 | 11 | all: $(BIN) 12 | 13 | $(OBJ): | obj 14 | 15 | obj: 16 | @mkdir -p $@ 17 | 18 | obj/%.o: %.cc %.h 19 | $(CXX) $(CXXFLAGS) -c -o $@ $< 20 | 21 | $(BIN): $(OBJ) 22 | $(CXX) $(CXXFLAGS) -o $(BIN) $(OBJ) $(LDFLAGS) 23 | 24 | setup: 25 | sudo apt install binutils-multiarch-dev libcapstone-dev 26 | 27 | clean: 28 | rm -f $(OBJ) 29 | rm -Rf obj 30 | rm -f $(BIN) 31 | 32 | -------------------------------------------------------------------------------- /README: -------------------------------------------------------------------------------- 1 | Based on the paper "Compiler-Agnostic Function Detection in Binaries", 2 | published at EuroS&P 2017. 
3 | 4 | Requirements: 5 | - libcapstone (tested with 3.0) 6 | - libbfd-multiarch 7 | 8 | Platform: 9 | - Tested on Ubuntu 15.10 and 16.04 10 | On these platforms you can install the required libraries by executing: 11 | make setup 12 | 13 | Suggested usage: 14 | make 15 | ./nucleus -e /bin/ls -d linear -i idainfo.py 16 | (idainfo.py can be run in IDA Pro to import the functions found by nucleus) 17 | -------------------------------------------------------------------------------- /bb.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "bb.h" 4 | #include "insn.h" 5 | 6 | 7 | void 8 | BB::print(FILE *out) 9 | { 10 | fprintf(out, "BB @0x%016jx (score %.10f) %s%s%s%s {\n", 11 | start, score, invalid ? "i" : "-", privileged ? "p" : "-", 12 | addrtaken ? "a" : "-", padding ? "n" : "-"); 13 | if(invalid) { 14 | fprintf(out, " 0x%016jx (bad)", start); 15 | } else { 16 | for(auto &ins: insns) { 17 | ins.print(out); 18 | } 19 | } 20 | if(!ancestors.empty()) { 21 | fprintf(out, "--A ancestors:\n"); 22 | for(auto &e: ancestors) { 23 | fprintf(out, "--A 0x%016jx (%s)\n", e.src->insns.back().start, e.type2str().c_str()); 24 | } 25 | } 26 | if(!targets.empty()) { 27 | fprintf(out, "--T targets:\n"); 28 | for(auto &e: targets) { 29 | fprintf(out, "--T 0x%016jx (%s)\n", e.dst->start+e.offset, e.type2str().c_str()); 30 | } 31 | } 32 | fprintf(out, "}\n\n"); 33 | } 34 | 35 | 36 | bool 37 | BB::is_called() 38 | { 39 | for(auto &e: ancestors) { 40 | if((e.type == Edge::EDGE_TYPE_CALL) 41 | || (e.type == Edge::EDGE_TYPE_CALL_INDIRECT)) { 42 | return true; 43 | } 44 | } 45 | 46 | return false; 47 | } 48 | 49 | 50 | bool 51 | BB::returns() 52 | { 53 | return (insns.back().flags & Instruction::INS_FLAG_RET); 54 | } 55 | 56 | -------------------------------------------------------------------------------- /bb.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_BB_H 2 | #define 
NUCLEUS_BB_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "insn.h" 10 | #include "edge.h" 11 | #include "loader.h" 12 | 13 | class Function; 14 | 15 | class BB { 16 | public: 17 | BB() : start(0), end(0), function(NULL), section(NULL), score(0.0), 18 | alive(false), invalid(false), privileged(false), addrtaken(false), padding(false), trap(false) {} 19 | BB(const BB &bb) : start(bb.start), end(bb.end), insns(bb.insns), function(bb.function), section(bb.section), score(bb.score), 20 | alive(bb.alive), invalid(bb.invalid), privileged(bb.privileged), addrtaken(bb.addrtaken), padding(bb.padding), trap(bb.trap), 21 | ancestors(bb.ancestors), targets(bb.targets) {} 22 | 23 | void reset() { start = 0; end = 0; insns.clear(); function = NULL; section = NULL; score = 0.0; 24 | alive = false; invalid = false; privileged = false; addrtaken = false; padding = false; trap = false; 25 | ancestors.clear(); targets.clear(); } 26 | void set(uint64_t start, uint64_t end) { reset(); this->start = start; this->end = end; } 27 | 28 | bool is_addrtaken () { return addrtaken; } 29 | bool is_invalid () { return invalid; } 30 | bool is_padding () { return padding; } 31 | bool is_trap () { return trap; } 32 | bool is_called (); 33 | bool returns (); 34 | 35 | void print(FILE *out); 36 | 37 | static bool comparator (BB& bb, BB& cc) { return bb.start < cc.start; } 38 | inline bool operator< (const BB& cc) const { return this->start < cc.start; } 39 | 40 | uint64_t start; 41 | uint64_t end; 42 | std::list insns; 43 | Function *function; 44 | Section *section; 45 | 46 | double score; 47 | bool alive; 48 | bool invalid; 49 | bool privileged; 50 | bool addrtaken; 51 | bool padding; 52 | bool trap; 53 | 54 | std::list ancestors; 55 | std::list targets; 56 | }; 57 | 58 | #endif /* NUCLEUS_BB_H */ 59 | 60 | -------------------------------------------------------------------------------- /cfg.cc: -------------------------------------------------------------------------------- 1 | 
#include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | 9 | #include "bb.h" 10 | #include "edge.h" 11 | #include "function.h" 12 | #include "disasm.h" 13 | #include "loader.h" 14 | #include "cfg.h" 15 | #include "log.h" 16 | #include "options.h" 17 | #include "endian.h" 18 | 19 | 20 | void 21 | CFG::print_functions(FILE *out) 22 | { 23 | for(auto &f: this->functions) { 24 | f.print(out); 25 | } 26 | } 27 | 28 | 29 | void 30 | CFG::print_function_summaries(FILE *out) 31 | { 32 | for(auto &f: this->functions) { 33 | f.print_summary(out); 34 | } 35 | } 36 | 37 | 38 | void 39 | CFG::mark_addrtaken(uint64_t addr) 40 | { 41 | BB *cc; 42 | 43 | if (this->start2bb.count(addr)) { 44 | cc = this->start2bb[addr]; 45 | if (!cc->addrtaken) { 46 | cc->addrtaken = true; 47 | verbose(3, "marking addrtaken bb@0x%016jx", cc->start); 48 | } 49 | } 50 | } 51 | 52 | 53 | void 54 | CFG::analyze_addrtaken_ppc() 55 | { 56 | BB *bb; 57 | 58 | /* Instructions can get reordered, so we emulate the ISA subset relevant for the patterns below, 59 | * clearing the intermediate register values with with -1 if the result is irrelevant or undefined. */ 60 | int64_t registers[32]; 61 | 62 | for(auto &kv: this->start2bb) { 63 | bb = kv.second; 64 | for(auto &ins: bb->insns) { 65 | if(ins.operands.size() < 2) { 66 | continue; 67 | } 68 | /* Pattern #1 (32-bit) 69 | * Load the address from its word halves. 
Following variants are supported: 70 | * - Using addis/addi (gcc): 71 | * lis rN, .L@ha 72 | * addi rN, rN, L@l 73 | * - Using addis/ori: 74 | * lis rN, .L@ha 75 | * ori rN, rN, .L@l */ 76 | if(ins.id == PPC_INS_LIS) { 77 | int64_t dst = ins.operands[0].ppc_value.reg - PPC_REG_R0; 78 | int64_t imm = ins.operands[1].ppc_value.imm; 79 | assert(dst < 32); 80 | registers[dst] = imm << 16; 81 | } 82 | else if(ins.id == PPC_INS_ADDI || ins.id == PPC_INS_ORI) { 83 | int64_t lhs = ins.operands[1].ppc_value.reg - PPC_REG_R0; 84 | int64_t rhs = ins.operands[2].ppc_value.imm; 85 | assert(lhs < 32); 86 | if (registers[lhs] != -1) { 87 | mark_addrtaken(registers[lhs] | rhs); 88 | } 89 | } 90 | else if(ins.operands[0].type == Operand::OP_TYPE_REG 91 | && ins.operands[0].ppc_value.reg >= PPC_REG_R0 92 | && ins.operands[0].ppc_value.reg <= PPC_REG_R31) { 93 | int64_t dst = ins.operands[0].ppc_value.reg - PPC_REG_R0; 94 | registers[dst] = -1; 95 | } 96 | } 97 | } 98 | } 99 | 100 | 101 | void 102 | CFG::analyze_addrtaken_x86() 103 | { 104 | BB *bb; 105 | Operand *op_src, *op_dst; 106 | 107 | for(auto &kv: this->start2bb) { 108 | bb = kv.second; 109 | for(auto &ins: bb->insns) { 110 | if(ins.operands.size() < 2) { 111 | continue; 112 | } 113 | op_dst = &ins.operands[0]; 114 | op_src = &ins.operands[1]; 115 | if(((op_dst->type == Operand::OP_TYPE_REG) || (op_dst->type == Operand::OP_TYPE_MEM)) 116 | && (op_src->type == Operand::OP_TYPE_IMM)) { 117 | mark_addrtaken(op_src->x86_value.imm); 118 | } 119 | } 120 | } 121 | } 122 | 123 | 124 | void 125 | CFG::analyze_addrtaken() 126 | { 127 | verbose(1, "starting address-taken analysis"); 128 | 129 | switch(this->binary->arch) { 130 | case Binary::ARCH_PPC: 131 | analyze_addrtaken_ppc(); 132 | break; 133 | case Binary::ARCH_X86: 134 | analyze_addrtaken_x86(); 135 | break; 136 | default: 137 | print_warn("address-taken analysis not yet supported for %s", this->binary->arch_str.c_str()); 138 | break; 139 | } 140 | 141 | verbose(1, 
"address-taken analysis complete"); 142 | } 143 | 144 | /* Flag every basic block that get_bb() reports for an address in [start, end) as invalid, unlink it from the CFG and record it in bad_bbs. */ 145 | void 146 | CFG::mark_jmptab_as_data(uint64_t start, uint64_t end) 147 | { 148 | uint64_t addr; 149 | BB *bb, *cc; 150 | 151 | bb = NULL; 152 | cc = NULL; 153 | for(addr = start; addr < end; addr++) { 154 | bb = this->get_bb(addr, NULL); 155 | if(!bb) continue; 156 | if(bb != cc) { /* cc remembers the last block handled so each block is processed once per run of addresses */ 157 | bb->invalid = true; 158 | unlink_bb(bb); 159 | bad_bbs[bb->start] = bb; 160 | cc = bb; 161 | } 162 | } 163 | } 164 | 165 | /* AArch64 switch detection: emulate adrp/add to locate a jump table, take the entry width from the ldr/ldrh/ldrb that follows, then link one switch edge per table entry. */ 166 | void 167 | CFG::find_switches_aarch64(){ 168 | BB *bb, *cc; 169 | Edge *conflict_edge; 170 | Section *target_sec; 171 | int scale; 172 | unsigned offset; 173 | uint64_t jmptab_addr, jmptab_idx, jmptab_end; 174 | uint64_t case_addr, case_addr_abs, case_addr_rel; 175 | uint8_t *jmptab8; 176 | uint16_t *jmptab16; 177 | uint32_t *jmptab32; 178 | uint64_t *jmptab64; 179 | 180 | /* Instructions can get reordered, so we emulate the ISA subset relevant for the patterns below, 181 | * clearing the intermediate register values with -1 if the result is irrelevant or undefined. */ 182 | int64_t registers[32]; 183 | for (size_t i = 0; i < 32; i++) { 184 | registers[i] = -1LL; 185 | } 186 | 187 | /* Trying to determine the size of the jump table entries by looking 188 | * at the instruction immediately following the described pattern. 189 | * - scale < 0: signals the scale could not be determined. 190 | * - scale == 0: signals the current instruction may tell the scale. 
*/ 191 | scale = -1; 192 | 193 | for(auto &kv: this->start2bb) { 194 | bb = kv.second; 195 | jmptab_addr = 0; 196 | target_sec = NULL; 197 | /* If this BB ends in an indirect jmp, scan the BB for what looks like 198 | * instructions loading a target from a jump table */ 199 | if(bb->insns.back().edge_type() == Edge::EDGE_TYPE_JMP_INDIRECT) { 200 | target_sec = bb->section; 201 | for(auto &ins: bb->insns) { 202 | if(ins.operands.size() < 2) { 203 | continue; 204 | } 205 | /* Pattern #1 206 | * Loading jump table address relative to the `pc` register. 207 | * adrp x0, #:pg_hi21:.L 208 | * add x0, x0, #:lo12:.L 209 | * ldr/ldrh/ldrb ... 210 | */ 211 | 212 | /* detect scale */ 213 | if (scale == 0) { 214 | if (ins.id == ARM64_INS_LDRB) { 215 | scale = 1; 216 | } 217 | else if (ins.id == ARM64_INS_LDRH) { 218 | scale = 2; 219 | } 220 | else if (ins.id == ARM64_INS_LDR 221 | && ins.operands[0].aarch64_value.reg >= ARM64_REG_W0 222 | && ins.operands[0].aarch64_value.reg <= ARM64_REG_W28) { 223 | scale = 4; 224 | } 225 | else if (ins.id == ARM64_INS_LDR 226 | && ins.operands[0].aarch64_value.reg >= ARM64_REG_X0 227 | && ins.operands[0].aarch64_value.reg <= ARM64_REG_X28) { 228 | scale = 8; 229 | } 230 | } 231 | /* detect jump-table address loading */ 232 | if(ins.id == ARM64_INS_ADRP) { 233 | int64_t dst = ins.operands[0].aarch64_value.reg - ARM64_REG_X0; 234 | int64_t imm = ins.operands[1].aarch64_value.imm; 235 | assert(dst < 29); 236 | registers[dst] = imm; 237 | } 238 | else if(ins.id == ARM64_INS_ADD 239 | && ins.operands[1].type == Operand::OP_TYPE_REG 240 | && ins.operands[2].type == Operand::OP_TYPE_IMM) { 241 | int64_t dst = ins.operands[0].aarch64_value.reg - ARM64_REG_X0; 242 | int64_t lhs = ins.operands[1].aarch64_value.reg - ARM64_REG_X0; 243 | int64_t rhs = ins.operands[2].aarch64_value.imm & 0xFFF; 244 | assert(dst < 29 && lhs < 29); 245 | registers[dst] = registers[lhs] + rhs; 246 | if (registers[dst] != -1) { 247 | jmptab_addr = (uint64_t)(registers[dst]); 
248 | scale = 0; 249 | } 250 | } 251 | else if(ins.operands[0].type == Operand::OP_TYPE_REG 252 | && ins.operands[0].aarch64_value.reg >= ARM64_REG_X0 253 | && ins.operands[0].aarch64_value.reg <= ARM64_REG_X28) { 254 | int64_t dst = ins.operands[0].aarch64_value.reg - ARM64_REG_X0; 255 | registers[dst] = -1; 256 | } 257 | } 258 | } 259 | 260 | if(jmptab_addr && scale > 0) { 261 | jmptab_end = 0; 262 | for(auto &sec: this->binary->sections) { 263 | if(sec.contains(jmptab_addr)) { 264 | verbose(4, "parsing jump table at 0x%016jx (jump at 0x%016jx)", 265 | jmptab_addr, bb->insns.back().start); 266 | jmptab_idx = jmptab_addr-sec.vma; 267 | jmptab_end = jmptab_addr; 268 | jmptab8 = (uint8_t*)&sec.bytes[jmptab_idx]; 269 | jmptab16 = (uint16_t*)&sec.bytes[jmptab_idx]; 270 | jmptab32 = (uint32_t*)&sec.bytes[jmptab_idx]; 271 | jmptab64 = (uint64_t*)&sec.bytes[jmptab_idx]; 272 | while(1) { 273 | if((jmptab_idx+scale) > sec.size) break; 274 | jmptab_end += scale; 275 | jmptab_idx += scale; 276 | switch(scale) { 277 | case 1: 278 | case_addr_abs = uint8_t(*jmptab8++); 279 | break; 280 | case 2: 281 | case_addr_abs = uint16_t(read_le_i16(jmptab16++)); 282 | break; 283 | case 4: 284 | case_addr_abs = uint32_t(read_le_i32(jmptab32++)); 285 | break; 286 | case 8: 287 | case_addr_abs = uint64_t(read_le_i64(jmptab64++)); 288 | break; 289 | default: 290 | print_warn("Unexpected scale factor in memory operand: %d", scale); 291 | case_addr_abs = 0; 292 | break; 293 | } 294 | case_addr_rel = case_addr_abs + jmptab_addr; 295 | if(target_sec->contains(case_addr_abs)) { 296 | case_addr = case_addr_abs; 297 | } else if(target_sec->contains(case_addr_rel)) { 298 | case_addr = case_addr_rel; 299 | } else { 300 | break; 301 | } 302 | /* add target block */ 303 | cc = this->get_bb(case_addr, &offset); 304 | if(!cc) break; 305 | conflict_edge = NULL; 306 | for(auto &e: cc->ancestors) { 307 | if(e.is_switch) { 308 | conflict_edge = &e; 309 | break; 310 | } 311 | } 312 | if(conflict_edge && 
(conflict_edge->jmptab <= jmptab_addr)) { 313 | verbose(3, "removing switch edge 0x%016jx -> 0x%016jx (detected overlapping jump table or case)", 314 | conflict_edge->src->insns.back().start, case_addr); 315 | unlink_edge(conflict_edge->src, cc); 316 | conflict_edge = NULL; 317 | } 318 | if(!conflict_edge) { 319 | verbose(3, "adding switch edge 0x%016jx -> 0x%016jx", bb->insns.back().start, case_addr); 320 | link_bbs(Edge::EDGE_TYPE_JMP_INDIRECT, bb, case_addr, jmptab_addr); 321 | } 322 | } 323 | break; 324 | } 325 | } 326 | 327 | if(jmptab_addr && jmptab_end) { 328 | mark_jmptab_as_data(jmptab_addr, jmptab_end); 329 | } 330 | } 331 | } 332 | } 333 | 334 | 335 | void 336 | CFG::find_switches_arm() 337 | { 338 | BB *bb, *cc; 339 | Edge *conflict_edge; 340 | Section *target_sec; 341 | int scale = 4; 342 | unsigned offset; 343 | uint64_t jmptab_addr, jmptab_idx, jmptab_end, case_addr; 344 | uint32_t *jmptab; 345 | 346 | for(auto &kv: this->start2bb) { 347 | bb = kv.second; 348 | jmptab_addr = 0; 349 | target_sec = NULL; 350 | /* If this BB ends in an indirect jmp, scan the BB for what looks like 351 | * instructions loading a target from a jump table */ 352 | if(bb->insns.back().edge_type() == Edge::EDGE_TYPE_JMP_INDIRECT) { 353 | target_sec = bb->section; 354 | for(auto rit = bb->insns.rbegin(); rit != bb->insns.rend(); ++rit) { 355 | const auto& ins = *rit; 356 | if(ins.operands.size() < 2) { 357 | continue; 358 | } 359 | /* Pattern #1 360 | * Load the address relative to `pc`. 
Following variants are supported: 361 | * - Using add (clang): 362 | * add rN, pc, .L 363 | * - Using adr (clang, shorthand): 364 | * adr rN, .L 365 | * - Using ldrls (gcc) 366 | * ldrls pc, [pc, rN, lsl#2] 367 | */ 368 | if(ins.id == ARM_INS_ADD && 369 | ins.operands[1].type == Operand::OP_TYPE_REG && 370 | ins.operands[1].arm_value.reg == ARM_REG_PC && 371 | ins.operands[2].type == Operand::OP_TYPE_IMM) { 372 | int64_t imm = ins.operands[2].arm_value.imm; 373 | jmptab_addr = (ins.start + 8) + imm; 374 | break; 375 | } 376 | else if(ins.id == ARM_INS_ADR && ins.operands[0].arm_value.reg == ARM_REG_PC) { 377 | int64_t imm = ins.operands[1].arm_value.imm; 378 | jmptab_addr = (ins.start + 8) + imm; 379 | break; 380 | } 381 | else if(ins.id == ARM_INS_LDR 382 | && ins.operands[0].arm_value.reg == ARM_REG_PC 383 | && ins.operands[1].arm_value.reg == ARM_REG_PC) { 384 | jmptab_addr = (ins.start + 8); 385 | break; 386 | } 387 | } 388 | } 389 | 390 | if(jmptab_addr) { 391 | jmptab_end = 0; 392 | for(auto &sec: this->binary->sections) { 393 | if(sec.contains(jmptab_addr)) { 394 | verbose(4, "parsing jump table at 0x%016jx (jump at 0x%016jx)", 395 | jmptab_addr, bb->insns.back().start); 396 | jmptab_idx = jmptab_addr-sec.vma; 397 | jmptab_end = jmptab_addr; 398 | jmptab = (uint32_t*)&sec.bytes[jmptab_idx]; 399 | while(1) { 400 | if((jmptab_idx+scale) > sec.size) break; 401 | jmptab_end += scale; 402 | jmptab_idx += scale; 403 | case_addr = uint32_t(read_le_i32(jmptab++)); 404 | if(!case_addr) break; 405 | if(!target_sec->contains(case_addr)) { 406 | break; 407 | } else { 408 | cc = this->get_bb(case_addr, &offset); 409 | if(!cc) break; 410 | conflict_edge = NULL; 411 | for(auto &e: cc->ancestors) { 412 | if(e.is_switch) { 413 | conflict_edge = &e; 414 | break; 415 | } 416 | } 417 | if(conflict_edge && (conflict_edge->jmptab <= jmptab_addr)) { 418 | verbose(3, "removing switch edge 0x%016jx -> 0x%016jx (detected overlapping jump table or case)", 419 | 
conflict_edge->src->insns.back().start, case_addr); 420 | unlink_edge(conflict_edge->src, cc); 421 | conflict_edge = NULL; 422 | } 423 | if(!conflict_edge) { 424 | verbose(3, "adding switch edge 0x%016jx -> 0x%016jx", bb->insns.back().start, case_addr); 425 | link_bbs(Edge::EDGE_TYPE_JMP_INDIRECT, bb, case_addr, jmptab_addr); 426 | } 427 | } 428 | } 429 | break; 430 | } 431 | } 432 | 433 | if(jmptab_addr && jmptab_end) { 434 | mark_jmptab_as_data(jmptab_addr, jmptab_end); 435 | } 436 | } 437 | } 438 | } 439 | 440 | /* MIPS switch detection: emulate lui/addiu/daddiu/addu/lw/daddu/dsll32 on tracked registers to find a jump table address, then link one switch edge per big-endian table entry. */ 441 | void 442 | CFG::find_switches_mips() 443 | { 444 | BB *bb, *cc; 445 | Edge *conflict_edge; 446 | Section *target_sec; 447 | int scale; 448 | unsigned offset; 449 | uint64_t jmptab_addr, jmptab_idx, jmptab_end, case_addr; 450 | uint32_t *jmptab32; 451 | uint64_t *jmptab64; 452 | 453 | /* Instructions can get reordered, so we emulate the ISA subset relevant for the patterns below, 454 | * clearing the intermediate register values with -1 if the result is irrelevant or undefined. */ 455 | int64_t registers[32]; 456 | for (size_t i = 0; i < 32; i++) { 457 | registers[i] = -1LL; 458 | } 459 | 460 | /* Assume the jump-table entries are the same width as the GPRs */ 461 | scale = this->binary->bits / 8; 462 | 463 | for(auto &kv: this->start2bb) { 464 | bb = kv.second; 465 | jmptab_addr = 0; 466 | target_sec = NULL; 467 | /* If this BB ends in an indirect jmp, scan the BB for what looks like 468 | * instructions loading a target from a jump table */ 469 | if(bb->insns.back().edge_type() == Edge::EDGE_TYPE_JMP_INDIRECT) { 470 | target_sec = bb->section; 471 | for(auto &ins: bb->insns) { 472 | if(ins.operands.size() < 2) { 473 | continue; 474 | } 475 | /* Pattern #1 476 | * Load the address from its word halves. 
Following variants are supported: 477 | * - MIPS / 32-bit / non-PIC (clang): 478 | * lui $A, %hi(.L) 479 | * addiu $A, $A, %lo(.L) 480 | * - MIPS / 32-bit / non-PIC (gcc): 481 | * lui $A, %hi(.L) 482 | * addu $T, $A 483 | * lw $T, %lo(.L)($T) 484 | * - MIPS / 64-bit / non-PIC: 485 | * lui $A, %highest(.L) 486 | * daddiu $A, $A, %higher(.L) 487 | * dsll32 $A, $A, 0 488 | * lui $B, %hi(.L) 489 | * daddiu $B, $B, %lo(.L) 490 | * daddu $A, $A, $B 491 | */ 492 | if(ins.id == MIPS_INS_LUI) { 493 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 494 | int64_t imm = ins.operands[1].mips_value.imm; 495 | assert(dst < 32); 496 | registers[dst] = imm << 16; 497 | } 498 | else if(ins.id == MIPS_INS_ADDIU || ins.id == MIPS_INS_DADDIU) { 499 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 500 | int64_t lhs = ins.operands[1].mips_value.reg - MIPS_REG_0; 501 | int64_t rhs = ins.operands[2].mips_value.imm; 502 | assert(dst < 32 && lhs < 32); 503 | registers[dst] = registers[lhs] + rhs; 504 | if (registers[dst] != -1) { 505 | jmptab_addr = (uint64_t)(registers[dst]); 506 | } 507 | } 508 | else if(ins.id == MIPS_INS_ADDU) { 509 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 510 | int64_t lhs = ins.operands[1].mips_value.reg - MIPS_REG_0; 511 | int64_t rhs = ins.operands[2].mips_value.reg - MIPS_REG_0; 512 | assert(dst < 32 && lhs < 32 && rhs < 32); 513 | /* addu emulation is intentionally wrong. 
the goal is replacing: 514 | * - `dst = jumptable + offset` => `dst = jumptable` 515 | * - `dst = offset + jumptable` => `dst = jumptable` */ 516 | if (registers[lhs] != -1) { 517 | registers[dst] = registers[lhs]; 518 | } else { 519 | registers[dst] = registers[rhs]; 520 | } 521 | } 522 | else if(ins.id == MIPS_INS_LW) { 523 | int64_t reg = ins.operands[1].mips_value.mem.base - MIPS_REG_0; 524 | int64_t imm = ins.operands[1].mips_value.mem.disp; 525 | assert(reg < 32); 526 | if (registers[reg] != -1) { 527 | jmptab_addr = (uint64_t)(registers[reg] + imm); 528 | } 529 | } 530 | else if(ins.id == MIPS_INS_DADDU) { 531 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 532 | int64_t lhs = ins.operands[1].mips_value.reg - MIPS_REG_0; 533 | int64_t rhs = ins.operands[2].mips_value.reg - MIPS_REG_0; 534 | assert(dst < 32 && lhs < 32 && rhs < 32); 535 | registers[dst] = registers[lhs] + registers[rhs]; 536 | if (registers[dst] != -1) { 537 | jmptab_addr = (uint64_t)(registers[dst]); 538 | } 539 | } 540 | else if(ins.id == MIPS_INS_DSLL32 && ins.operands[2].mips_value.reg == 0) { 541 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 542 | int64_t src = ins.operands[1].mips_value.reg - MIPS_REG_0; 543 | assert(dst < 32 && src < 32); 544 | registers[dst] = (registers[src] != -1) ? (int64_t)((uint64_t)registers[src] << 32) : -1; /* shift the tracked value of src (not its register index); an unknown source stays unknown */ 545 | } 546 | else if(ins.operands[0].type == Operand::OP_TYPE_REG 547 | && ins.operands[0].mips_value.reg >= MIPS_REG_0 548 | && ins.operands[0].mips_value.reg <= MIPS_REG_31) { 549 | int64_t dst = ins.operands[0].mips_value.reg - MIPS_REG_0; 550 | registers[dst] = -1; 551 | } 552 | } 553 | } 554 | 555 | if(jmptab_addr) { 556 | jmptab_end = 0; 557 | for(auto &sec: this->binary->sections) { 558 | if(sec.contains(jmptab_addr)) { 559 | verbose(4, "parsing jump table at 0x%016jx (jump at 0x%016jx)", 560 | jmptab_addr, bb->insns.back().start); 561 | jmptab_idx = jmptab_addr-sec.vma; 562 | jmptab_end = jmptab_addr; 563 | jmptab32 = (uint32_t*)&sec.bytes[jmptab_idx]; 564 | jmptab64 = 
(uint64_t*)&sec.bytes[jmptab_idx]; 565 | while(1) { 566 | if((jmptab_idx+scale) > sec.size) break; 567 | jmptab_end += scale; 568 | jmptab_idx += scale; 569 | switch(scale) { 570 | case 4: 571 | case_addr = uint32_t(read_be_i32(jmptab32++)); 572 | break; 573 | case 8: 574 | case_addr = uint64_t(read_be_i64(jmptab64++)); 575 | break; 576 | default: 577 | print_warn("Unexpected scale factor in memory operand: %d", scale); 578 | case_addr = 0; 579 | break; 580 | } 581 | if(!case_addr) break; 582 | if(!target_sec->contains(case_addr)) { 583 | break; 584 | } else { 585 | cc = this->get_bb(case_addr, &offset); 586 | if(!cc) break; 587 | conflict_edge = NULL; 588 | for(auto &e: cc->ancestors) { 589 | if(e.is_switch) { 590 | conflict_edge = &e; 591 | break; 592 | } 593 | } 594 | if(conflict_edge && (conflict_edge->jmptab <= jmptab_addr)) { 595 | verbose(3, "removing switch edge 0x%016jx -> 0x%016jx (detected overlapping jump table or case)", 596 | conflict_edge->src->insns.back().start, case_addr); 597 | unlink_edge(conflict_edge->src, cc); 598 | conflict_edge = NULL; 599 | } 600 | if(!conflict_edge) { 601 | verbose(3, "adding switch edge 0x%016jx -> 0x%016jx", bb->insns.back().start, case_addr); 602 | link_bbs(Edge::EDGE_TYPE_JMP_INDIRECT, bb, case_addr, jmptab_addr); 603 | } 604 | } 605 | } 606 | break; 607 | } 608 | } 609 | 610 | if(jmptab_addr && jmptab_end) { 611 | mark_jmptab_as_data(jmptab_addr, jmptab_end); 612 | } 613 | } 614 | } 615 | } 616 | 617 | /* PowerPC switch detection: emulate lis + addi/ori on tracked registers to find a jump table address, then link one switch edge per table entry (entries are read big-endian and added to the table address). */ 618 | void 619 | CFG::find_switches_ppc() 620 | { 621 | BB *bb, *cc; 622 | Edge *conflict_edge; 623 | Section *target_sec; 624 | int scale; 625 | unsigned offset; 626 | uint64_t jmptab_addr, jmptab_idx, jmptab_end, case_addr; 627 | uint32_t *jmptab32; 628 | uint64_t *jmptab64; 629 | 630 | /* Instructions can get reordered, so we emulate the ISA subset relevant for the patterns below, 631 | * clearing the intermediate register values with -1 if the result is irrelevant or undefined. 
*/ 632 | int64_t registers[32]; 633 | 634 | /* Assume the jump-table entries are the same width as the GPRs */ 635 | scale = this->binary->bits / 8; 636 | 637 | for(auto &kv: this->start2bb) { 638 | bb = kv.second; 639 | jmptab_addr = 0; 640 | target_sec = NULL; 641 | /* If this BB ends in an indirect jmp, scan the BB for what looks like 642 | * instructions loading a target from a jump table */ 643 | if(bb->insns.back().edge_type() == Edge::EDGE_TYPE_JMP_INDIRECT) { 644 | target_sec = bb->section; 645 | for(auto &ins: bb->insns) { 646 | if(ins.operands.size() < 2) { 647 | continue; 648 | } 649 | /* Pattern #1 (32-bit) 650 | * Load the address from its word halves. Following variants are supported: 651 | * - Using addis/addi (gcc): 652 | * lis rN, .L@ha 653 | * addi rN, rN, L@l 654 | * - Using addis/ori: 655 | * lis rN, .L@ha 656 | * ori rN, rN, .L@l */ 657 | if(ins.id == PPC_INS_LIS) { 658 | int64_t dst = ins.operands[0].ppc_value.reg - PPC_REG_R0; 659 | int64_t imm = ins.operands[1].ppc_value.imm; 660 | assert(dst < 32); 661 | registers[dst] = imm << 16; 662 | } 663 | else if(ins.id == PPC_INS_ADDI || ins.id == PPC_INS_ORI) { 664 | int64_t lhs = ins.operands[1].ppc_value.reg - PPC_REG_R0; 665 | int64_t rhs = ins.operands[2].ppc_value.imm; 666 | assert(lhs < 32); 667 | if (registers[lhs] != -1) { 668 | jmptab_addr = (uint64_t)(registers[lhs] | rhs); 669 | break; 670 | } 671 | } 672 | else if(ins.operands[0].type == Operand::OP_TYPE_REG 673 | && ins.operands[0].ppc_value.reg >= PPC_REG_R0 674 | && ins.operands[0].ppc_value.reg <= PPC_REG_R31) { 675 | int64_t dst = ins.operands[0].ppc_value.reg - PPC_REG_R0; 676 | registers[dst] = -1; 677 | } 678 | } 679 | } 680 | 681 | if(jmptab_addr) { 682 | jmptab_end = 0; 683 | for(auto &sec: this->binary->sections) { 684 | if(sec.contains(jmptab_addr)) { 685 | verbose(4, "parsing jump table at 0x%016jx (jump at 0x%016jx)", 686 | jmptab_addr, bb->insns.back().start); 687 | jmptab_idx = jmptab_addr-sec.vma; 688 | jmptab_end = 
jmptab_addr; 689 | jmptab32 = (uint32_t*)&sec.bytes[jmptab_idx]; 690 | jmptab64 = (uint64_t*)&sec.bytes[jmptab_idx]; 691 | while(1) { 692 | if((jmptab_idx+scale) > sec.size) break; 693 | jmptab_end += scale; 694 | jmptab_idx += scale; 695 | switch(scale) { 696 | case 4: 697 | case_addr = uint32_t(read_be_i32(jmptab32++) + jmptab_addr); 698 | break; 699 | case 8: 700 | case_addr = uint64_t(read_be_i64(jmptab64++) + jmptab_addr); 701 | break; 702 | default: 703 | print_warn("Unexpected scale factor in memory operand: %d", scale); 704 | case_addr = 0; 705 | break; 706 | } 707 | if(!case_addr) break; 708 | if(!target_sec->contains(case_addr)) { 709 | break; 710 | } else { 711 | cc = this->get_bb(case_addr, &offset); 712 | if(!cc) break; 713 | conflict_edge = NULL; 714 | for(auto &e: cc->ancestors) { 715 | if(e.is_switch) { 716 | conflict_edge = &e; 717 | break; 718 | } 719 | } 720 | if(conflict_edge && (conflict_edge->jmptab <= jmptab_addr)) { 721 | verbose(3, "removing switch edge 0x%016jx -> 0x%016jx (detected overlapping jump table or case)", 722 | conflict_edge->src->insns.back().start, case_addr); 723 | unlink_edge(conflict_edge->src, cc); 724 | conflict_edge = NULL; 725 | } 726 | if(!conflict_edge) { 727 | verbose(3, "adding switch edge 0x%016jx -> 0x%016jx", bb->insns.back().start, case_addr); 728 | link_bbs(Edge::EDGE_TYPE_JMP_INDIRECT, bb, case_addr, jmptab_addr); 729 | } 730 | } 731 | } 732 | break; 733 | } 734 | } 735 | 736 | if(jmptab_addr && jmptab_end) { 737 | mark_jmptab_as_data(jmptab_addr, jmptab_end); 738 | } 739 | } 740 | } 741 | } 742 | 743 | 744 | void 745 | CFG::find_switches_x86() 746 | { 747 | BB *bb, *cc; 748 | Edge *conflict_edge; 749 | Section *target_sec; 750 | Operand *op_target, *op_reg, *op_mem; 751 | int scale; 752 | unsigned offset; 753 | uint64_t jmptab_addr, jmptab_idx, jmptab_end, case_addr; 754 | uint8_t *jmptab8; 755 | uint16_t *jmptab16; 756 | uint32_t *jmptab32; 757 | uint64_t *jmptab64; 758 | std::list::iterator ins; 759 | 760 | 
for(auto &kv: this->start2bb) { 761 | bb = kv.second; 762 | jmptab_addr = 0; 763 | target_sec = NULL; 764 | /* If this BB ends in an indirect jmp, scan the BB for what looks like 765 | * an instruction loading a target from a jump table */ 766 | if(bb->insns.back().edge_type() == Edge::EDGE_TYPE_JMP_INDIRECT) { 767 | if(bb->insns.back().operands.size() < 1) { 768 | print_warn("Indirect jump has no target operand"); 769 | continue; 770 | } 771 | target_sec = bb->section; 772 | op_target = &bb->insns.back().operands[0]; 773 | if(op_target->type == Operand::OP_TYPE_MEM) { 774 | jmptab_addr = (uint64_t)op_target->x86_value.mem.disp; 775 | scale = op_target->x86_value.mem.scale; 776 | } else if(op_target->type == Operand::OP_TYPE_REG) { /* register-indirect jmp: walk backwards to the instruction that last wrote the target register */ 777 | ins = bb->insns.end(); 778 | ins--; /* Skip the jmp itself */ 779 | while(ins != bb->insns.begin()) { 780 | ins--; 781 | if(ins->operands.empty()) { 782 | continue; 783 | } 784 | op_reg = &ins->operands[0]; 785 | if(op_reg->type != Operand::OP_TYPE_REG) { 786 | continue; 787 | } else if(op_reg->x86_value.reg != op_target->x86_value.reg) { 788 | continue; 789 | } else { 790 | /* This is the last instruction that loads the jump target register, 791 | * see if we can find a jump table address from it */ 792 | if(ins->operands.size() >= 2) { 793 | op_mem = &ins->operands[1]; 794 | if(op_mem->type == Operand::OP_TYPE_MEM) { 795 | jmptab_addr = (uint64_t)op_mem->x86_value.mem.disp; 796 | scale = op_mem->x86_value.mem.scale; 797 | } 798 | } else { 799 | /* No luck :-( */ 800 | } 801 | break; 802 | } 803 | } 804 | } 805 | } 806 | 807 | if(jmptab_addr) { 808 | jmptab_end = 0; 809 | for(auto &sec: this->binary->sections) { 810 | if(sec.contains(jmptab_addr)) { 811 | verbose(4, "parsing jump table at 0x%016jx (jump at 0x%016jx)", 812 | jmptab_addr, bb->insns.back().start); 813 | jmptab_idx = jmptab_addr-sec.vma; 814 | jmptab_end = jmptab_addr; 815 | jmptab8 = (uint8_t*) &sec.bytes[jmptab_idx]; 816 | jmptab16 = 
(uint16_t*)&sec.bytes[jmptab_idx]; 817 | jmptab32 = (uint32_t*)&sec.bytes[jmptab_idx]; 818 | jmptab64 = (uint64_t*)&sec.bytes[jmptab_idx]; 819 | while(1) { 820 | if((jmptab_idx+scale) >= sec.size) break; 821 | jmptab_end += scale; 822 | jmptab_idx += scale; 823 | switch(scale) { 824 | case 1: 825 | case_addr = (*jmptab8++); 826 | break; 827 | case 2: 828 | case_addr = (*jmptab16++); 829 | break; 830 | case 4: 831 | case_addr = (*jmptab32++); 832 | break; 833 | case 8: 834 | case_addr = (*jmptab64++); 835 | break; 836 | default: 837 | print_warn("Unexpected scale factor in memory operand: %d", scale); 838 | case_addr = 0; 839 | break; 840 | } 841 | if(!case_addr) break; 842 | if(!target_sec->contains(case_addr)) { 843 | break; 844 | } else { 845 | cc = this->get_bb(case_addr, &offset); 846 | if(!cc) break; 847 | conflict_edge = NULL; 848 | for(auto &e: cc->ancestors) { 849 | if(e.is_switch) { 850 | conflict_edge = &e; 851 | break; 852 | } 853 | } 854 | if(conflict_edge && (conflict_edge->jmptab <= jmptab_addr)) { 855 | verbose(3, "removing switch edge 0x%016jx -> 0x%016jx (detected overlapping jump table or case)", 856 | conflict_edge->src->insns.back().start, case_addr); 857 | unlink_edge(conflict_edge->src, cc); 858 | conflict_edge = NULL; 859 | } 860 | if(!conflict_edge) { 861 | verbose(3, "adding switch edge 0x%016jx -> 0x%016jx", bb->insns.back().start, case_addr); 862 | link_bbs(Edge::EDGE_TYPE_JMP_INDIRECT, bb, case_addr, jmptab_addr); 863 | } 864 | } 865 | } 866 | break; 867 | } 868 | } 869 | 870 | if(jmptab_addr && jmptab_end) { 871 | mark_jmptab_as_data(jmptab_addr, jmptab_end); 872 | } 873 | } 874 | } 875 | } 876 | 877 | 878 | void 879 | CFG::find_switches() 880 | { 881 | verbose(1, "starting switch analysis"); 882 | 883 | switch(this->binary->arch) { 884 | case Binary::ARCH_AARCH64: 885 | find_switches_aarch64(); 886 | break; 887 | case Binary::ARCH_ARM: 888 | find_switches_arm(); 889 | break; 890 | case Binary::ARCH_MIPS: 891 | find_switches_mips(); 892 
| break; 893 | case Binary::ARCH_PPC: 894 | find_switches_ppc(); 895 | break; 896 | case Binary::ARCH_X86: 897 | find_switches_x86(); 898 | break; 899 | default: 900 | print_warn("switch analysis not yet supported for %s", this->binary->arch_str.c_str()); 901 | break; 902 | } 903 | 904 | verbose(1, "switch analysis complete"); 905 | } 906 | 907 | 908 | void 909 | CFG::expand_function(Function *f, BB *bb) 910 | { 911 | if(!bb) { 912 | bb = f->BBs.front(); 913 | } else { 914 | if(bb->section->is_import_table() || bb->is_invalid()) { 915 | return; 916 | } else if(bb->function) { 917 | return; 918 | } 919 | f->add_bb(bb); 920 | } 921 | 922 | /* XXX: follow links to ancestor blocks, but NOT if this BB is called; 923 | * in that case it is an entry point, and we don't want to backtrack along 924 | * inbound edges because that causes issues with tailcalls */ 925 | if(!bb->is_called()) { 926 | for(auto &e: bb->ancestors) { 927 | if((e.type == Edge::EDGE_TYPE_CALL) 928 | || (e.type == Edge::EDGE_TYPE_CALL_INDIRECT) 929 | || (e.type == Edge::EDGE_TYPE_RET)) { 930 | continue; 931 | } 932 | expand_function(f, e.src); 933 | } 934 | } 935 | 936 | /* Follow links to target blocks */ 937 | for(auto &e: bb->targets) { 938 | if((e.type == Edge::EDGE_TYPE_CALL) 939 | || (e.type == Edge::EDGE_TYPE_CALL_INDIRECT) 940 | || (e.type == Edge::EDGE_TYPE_RET)) { 941 | continue; 942 | } 943 | expand_function(f, e.dst); 944 | } 945 | } 946 | 947 | 948 | void 949 | CFG::find_functions() 950 | { 951 | BB *bb; 952 | 953 | verbose(1, "starting function analysis"); 954 | 955 | /* Create function headers for all BBs that are called directly */ 956 | for(auto &kv: this->start2bb) { 957 | bb = kv.second; 958 | if(bb->section->is_import_table() || bb->is_padding()) { 959 | continue; 960 | } 961 | if(bb->is_called()) { 962 | this->functions.push_back(Function()); 963 | this->functions.back().cfg = this; 964 | this->functions.back().add_bb(bb); 965 | } 966 | } 967 | 968 | /* Expand functions for the 
directly-called header BBs */ 969 | for(auto &f: this->functions) { 970 | expand_function(&f, NULL); 971 | f.find_entry(); 972 | } 973 | 974 | /* Detect functions for remaining BBs through connected-component analysis */ 975 | for(auto &kv: this->start2bb) { 976 | bb = kv.second; 977 | if(bb->section->is_import_table() || bb->is_padding() || bb->is_invalid()) { 978 | continue; 979 | } else if(bb->function) { 980 | continue; 981 | } 982 | this->functions.push_back(Function()); 983 | this->functions.back().cfg = this; 984 | expand_function(&this->functions.back(), bb); 985 | this->functions.back().find_entry(); 986 | } 987 | 988 | verbose(1, "function analysis complete"); 989 | } 990 | 991 | 992 | void 993 | CFG::find_entry() 994 | { 995 | uint64_t entry; 996 | 997 | if(this->entry.size() > 0) { 998 | /* entry point already known */ 999 | verbose(3, "cfg entry point@0x%016jx", this->entry.front()->start); 1000 | return; 1001 | } 1002 | 1003 | verbose(1, "scanning for cfg entry point"); 1004 | 1005 | entry = 0; 1006 | verbose(1, "cfg entry point@0x%016jx", entry); 1007 | } 1008 | 1009 | 1010 | void 1011 | CFG::verify_padding() 1012 | { 1013 | BB *bb; 1014 | bool call_fallthrough; 1015 | unsigned noplen; 1016 | 1017 | /* Fix incorrectly identified padding blocks (they turned out to be reachable) */ 1018 | for(auto &kv: this->start2bb) { 1019 | bb = kv.second; 1020 | if(bb->trap) continue; 1021 | if(bb->padding && !bb->ancestors.empty()) { 1022 | call_fallthrough = false; 1023 | noplen = (bb->end - bb->start); 1024 | for(auto &e: bb->ancestors) { 1025 | if((e.type == Edge::EDGE_TYPE_FALLTHROUGH) 1026 | && (e.src->insns.back().flags & Instruction::INS_FLAG_CALL)) { 1027 | /* This padding block may not be truly reachable; the preceding 1028 | * call may be non-returning */ 1029 | call_fallthrough = true; 1030 | break; 1031 | } 1032 | } 1033 | if(call_fallthrough && (noplen > 1)) continue; 1034 | bb->padding = false; 1035 | link_bbs(Edge::EDGE_TYPE_FALLTHROUGH, bb, 
bb->end); 1036 | } 1037 | } 1038 | } 1039 | 1040 |
/*
 * Propagate invalidity backwards from known-bad blocks.
 *
 * The blacklist consists of every block in bad_bbs plus every block in
 * start2bb flagged as a trap. Starting from each blacklisted block, the block
 * covering the preceding address is examined; it is marked invalid (and the
 * walk continues further back) unless its last instruction shows that control
 * does not simply run into the bad block:
 *   - indirect control flow, or a return, stops the walk;
 *   - a direct call/jmp stops the walk if its target resolves to a known BB.
 * Blocks marked invalid are unlinked and recorded in bad_bbs; finally all
 * bad_bbs entries are removed from start2bb.
 */
void
CFG::detect_bad_bbs()
{
  BB *cc;
  bool invalid;
  unsigned flags, offset;
  std::list<BB*> blacklist;

  /* This improves accuracy for code with inline data (otherwise it does nothing) */

  for(auto &kv: this->bad_bbs) blacklist.push_back(kv.second);
  for(auto &kv: this->start2bb) {
    if(kv.second->trap) blacklist.push_back(kv.second);
  }

  /* Mark BBs that may fall through to a blacklisted block as invalid */
  for(auto bb: blacklist) {
    invalid = true;
    cc = bb;
    while(invalid) {
      /* Block containing the byte right before the current one, if any */
      cc = get_bb(cc->start-1, &offset);
      if(!cc) break;
      flags = cc->insns.back().flags;
      /* Both flags must be tested against the instruction. The second operand
       * used to be the bare INS_FLAG_INDIRECT constant, which made every
       * control-flow instruction take this branch and left the two branches
       * below unreachable. */
      if((flags & Instruction::INS_FLAG_CFLOW) && (flags & Instruction::INS_FLAG_INDIRECT)) {
        invalid = false;
      } else if((flags & Instruction::INS_FLAG_CALL) || (flags & Instruction::INS_FLAG_JMP)) {
        invalid = (get_bb(cc->insns.back().target, &offset) == NULL);
      } else if(flags & Instruction::INS_FLAG_RET) {
        invalid = false;
      }
      if(invalid) {
        cc->invalid = true;
        unlink_bb(cc);
        bad_bbs[cc->start] = cc;
      }
    }
  }

  /* Remove bad BBs from the CFG map */
  for(auto &kv: this->bad_bbs) {
    BB *bad = kv.second;
    if(this->start2bb.count(bad->start)) {
      this->start2bb.erase(bad->start);
    }
  }
}
1087 | 1088 | 1089 | BB* 1090 | CFG::get_bb(uint64_t addr, unsigned *offset) 1091 | { 1092 | BB *bb; 1093 | std::map::iterator it; 1094 | 1095 | if(this->start2bb.count(addr)) { 1096 | if(offset) { 1097 | (*offset) = 0; 1098 | } 1099 | return this->start2bb[addr]; 1100 | } else if(!offset) { 1101 | return NULL; 1102 | } else if(start2bb.empty()) { 1103 | return NULL; 1104 | } 1105 | 1106 | it = this->start2bb.upper_bound(addr); 1107 | if(it == start2bb.begin()) { 1108 | return NULL; 1109 | } 1110 | bb =
(*(--it)).second; 1111 | if((addr >= bb->start) && (addr < bb->end)) { 1112 | (*offset) = addr - bb->start; 1113 | return bb; 1114 | } 1115 | 1116 | return NULL; 1117 | } 1118 | 1119 | 1120 | void 1121 | CFG::link_bbs(Edge::EdgeType type, BB *bb, uint64_t target, uint64_t jmptab) 1122 | { 1123 | BB *cc; 1124 | bool is_switch; 1125 | unsigned offset; 1126 | 1127 | assert(type != Edge::EDGE_TYPE_NONE); 1128 | 1129 | is_switch = (jmptab > 0); 1130 | cc = this->get_bb(target, &offset); 1131 | if(cc) { 1132 | bb->targets.push_back(Edge(type, bb, cc, is_switch, jmptab, offset)); 1133 | cc->ancestors.push_back(Edge(type, bb, cc, is_switch, jmptab, offset)); 1134 | } 1135 | } 1136 | 1137 | 1138 | void 1139 | CFG::unlink_bb(BB *bb) 1140 | { 1141 | BB *cc; 1142 | std::list::iterator f; 1143 | 1144 | for(auto &e: bb->ancestors) { 1145 | cc = e.src; 1146 | for(f = cc->targets.begin(); f != cc->targets.end(); ) { 1147 | if(f->dst == bb) f = cc->targets.erase(f); 1148 | else f++; 1149 | } 1150 | } 1151 | 1152 | for(auto &e: bb->targets) { 1153 | cc = e.dst; 1154 | for(f = cc->ancestors.begin(); f != cc->ancestors.end(); ) { 1155 | if(f->src == bb) f = cc->ancestors.erase(f); 1156 | else f++; 1157 | } 1158 | } 1159 | 1160 | bb->ancestors.clear(); 1161 | bb->targets.clear(); 1162 | } 1163 | 1164 | 1165 | void 1166 | CFG::unlink_edge(BB *bb, BB *cc) 1167 | { 1168 | std::list::iterator f; 1169 | 1170 | for(f = bb->targets.begin(); f != bb->targets.end(); ) { 1171 | if(f->dst == cc) f = bb->targets.erase(f); 1172 | else f++; 1173 | } 1174 | 1175 | for(f = cc->ancestors.begin(); f != cc->ancestors.end(); ) { 1176 | if(f->src == bb) f = cc->ancestors.erase(f); 1177 | else f++; 1178 | } 1179 | } 1180 | 1181 | 1182 | int 1183 | CFG::make_cfg(Binary *bin, std::list *disasm) 1184 | { 1185 | uint64_t addr; 1186 | unsigned flags; 1187 | 1188 | verbose(1, "generating cfg"); 1189 | 1190 | this->binary = bin; 1191 | 1192 | for(auto &dis: (*disasm)) { 1193 | for(auto &bb: dis.BBs) { 1194 | 
if(bb.invalid) { 1195 | this->bad_bbs[bb.start] = &bb; 1196 | continue; 1197 | } 1198 | if(bb.start == bin->entry) { 1199 | this->entry.push_back(&bb); 1200 | } 1201 | if(this->start2bb.count(bb.start) > 0) { 1202 | print_warn("conflicting BBs at 0x%016jx", bb.start); 1203 | } 1204 | this->start2bb[bb.start] = &bb; 1205 | } 1206 | } 1207 | 1208 | /* Link basic blocks by direct and fallthrough edges */ 1209 | for(auto &dis: (*disasm)) { 1210 | for(auto &bb: dis.BBs) { 1211 | flags = bb.insns.back().flags; 1212 | if((flags & Instruction::INS_FLAG_CALL) || (flags & Instruction::INS_FLAG_JMP)) { 1213 | if(!(flags & Instruction::INS_FLAG_INDIRECT)) { 1214 | addr = bb.insns.back().target; 1215 | link_bbs(bb.insns.back().edge_type(), &bb, addr); 1216 | } 1217 | if((flags & Instruction::INS_FLAG_CALL) || (flags & Instruction::INS_FLAG_COND)) { 1218 | link_bbs(Edge::EDGE_TYPE_FALLTHROUGH, &bb, bb.end); 1219 | } 1220 | } else if(!(flags & Instruction::INS_FLAG_CFLOW) && !bb.padding) { 1221 | /* A block that doesn't have a control flow instruction at the end; 1222 | * this can happen if the next block is a nop block */ 1223 | link_bbs(Edge::EDGE_TYPE_FALLTHROUGH, &bb, bb.end); 1224 | } 1225 | } 1226 | } 1227 | 1228 | analyze_addrtaken(); 1229 | find_switches(); 1230 | verify_padding(); 1231 | detect_bad_bbs(); 1232 | 1233 | find_functions(); 1234 | find_entry(); 1235 | 1236 | verbose(1, "cfg generation complete"); 1237 | 1238 | return 0; 1239 | } 1240 | 1241 | -------------------------------------------------------------------------------- /cfg.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_CFG_H 2 | #define NUCLEUS_CFG_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include "bb.h" 11 | #include "edge.h" 12 | #include "function.h" 13 | #include "disasm.h" 14 | #include "loader.h" 15 | 16 | class CFG { 17 | public: 18 | CFG() {} 19 | 20 | int make_cfg (Binary *bin, std::list *disasm); 21 | 22 | BB 
*get_bb (uint64_t addr, unsigned *offset = NULL); 23 | 24 | void print_functions (FILE *out); 25 | void print_function_summaries (FILE *out); 26 | 27 | Binary *binary; 28 | std::list entry; 29 | std::list functions; 30 | std::map start2bb; 31 | std::map bad_bbs; 32 | 33 | private: 34 | /* pass: address-taken detection */ 35 | void mark_addrtaken (uint64_t addr); 36 | void analyze_addrtaken_ppc (); 37 | void analyze_addrtaken_x86 (); 38 | void analyze_addrtaken (); 39 | 40 | /* pass: switch detection */ 41 | void mark_jmptab_as_data (uint64_t start, uint64_t end); 42 | void find_switches_aarch64 (); 43 | void find_switches_arm (); 44 | void find_switches_mips (); 45 | void find_switches_ppc (); 46 | void find_switches_x86 (); 47 | void find_switches (); 48 | 49 | void expand_function (Function *f, BB *bb); 50 | void find_functions (); 51 | void find_entry (); 52 | void verify_padding (); 53 | void detect_bad_bbs (); 54 | void link_bbs (Edge::EdgeType type, BB *bb, uint64_t target, uint64_t jmptab = 0); 55 | void unlink_bb (BB *bb); 56 | void unlink_edge (BB *bb, BB *cc); 57 | }; 58 | 59 | #endif /* NUCLEUS_CFG_H */ 60 | 61 | -------------------------------------------------------------------------------- /dataregion.cc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/uxmal/nucleus/e3ab49db579adbdd8451171e980e9b8f8a546a3c/dataregion.cc -------------------------------------------------------------------------------- /dataregion.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DATAREGION_H 2 | #define NUCLEUS_DATAREGION_H 3 | 4 | #include 5 | 6 | class DataRegion { 7 | public: 8 | DataRegion() : start(0), end(0) {} 9 | DataRegion(const DataRegion &d) : start(d.start), end(d.end) {} 10 | 11 | uint64_t start; 12 | uint64_t end; 13 | }; 14 | 15 | #endif /* NUCLEUS_DATAREGION_H */ 16 | 17 | 
-------------------------------------------------------------------------------- /disasm-aarch64.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "disasm-aarch64.h" 4 | #include "log.h" 5 | 6 | 7 | static int 8 | is_cs_nop_ins(cs_insn *ins) 9 | { 10 | switch(ins->id) { 11 | case ARM64_INS_NOP: 12 | return 1; 13 | default: 14 | return 0; 15 | } 16 | } 17 | 18 | 19 | static int 20 | is_cs_trap_ins(cs_insn *ins) 21 | { 22 | switch(ins->id) { 23 | /* XXX: todo */ 24 | default: 25 | return 0; 26 | } 27 | } 28 | 29 | 30 | static int 31 | is_cs_cflow_ins(cs_insn *ins) 32 | { 33 | /* XXX: Capstone does not provide information for all generic groups 34 | * for aarch64 instructions, unlike x86, so we have to do it manually. 35 | * Once this is implemented, it will suffice to check for the following groups: 36 | * CS_GRP_JUMP, CS_GRP_CALL, CS_GRP_RET, CS_GRP_IRET */ 37 | 38 | switch(ins->id) { 39 | case ARM64_INS_B: 40 | case ARM64_INS_BR: 41 | case ARM64_INS_BL: 42 | case ARM64_INS_BLR: 43 | case ARM64_INS_CBNZ: 44 | case ARM64_INS_CBZ: 45 | case ARM64_INS_TBNZ: 46 | case ARM64_INS_TBZ: 47 | case ARM64_INS_RET: 48 | return 1; 49 | default: 50 | return 0; 51 | } 52 | } 53 | 54 | static int 55 | is_cs_call_ins(cs_insn *ins) 56 | { 57 | switch(ins->id) { 58 | case ARM64_INS_BL: 59 | case ARM64_INS_BLR: 60 | return 1; 61 | default: 62 | return 0; 63 | } 64 | } 65 | 66 | 67 | static int 68 | is_cs_ret_ins(cs_insn *ins) 69 | { 70 | /* ret */ 71 | if(ins->id == ARM64_INS_RET) { 72 | return 1; 73 | } 74 | 75 | return 0; 76 | } 77 | 78 | 79 | static int 80 | is_cs_unconditional_jmp_ins(cs_insn *ins) 81 | { 82 | switch(ins->id) { 83 | case ARM64_INS_B: 84 | if(ins->detail->arm64.cc != ARM64_CC_INVALID && 85 | ins->detail->arm64.cc != ARM64_CC_AL) { 86 | return 0; 87 | } 88 | return 1; 89 | case ARM64_INS_BR: 90 | return 1; 91 | default: 92 | return 0; 93 | } 94 | } 95 | 96 | 97 | static int 98 | 
is_cs_conditional_cflow_ins(cs_insn *ins) 99 | { 100 | switch(ins->id) { 101 | case ARM64_INS_B: 102 | if (ins->detail->arm64.cc != ARM64_CC_AL) { 103 | return 1; 104 | } 105 | return 0; 106 | case ARM64_INS_CBNZ: 107 | case ARM64_INS_CBZ: 108 | case ARM64_INS_TBNZ: 109 | case ARM64_INS_TBZ: 110 | return 1; 111 | default: 112 | return 0; 113 | } 114 | } 115 | 116 | 117 | static int 118 | is_cs_privileged_ins(cs_insn *ins) 119 | { 120 | switch(ins->id) { 121 | /* XXX: todo */ 122 | default: 123 | return 0; 124 | } 125 | } 126 | 127 | 128 | static int 129 | is_cs_indirect_ins(cs_insn *ins) 130 | { 131 | switch(ins->id) { 132 | case ARM64_INS_BR: 133 | case ARM64_INS_BLR: 134 | return 1; 135 | default: 136 | return 0; 137 | } 138 | } 139 | 140 | 141 | static uint8_t 142 | cs_to_nucleus_op_type(arm64_op_type op) 143 | { 144 | switch(op) { 145 | case ARM64_OP_REG: 146 | return Operand::OP_TYPE_REG; 147 | case ARM64_OP_IMM: 148 | return Operand::OP_TYPE_IMM; 149 | case ARM64_OP_MEM: 150 | return Operand::OP_TYPE_MEM; 151 | case ARM64_OP_FP: 152 | return Operand::OP_TYPE_FP; 153 | case ARM64_OP_INVALID: 154 | default: 155 | return Operand::OP_TYPE_NONE; 156 | } 157 | } 158 | 159 | 160 | int 161 | nucleus_disasm_bb_aarch64(Binary *bin, DisasmSection *dis, BB *bb) 162 | { 163 | int init, ret, jmp, indir, cflow, cond, call, nop, only_nop, priv, trap, ndisassembled; 164 | csh cs_dis; 165 | cs_mode cs_mode_flags; 166 | cs_insn *cs_ins; 167 | cs_arm64_op *cs_op; 168 | const uint8_t *pc; 169 | uint64_t pc_addr, offset; 170 | size_t i, j, n; 171 | Instruction *ins; 172 | Operand *op; 173 | 174 | init = 0; 175 | cs_ins = NULL; 176 | 177 | switch(bin->bits) { 178 | case 64: 179 | cs_mode_flags = (cs_mode)(CS_MODE_ARM); 180 | break; 181 | default: 182 | print_err("unsupported bit width %u for architecture %s", bin->bits, bin->arch_str.c_str()); 183 | goto fail; 184 | } 185 | 186 | if(cs_open(CS_ARCH_ARM64, cs_mode_flags, &cs_dis) != CS_ERR_OK) { 187 | print_err("failed to initialize 
libcapstone"); 188 | goto fail; 189 | } 190 | init = 1; 191 | cs_option(cs_dis, CS_OPT_DETAIL, CS_OPT_ON); 192 | 193 | cs_ins = cs_malloc(cs_dis); 194 | if(!cs_ins) { 195 | print_err("out of memory"); 196 | goto fail; 197 | } 198 | 199 | offset = bb->start - dis->section->vma; 200 | if((bb->start < dis->section->vma) || (offset >= dis->section->size)) { 201 | print_err("basic block address points outside of section '%s'", dis->section->name.c_str()); 202 | goto fail; 203 | } 204 | 205 | pc = dis->section->bytes + offset; 206 | n = dis->section->size - offset; 207 | pc_addr = bb->start; 208 | bb->end = bb->start; 209 | bb->section = dis->section; 210 | ndisassembled = 0; 211 | only_nop = 0; 212 | while(cs_disasm_iter(cs_dis, &pc, &n, &pc_addr, cs_ins)) { 213 | if(cs_ins->id == ARM64_INS_INVALID) { 214 | bb->invalid = 1; 215 | bb->end += 1; 216 | break; 217 | } 218 | if(!cs_ins->size) { 219 | break; 220 | } 221 | 222 | trap = is_cs_trap_ins(cs_ins); 223 | nop = is_cs_nop_ins(cs_ins); 224 | ret = is_cs_ret_ins(cs_ins); 225 | jmp = is_cs_unconditional_jmp_ins(cs_ins) || is_cs_conditional_cflow_ins(cs_ins); 226 | cond = is_cs_conditional_cflow_ins(cs_ins); 227 | cflow = is_cs_cflow_ins(cs_ins); 228 | call = is_cs_call_ins(cs_ins); 229 | priv = is_cs_privileged_ins(cs_ins); 230 | indir = is_cs_indirect_ins(cs_ins); 231 | 232 | if(!ndisassembled && nop) only_nop = 1; /* group nop instructions together */ 233 | if(!only_nop && nop) break; 234 | if(only_nop && !nop) break; 235 | 236 | ndisassembled++; 237 | 238 | bb->end += cs_ins->size; 239 | bb->insns.push_back(Instruction()); 240 | if(priv) { 241 | bb->privileged = true; 242 | } 243 | if(nop) { 244 | bb->padding = true; 245 | } 246 | if(trap) { 247 | bb->trap = true; 248 | } 249 | 250 | ins = &bb->insns.back(); 251 | ins->id = cs_ins->id; 252 | ins->start = cs_ins->address; 253 | ins->size = cs_ins->size; 254 | ins->mnem = std::string(cs_ins->mnemonic); 255 | ins->op_str = std::string(cs_ins->op_str); 256 | 
ins->privileged = priv; 257 | ins->trap = trap; 258 | if(nop) ins->flags |= Instruction::INS_FLAG_NOP; 259 | if(ret) ins->flags |= Instruction::INS_FLAG_RET; 260 | if(jmp) ins->flags |= Instruction::INS_FLAG_JMP; 261 | if(cond) ins->flags |= Instruction::INS_FLAG_COND; 262 | if(cflow) ins->flags |= Instruction::INS_FLAG_CFLOW; 263 | if(call) ins->flags |= Instruction::INS_FLAG_CALL; 264 | if(indir) ins->flags |= Instruction::INS_FLAG_INDIRECT; 265 | 266 | for(i = 0; i < cs_ins->detail->arm64.op_count; i++) { 267 | cs_op = &cs_ins->detail->arm64.operands[i]; 268 | ins->operands.push_back(Operand()); 269 | op = &ins->operands.back(); 270 | op->type = cs_to_nucleus_op_type(cs_op->type); 271 | if(op->type == Operand::OP_TYPE_IMM) { 272 | op->aarch64_value.imm = cs_op->imm; 273 | } else if(op->type == Operand::OP_TYPE_REG) { 274 | op->aarch64_value.reg = (arm64_reg)cs_op->reg; 275 | } else if(op->type == Operand::OP_TYPE_FP) { 276 | op->aarch64_value.fp = cs_op->fp; 277 | } else if(op->type == Operand::OP_TYPE_MEM) { 278 | op->aarch64_value.mem.base = cs_op->mem.base; 279 | op->aarch64_value.mem.index = cs_op->mem.index; 280 | op->aarch64_value.mem.disp = cs_op->mem.disp; 281 | if(cflow) ins->flags |= Instruction::INS_FLAG_INDIRECT; 282 | } 283 | } 284 | 285 | if(cflow) { 286 | for(j = 0; j < cs_ins->detail->arm64.op_count; j++) { 287 | cs_op = &cs_ins->detail->arm64.operands[j]; 288 | if(cs_op->type == ARM64_OP_IMM) { 289 | ins->target = cs_op->imm; 290 | } 291 | } 292 | } 293 | 294 | if(cflow) { 295 | /* end of basic block */ 296 | break; 297 | } 298 | } 299 | 300 | if(!ndisassembled) { 301 | bb->invalid = 1; 302 | bb->end += 1; /* ensure forward progress */ 303 | } 304 | 305 | ret = ndisassembled; 306 | goto cleanup; 307 | 308 | fail: 309 | ret = -1; 310 | 311 | cleanup: 312 | if(cs_ins) { 313 | cs_free(cs_ins, 1); 314 | } 315 | if(init) { 316 | cs_close(&cs_dis); 317 | } 318 | return ret; 319 | } 320 | 
-------------------------------------------------------------------------------- /disasm-aarch64.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DISASM_AARCH64_H 2 | #define NUCLEUS_DISASM_AARCH64_H 3 | 4 | #include "disasm.h" 5 | 6 | int nucleus_disasm_bb_aarch64(Binary *bin, DisasmSection *dis, BB *bb); 7 | 8 | #endif /* NUCLEUS_DISASM_AARCH64_H */ 9 | -------------------------------------------------------------------------------- /disasm-arm.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "disasm-arm.h" 4 | #include "log.h" 5 | 6 | 7 | static int 8 | is_cs_nop_ins(cs_insn *ins) 9 | { 10 | switch(ins->id) { 11 | case ARM_INS_NOP: 12 | return 1; 13 | default: 14 | return 0; 15 | } 16 | } 17 | 18 | 19 | static int 20 | is_cs_trap_ins(cs_insn *ins) 21 | { 22 | switch(ins->id) { 23 | /* XXX: todo */ 24 | default: 25 | return 0; 26 | } 27 | } 28 | 29 | 30 | static int 31 | is_cs_call_ins(cs_insn *ins) 32 | { 33 | switch(ins->id) { 34 | case ARM_INS_BL: 35 | case ARM_INS_BLX: 36 | return 1; 37 | default: 38 | return 0; 39 | } 40 | } 41 | 42 | 43 | static int 44 | is_cs_ret_ins(cs_insn *ins) 45 | { 46 | size_t i; 47 | 48 | /* bx lr */ 49 | if(ins->id == ARM_INS_BX 50 | && ins->detail->arm.op_count == 1 51 | && ins->detail->arm.operands[0].type == ARM_OP_REG 52 | && ins->detail->arm.operands[0].reg == ARM_REG_LR) { 53 | return 1; 54 | } 55 | 56 | /* ldmfd sp!, {..., pc} */ 57 | if(ins->id == ARM_INS_POP) { 58 | for(i = 0; i < ins->detail->arm.op_count; i++) { 59 | if(ins->detail->arm.operands[i].type == ARM_OP_REG && 60 | ins->detail->arm.operands[i].reg == ARM_REG_PC) { 61 | return 1; 62 | } 63 | } 64 | } 65 | 66 | /* mov pc, lr */ 67 | if(ins->id == ARM_INS_MOV 68 | && ins->detail->arm.operands[0].type == ARM_OP_REG 69 | && ins->detail->arm.operands[0].reg == ARM_REG_PC 70 | && ins->detail->arm.operands[1].type == ARM_OP_REG 71 | && 
ins->detail->arm.operands[1].reg == ARM_REG_LR) { 72 | return 1; 73 | } 74 | 75 | return 0; 76 | } 77 | 78 | 79 | static int 80 | is_cs_unconditional_jmp_ins(cs_insn *ins) 81 | { 82 | /* b rN */ 83 | if(ins->id == ARM_INS_B 84 | && ins->detail->arm.cc == ARM_CC_AL) { 85 | return 1; 86 | } 87 | 88 | /* mov pc, rN */ 89 | if(ins->id == ARM_INS_MOV 90 | && ins->detail->arm.operands[0].type == ARM_OP_REG 91 | && ins->detail->arm.operands[0].reg == ARM_REG_PC 92 | && ins->detail->arm.operands[1].type == ARM_OP_REG 93 | && ins->detail->arm.operands[1].reg != ARM_REG_LR) { 94 | return 1; 95 | } 96 | 97 | /* ldrls pc, {...} */ 98 | if(ins->id == ARM_INS_LDR 99 | && ins->detail->arm.operands[0].type == ARM_OP_REG 100 | && ins->detail->arm.operands[0].reg == ARM_REG_PC) { 101 | return 1; 102 | } 103 | 104 | return 0; 105 | } 106 | 107 | 108 | static int 109 | is_cs_conditional_cflow_ins(cs_insn *ins) 110 | { 111 | switch(ins->id) { 112 | case ARM_INS_B: 113 | case ARM_INS_BL: 114 | case ARM_INS_BLX: 115 | if (ins->detail->arm.cc != ARM_CC_AL) { 116 | return 1; 117 | } 118 | return 0; 119 | default: 120 | return 0; 121 | } 122 | } 123 | 124 | 125 | static int 126 | is_cs_cflow_ins(cs_insn *ins) 127 | { 128 | size_t i; 129 | 130 | /* XXX: Capstone does not provide information for all generic groups 131 | * for arm instructions, unlike x86, so we have to do it manually. 
132 | * Once this is implemented, it will suffice to check for the following groups: 133 | * CS_GRP_JUMP, CS_GRP_CALL, CS_GRP_RET, CS_GRP_IRET */ 134 | 135 | if(is_cs_unconditional_jmp_ins(ins) || 136 | is_cs_conditional_cflow_ins(ins) || 137 | is_cs_call_ins(ins) || 138 | is_cs_ret_ins(ins)) { 139 | return 1; 140 | } 141 | 142 | return 0; 143 | } 144 | 145 | 146 | static int 147 | is_cs_indirect_ins(cs_insn *ins) 148 | { 149 | /* mov pc, rN */ 150 | if(ins->id == ARM_INS_MOV 151 | && ins->detail->arm.operands[0].type == ARM_OP_REG 152 | && ins->detail->arm.operands[0].reg == ARM_REG_PC 153 | && ins->detail->arm.operands[1].type == ARM_OP_REG 154 | && ins->detail->arm.operands[1].reg != ARM_REG_LR) { 155 | return 1; 156 | } 157 | 158 | /* ldrls pc, {...} */ 159 | if(ins->id == ARM_INS_LDR 160 | && ins->detail->arm.operands[0].type == ARM_OP_REG 161 | && ins->detail->arm.operands[0].reg == ARM_REG_PC) { 162 | return 1; 163 | } 164 | 165 | switch(ins->id) { 166 | case ARM_INS_BX: 167 | case ARM_INS_BLX: 168 | case ARM_INS_BXJ: 169 | if(ins->detail->arm.operands[0].type == ARM_OP_REG && 170 | ins->detail->arm.operands[0].reg == ARM_REG_PC) { 171 | return 1; 172 | } 173 | return 0; 174 | default: 175 | return 0; 176 | } 177 | } 178 | 179 | 180 | static int 181 | is_cs_privileged_ins(cs_insn *ins) 182 | { 183 | switch(ins->id) { 184 | /* XXX: todo */ 185 | default: 186 | return 0; 187 | } 188 | } 189 | 190 | 191 | static uint8_t 192 | cs_to_nucleus_op_type(arm_op_type op) 193 | { 194 | switch(op) { 195 | case ARM_OP_REG: 196 | return Operand::OP_TYPE_REG; 197 | case ARM_OP_IMM: 198 | return Operand::OP_TYPE_IMM; 199 | case ARM_OP_MEM: 200 | return Operand::OP_TYPE_MEM; 201 | case ARM_OP_FP: 202 | return Operand::OP_TYPE_FP; 203 | case ARM_OP_INVALID: 204 | default: 205 | return Operand::OP_TYPE_NONE; 206 | } 207 | } 208 | 209 | 210 | int 211 | nucleus_disasm_bb_arm(Binary *bin, DisasmSection *dis, BB *bb) 212 | { 213 | int init, ret, jmp, indir, cflow, cond, call, nop, 
only_nop, priv, trap, ndisassembled; 214 | csh cs_dis; 215 | cs_mode cs_mode_flags; 216 | cs_insn *cs_ins; 217 | cs_arm_op *cs_op; 218 | const uint8_t *pc; 219 | uint64_t pc_addr, offset; 220 | size_t i, j, n; 221 | Instruction *ins; 222 | Operand *op; 223 | 224 | init = 0; 225 | cs_ins = NULL; 226 | 227 | switch(bin->bits) { 228 | case 32: 229 | cs_mode_flags = (cs_mode)(CS_MODE_ARM); 230 | break; 231 | default: 232 | print_err("unsupported bit width %u for architecture %s", bin->bits, bin->arch_str.c_str()); 233 | goto fail; 234 | } 235 | 236 | if(cs_open(CS_ARCH_ARM, cs_mode_flags, &cs_dis) != CS_ERR_OK) { 237 | print_err("failed to initialize libcapstone"); 238 | goto fail; 239 | } 240 | init = 1; 241 | cs_option(cs_dis, CS_OPT_DETAIL, CS_OPT_ON); 242 | 243 | cs_ins = cs_malloc(cs_dis); 244 | if(!cs_ins) { 245 | print_err("out of memory"); 246 | goto fail; 247 | } 248 | 249 | offset = bb->start - dis->section->vma; 250 | if((bb->start < dis->section->vma) || (offset >= dis->section->size)) { 251 | print_err("basic block address points outside of section '%s'", dis->section->name.c_str()); 252 | goto fail; 253 | } 254 | 255 | pc = dis->section->bytes + offset; 256 | n = dis->section->size - offset; 257 | pc_addr = bb->start; 258 | bb->end = bb->start; 259 | bb->section = dis->section; 260 | ndisassembled = 0; 261 | only_nop = 0; 262 | while(cs_disasm_iter(cs_dis, &pc, &n, &pc_addr, cs_ins)) { 263 | if(cs_ins->id == ARM_INS_INVALID) { 264 | bb->invalid = 1; 265 | bb->end += 1; 266 | break; 267 | } 268 | if(!cs_ins->size) { 269 | break; 270 | } 271 | 272 | trap = is_cs_trap_ins(cs_ins); 273 | nop = is_cs_nop_ins(cs_ins); 274 | ret = is_cs_ret_ins(cs_ins); 275 | jmp = is_cs_unconditional_jmp_ins(cs_ins) || is_cs_conditional_cflow_ins(cs_ins); 276 | cond = is_cs_conditional_cflow_ins(cs_ins); 277 | cflow = is_cs_cflow_ins(cs_ins); 278 | call = is_cs_call_ins(cs_ins); 279 | priv = is_cs_privileged_ins(cs_ins); 280 | indir = is_cs_indirect_ins(cs_ins); 281 | 282 | 
if(!ndisassembled && nop) only_nop = 1; /* group nop instructions together */ 283 | if(!only_nop && nop) break; 284 | if(only_nop && !nop) break; 285 | 286 | ndisassembled++; 287 | 288 | bb->end += cs_ins->size; 289 | bb->insns.push_back(Instruction()); 290 | if(priv) { 291 | bb->privileged = true; 292 | } 293 | if(nop) { 294 | bb->padding = true; 295 | } 296 | if(trap) { 297 | bb->trap = true; 298 | } 299 | 300 | ins = &bb->insns.back(); 301 | ins->id = cs_ins->id; 302 | ins->start = cs_ins->address; 303 | ins->size = cs_ins->size; 304 | ins->mnem = std::string(cs_ins->mnemonic); 305 | ins->op_str = std::string(cs_ins->op_str); 306 | ins->privileged = priv; 307 | ins->trap = trap; 308 | if(nop) ins->flags |= Instruction::INS_FLAG_NOP; 309 | if(ret) ins->flags |= Instruction::INS_FLAG_RET; 310 | if(jmp) ins->flags |= Instruction::INS_FLAG_JMP; 311 | if(cond) ins->flags |= Instruction::INS_FLAG_COND; 312 | if(cflow) ins->flags |= Instruction::INS_FLAG_CFLOW; 313 | if(call) ins->flags |= Instruction::INS_FLAG_CALL; 314 | if(indir) ins->flags |= Instruction::INS_FLAG_INDIRECT; 315 | 316 | for(i = 0; i < cs_ins->detail->arm.op_count; i++) { 317 | cs_op = &cs_ins->detail->arm.operands[i]; 318 | ins->operands.push_back(Operand()); 319 | op = &ins->operands.back(); 320 | op->type = cs_to_nucleus_op_type(cs_op->type); 321 | if(op->type == Operand::OP_TYPE_IMM) { 322 | op->arm_value.imm = cs_op->imm; 323 | } else if(op->type == Operand::OP_TYPE_REG) { 324 | op->arm_value.reg = (arm_reg)cs_op->reg; 325 | } else if(op->type == Operand::OP_TYPE_FP) { 326 | op->arm_value.fp = cs_op->fp; 327 | } else if(op->type == Operand::OP_TYPE_MEM) { 328 | op->arm_value.mem.base = cs_op->mem.base; 329 | op->arm_value.mem.index = cs_op->mem.index; 330 | op->arm_value.mem.scale = cs_op->mem.scale; 331 | op->arm_value.mem.disp = cs_op->mem.disp; 332 | if(cflow) ins->flags |= Instruction::INS_FLAG_INDIRECT; 333 | } 334 | } 335 | 336 | if(cflow) { 337 | for(j = 0; j < 
cs_ins->detail->arm.op_count; j++) { 338 | cs_op = &cs_ins->detail->arm.operands[j]; 339 | if(cs_op->type == ARM_OP_IMM) { 340 | ins->target = cs_op->imm; 341 | } 342 | } 343 | } 344 | 345 | if(cflow) { 346 | /* end of basic block */ 347 | break; 348 | } 349 | } 350 | 351 | if(!ndisassembled) { 352 | bb->invalid = 1; 353 | bb->end += 1; /* ensure forward progress */ 354 | } 355 | 356 | ret = ndisassembled; 357 | goto cleanup; 358 | 359 | fail: 360 | ret = -1; 361 | 362 | cleanup: 363 | if(cs_ins) { 364 | cs_free(cs_ins, 1); 365 | } 366 | if(init) { 367 | cs_close(&cs_dis); 368 | } 369 | return ret; 370 | } 371 | -------------------------------------------------------------------------------- /disasm-arm.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DISASM_ARM_H 2 | #define NUCLEUS_DISASM_ARM_H 3 | 4 | #include "disasm.h" 5 | 6 | int nucleus_disasm_bb_arm(Binary *bin, DisasmSection *dis, BB *bb); 7 | 8 | #endif /* NUCLEUS_DISASM_ARM_H */ 9 | -------------------------------------------------------------------------------- /disasm-mips.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "disasm-mips.h" 4 | #include "log.h" 5 | 6 | 7 | static int 8 | is_cs_nop_ins(cs_insn *ins) 9 | { 10 | switch(ins->id) { 11 | case MIPS_INS_NOP: 12 | return 1; 13 | default: 14 | return 0; 15 | } 16 | } 17 | 18 | 19 | static int 20 | is_cs_trap_ins(cs_insn *ins) 21 | { 22 | switch(ins->id) { 23 | /* XXX: todo */ 24 | default: 25 | return 0; 26 | } 27 | } 28 | 29 | 30 | static int 31 | is_cs_cflow_ins(cs_insn *ins) 32 | { 33 | /* XXX: Capstone does not provide information for all generic groups 34 | * for mips instructions, unlike x86, so we have to do it manually. 
35 | * Once this is implemented, it will suffice to check for the following groups: 36 | * CS_GRP_JUMP, CS_GRP_CALL, CS_GRP_RET, CS_GRP_IRET */ 37 | 38 | switch(ins->id) { 39 | case MIPS_INS_J: 40 | case MIPS_INS_JR: 41 | case MIPS_INS_B: 42 | case MIPS_INS_BAL: 43 | case MIPS_INS_JAL: 44 | case MIPS_INS_JALR: 45 | case MIPS_INS_BEQ: 46 | case MIPS_INS_BNE: 47 | case MIPS_INS_BGTZ: 48 | case MIPS_INS_BGEZ: 49 | case MIPS_INS_BNEZ: 50 | case MIPS_INS_BEQZ: 51 | case MIPS_INS_BLEZ: 52 | case MIPS_INS_BLTZ: 53 | return 1; 54 | default: 55 | return 0; 56 | } 57 | } 58 | 59 | 60 | static int 61 | is_cs_call_ins(cs_insn *ins) 62 | { 63 | switch(ins->id) { 64 | case MIPS_INS_BAL: 65 | case MIPS_INS_JAL: 66 | case MIPS_INS_JALR: 67 | return 1; 68 | default: 69 | return 0; 70 | } 71 | } 72 | 73 | 74 | static int 75 | is_cs_ret_ins(cs_insn *ins) 76 | { 77 | /* jr ra */ 78 | if(ins->id == MIPS_INS_JR 79 | && ins->detail->mips.operands[0].type == MIPS_OP_REG 80 | && ins->detail->mips.operands[0].reg == MIPS_REG_RA) { 81 | return 1; 82 | } 83 | 84 | return 0; 85 | } 86 | 87 | 88 | static int 89 | is_cs_unconditional_jmp_ins(cs_insn *ins) 90 | { 91 | switch(ins->id) { 92 | case MIPS_INS_B: 93 | case MIPS_INS_J: 94 | return 1; 95 | case MIPS_INS_JR: 96 | if (ins->detail->mips.operands[0].reg != MIPS_REG_RA) { 97 | return 1; 98 | } 99 | return 0; 100 | default: 101 | return 0; 102 | } 103 | } 104 | 105 | 106 | static int 107 | is_cs_conditional_cflow_ins(cs_insn *ins) 108 | { 109 | switch(ins->id) { 110 | case MIPS_INS_BEQ: 111 | case MIPS_INS_BNE: 112 | case MIPS_INS_BGTZ: 113 | case MIPS_INS_BGEZ: 114 | case MIPS_INS_BNEZ: 115 | case MIPS_INS_BEQZ: 116 | case MIPS_INS_BLEZ: 117 | case MIPS_INS_BLTZ: 118 | return 1; 119 | default: 120 | return 0; 121 | } 122 | } 123 | 124 | 125 | static int 126 | is_cs_privileged_ins(cs_insn *ins) 127 | { 128 | switch(ins->id) { 129 | /* XXX: todo */ 130 | default: 131 | return 0; 132 | } 133 | } 134 | 135 | 136 | static int 137 | 
is_cs_indirect_ins(cs_insn *ins) 138 | { 139 | /* jr rN */ 140 | if(ins->id == MIPS_INS_JR 141 | && ins->detail->mips.operands[0].type == MIPS_OP_REG 142 | && ins->detail->mips.operands[0].reg != MIPS_REG_RA) { 143 | return 1; 144 | } 145 | 146 | /* jalr rN */ 147 | if(ins->id == MIPS_INS_JALR) { 148 | return 1; 149 | } 150 | 151 | return 0; 152 | } 153 | 154 | 155 | static uint8_t 156 | cs_to_nucleus_op_type(mips_op_type op) 157 | { 158 | switch(op) { 159 | case MIPS_OP_REG: 160 | return Operand::OP_TYPE_REG; 161 | case MIPS_OP_IMM: 162 | return Operand::OP_TYPE_IMM; 163 | case MIPS_OP_MEM: 164 | return Operand::OP_TYPE_MEM; 165 | case MIPS_OP_INVALID: 166 | default: 167 | return Operand::OP_TYPE_NONE; 168 | } 169 | } 170 | 171 | 172 | int 173 | nucleus_disasm_bb_mips(Binary *bin, DisasmSection *dis, BB *bb) 174 | { 175 | int init, ret, jmp, cflow, indir, cond, call, nop, only_nop, priv, trap, ndisassembled; 176 | csh cs_dis; 177 | cs_mode cs_mode_flags; 178 | cs_insn *cs_ins; 179 | cs_mips_op *cs_op; 180 | const uint8_t *pc; 181 | uint64_t pc_addr, offset; 182 | size_t i, j, n; 183 | Instruction *ins, *last_cflow; 184 | Operand *op; 185 | 186 | init = 0; 187 | cs_ins = nullptr; 188 | last_cflow = nullptr; 189 | 190 | switch(bin->bits) { 191 | case 64: 192 | cs_mode_flags = (cs_mode)(CS_MODE_BIG_ENDIAN | CS_MODE_64); 193 | break; 194 | case 32: 195 | cs_mode_flags = (cs_mode)(CS_MODE_BIG_ENDIAN | CS_MODE_32); 196 | break; 197 | case 16: 198 | cs_mode_flags = (cs_mode)(CS_MODE_BIG_ENDIAN | CS_MODE_16); 199 | break; 200 | default: 201 | print_err("unsupported bit width %u for architecture %s", bin->bits, bin->arch_str.c_str()); 202 | goto fail; 203 | } 204 | 205 | if(cs_open(CS_ARCH_MIPS, cs_mode_flags, &cs_dis) != CS_ERR_OK) { 206 | print_err("failed to initialize libcapstone"); 207 | goto fail; 208 | } 209 | init = 1; 210 | cs_option(cs_dis, CS_OPT_DETAIL, CS_OPT_ON); 211 | 212 | cs_ins = cs_malloc(cs_dis); 213 | if(!cs_ins) { 214 | print_err("out of memory"); 215 
| goto fail; 216 | } 217 | 218 | offset = bb->start - dis->section->vma; 219 | if((bb->start < dis->section->vma) || (offset >= dis->section->size)) { 220 | print_err("basic block address points outside of section '%s'", dis->section->name.c_str()); 221 | goto fail; 222 | } 223 | 224 | pc = dis->section->bytes + offset; 225 | n = dis->section->size - offset; 226 | pc_addr = bb->start; 227 | bb->end = bb->start; 228 | bb->section = dis->section; 229 | ndisassembled = 0; 230 | only_nop = 0; 231 | while(cs_disasm_iter(cs_dis, &pc, &n, &pc_addr, cs_ins)) { 232 | if(cs_ins->id == MIPS_INS_INVALID) { 233 | bb->invalid = 1; 234 | bb->end += 1; 235 | break; 236 | } 237 | if(!cs_ins->size) { 238 | break; 239 | } 240 | 241 | trap = is_cs_trap_ins(cs_ins); 242 | nop = is_cs_nop_ins(cs_ins); 243 | ret = is_cs_ret_ins(cs_ins); 244 | jmp = is_cs_unconditional_jmp_ins(cs_ins) || is_cs_conditional_cflow_ins(cs_ins); 245 | cond = is_cs_conditional_cflow_ins(cs_ins); 246 | cflow = is_cs_cflow_ins(cs_ins); 247 | call = is_cs_call_ins(cs_ins); 248 | priv = is_cs_privileged_ins(cs_ins); 249 | indir = is_cs_indirect_ins(cs_ins); 250 | 251 | if(!ndisassembled && nop) only_nop = 1; /* group nop instructions together */ 252 | if(!last_cflow && !only_nop && nop) break; 253 | if(!last_cflow && only_nop && !nop) break; 254 | 255 | ndisassembled++; 256 | 257 | bb->end += cs_ins->size; 258 | bb->insns.push_back(Instruction()); 259 | if(priv) { 260 | bb->privileged = true; 261 | } 262 | if(nop) { 263 | bb->padding = true; 264 | } 265 | if(trap) { 266 | bb->trap = true; 267 | } 268 | 269 | ins = &bb->insns.back(); 270 | ins->id = cs_ins->id; 271 | ins->start = cs_ins->address; 272 | ins->size = cs_ins->size; 273 | ins->mnem = std::string(cs_ins->mnemonic); 274 | ins->op_str = std::string(cs_ins->op_str); 275 | ins->privileged = priv; 276 | ins->trap = trap; 277 | if(nop) ins->flags |= Instruction::INS_FLAG_NOP; 278 | if(ret) ins->flags |= Instruction::INS_FLAG_RET; 279 | if(jmp) ins->flags |= 
Instruction::INS_FLAG_JMP; 280 | if(cond) ins->flags |= Instruction::INS_FLAG_COND; 281 | if(cflow) ins->flags |= Instruction::INS_FLAG_CFLOW; 282 | if(call) ins->flags |= Instruction::INS_FLAG_CALL; 283 | if(indir) ins->flags |= Instruction::INS_FLAG_INDIRECT; 284 | 285 | for(i = 0; i < cs_ins->detail->mips.op_count; i++) { 286 | cs_op = &cs_ins->detail->mips.operands[i]; 287 | ins->operands.push_back(Operand()); 288 | op = &ins->operands.back(); 289 | op->type = cs_to_nucleus_op_type(cs_op->type); 290 | if(op->type == Operand::OP_TYPE_IMM) { 291 | op->mips_value.imm = cs_op->imm; 292 | } else if(op->type == Operand::OP_TYPE_REG) { 293 | op->mips_value.reg = (mips_reg)cs_op->reg; 294 | } else if(op->type == Operand::OP_TYPE_MEM) { 295 | op->mips_value.mem.base = cs_op->mem.base; 296 | op->mips_value.mem.disp = cs_op->mem.disp; 297 | if(cflow) ins->flags |= Instruction::INS_FLAG_INDIRECT; 298 | } 299 | } 300 | 301 | if(cflow) { 302 | for(j = 0; j < cs_ins->detail->mips.op_count; j++) { 303 | cs_op = &cs_ins->detail->mips.operands[j]; 304 | if(cs_op->type == MIPS_OP_IMM) { 305 | ins->target = cs_op->imm; 306 | } 307 | } 308 | } 309 | 310 | /* end of basic block occurs after delay slot of cflow instructions */ 311 | if(last_cflow) { 312 | ins->flags = last_cflow->flags; 313 | ins->target = last_cflow->target; 314 | last_cflow->flags = 0; 315 | break; 316 | } 317 | if(cflow) { 318 | last_cflow = ins; 319 | } 320 | } 321 | 322 | if(!ndisassembled) { 323 | bb->invalid = 1; 324 | bb->end += 1; /* ensure forward progress */ 325 | } 326 | 327 | ret = ndisassembled; 328 | goto cleanup; 329 | 330 | fail: 331 | ret = -1; 332 | 333 | cleanup: 334 | if(cs_ins) { 335 | cs_free(cs_ins, 1); 336 | } 337 | if(init) { 338 | cs_close(&cs_dis); 339 | } 340 | return ret; 341 | } 342 | -------------------------------------------------------------------------------- /disasm-mips.h: -------------------------------------------------------------------------------- 1 | #ifndef 
NUCLEUS_DISASM_MIPS_H 2 | #define NUCLEUS_DISASM_MIPS_H 3 | 4 | #include "disasm.h" 5 | 6 | int nucleus_disasm_bb_mips(Binary *bin, DisasmSection *dis, BB *bb); 7 | 8 | #endif /* NUCLEUS_DISASM_MIPS_H */ 9 | -------------------------------------------------------------------------------- /disasm-ppc.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "disasm-ppc.h" 6 | #include "log.h" 7 | 8 | 9 | static int 10 | is_cs_nop_ins(cs_insn *ins) 11 | { 12 | cs_ppc *ppc; 13 | 14 | ppc = &ins->detail->ppc; 15 | switch(ins->id) { 16 | case PPC_INS_NOP: 17 | /* nop */ 18 | return 1; 19 | case PPC_INS_ORI: 20 | /* ori r0,r0,r0 */ 21 | if((ppc->op_count == 3) 22 | && (ppc->operands[0].type == PPC_OP_REG) 23 | && (ppc->operands[1].type == PPC_OP_REG) 24 | && (ppc->operands[2].type == PPC_OP_REG) 25 | && (ppc->operands[0].reg == 0) 26 | && (ppc->operands[1].reg == 0) 27 | && (ppc->operands[2].reg == 0)) { 28 | return 1; 29 | } 30 | return 0; 31 | default: 32 | return 0; 33 | } 34 | } 35 | 36 | 37 | static int 38 | is_cs_trap_ins(cs_insn *ins) 39 | { 40 | switch(ins->id) { 41 | case PPC_INS_TW: 42 | case PPC_INS_TWI: 43 | return 1; 44 | default: 45 | return 0; 46 | } 47 | } 48 | 49 | 50 | static int 51 | is_cs_cflow_ins(cs_insn *ins) 52 | { 53 | /* XXX: Capstone does not provide information for all generic groups 54 | * for ppc instructions, unlike x86, so we have to do it manually. 
55 | * Once this is implemented, it will suffice to check for the following groups: 56 | * CS_GRP_JUMP, CS_GRP_CALL, CS_GRP_RET, CS_GRP_IRET */ 57 | 58 | switch(ins->id) { 59 | case PPC_INS_B: 60 | case PPC_INS_BA: 61 | case PPC_INS_BC: 62 | case PPC_INS_BCA: 63 | case PPC_INS_BL: 64 | case PPC_INS_BLA: 65 | case PPC_INS_BLR: 66 | case PPC_INS_BCL: 67 | case PPC_INS_BCLA: 68 | case PPC_INS_BCTR: 69 | case PPC_INS_BCTRL: 70 | case PPC_INS_BCCTR: 71 | case PPC_INS_BCCTRL: 72 | return 1; 73 | default: 74 | return 0; 75 | } 76 | } 77 | 78 | 79 | static int 80 | is_cs_call_ins(cs_insn *ins) 81 | { 82 | switch(ins->id) { 83 | case PPC_INS_BL: 84 | case PPC_INS_BLA: 85 | return 1; 86 | default: 87 | return 0; 88 | } 89 | } 90 | 91 | 92 | static int 93 | is_cs_ret_ins(cs_insn *ins) 94 | { 95 | int32_t bo, bi; 96 | switch(ins->id) { 97 | case PPC_INS_BLR: 98 | return 1; 99 | case PPC_INS_BCLR: 100 | assert(ins->detail->ppc.op_count >= 2); 101 | assert(ins->detail->ppc.operands[0].type == PPC_OP_IMM); 102 | assert(ins->detail->ppc.operands[1].type == PPC_OP_IMM); 103 | bo = ins->detail->ppc.operands[0].imm; 104 | bi = ins->detail->ppc.operands[1].imm; 105 | if (bo == 20 && bi == 0) { 106 | return 1; 107 | } 108 | default: 109 | return 0; 110 | } 111 | } 112 | 113 | 114 | static int 115 | is_cs_unconditional_jmp_ins(cs_insn *ins) 116 | { 117 | int32_t bo, bi; 118 | switch(ins->id) { 119 | case PPC_INS_B: 120 | case PPC_INS_BA: 121 | case PPC_INS_BCTR: 122 | return 1; 123 | case PPC_INS_BCCTR: 124 | assert(ins->detail->ppc.op_count >= 2); 125 | assert(ins->detail->ppc.operands[0].type == PPC_OP_IMM); 126 | assert(ins->detail->ppc.operands[1].type == PPC_OP_IMM); 127 | bo = ins->detail->ppc.operands[0].imm; 128 | bi = ins->detail->ppc.operands[1].imm; 129 | if (bo == 20 && bi == 0) { 130 | return 1; 131 | } 132 | return 0; 133 | default: 134 | return 0; 135 | } 136 | } 137 | 138 | 139 | static int 140 | is_cs_conditional_cflow_ins(cs_insn *ins) 141 | { 142 | int32_t bo, bi; 143 
| switch(ins->id) { 144 | case PPC_INS_B: 145 | case PPC_INS_BA: 146 | if(ins->detail->ppc.bc == PPC_BC_INVALID) { 147 | return 0; 148 | } 149 | return 1; 150 | case PPC_INS_BC: 151 | case PPC_INS_BCA: 152 | assert(ins->detail->ppc.op_count >= 2); 153 | assert(ins->detail->ppc.operands[0].type == PPC_OP_IMM); 154 | assert(ins->detail->ppc.operands[1].type == PPC_OP_IMM); 155 | bo = ins->detail->ppc.operands[0].imm; 156 | bi = ins->detail->ppc.operands[1].imm; 157 | if(bo == 20 && bi == 0) { 158 | return 0; 159 | } 160 | return 1; 161 | default: 162 | return 0; 163 | } 164 | } 165 | 166 | 167 | static int 168 | is_cs_privileged_ins(cs_insn *ins) 169 | { 170 | switch(ins->id) { 171 | case PPC_INS_DCBI: 172 | case PPC_INS_MFMSR: 173 | case PPC_INS_MFSR: 174 | case PPC_INS_MFSRIN: 175 | case PPC_INS_MTMSR: 176 | case PPC_INS_MTSR: 177 | case PPC_INS_MTSRIN: 178 | case PPC_INS_RFI: 179 | case PPC_INS_TLBIA: 180 | case PPC_INS_TLBIE: 181 | case PPC_INS_TLBSYNC: 182 | return 1; 183 | default: 184 | return 0; 185 | } 186 | } 187 | 188 | 189 | static int 190 | is_cs_indirect_ins(cs_insn *ins) 191 | { 192 | switch(ins->id) { 193 | case PPC_INS_BCTR: 194 | case PPC_INS_BCTRL: 195 | case PPC_INS_BCCTR: 196 | case PPC_INS_BCCTRL: 197 | return 1; 198 | default: 199 | return 0; 200 | } 201 | } 202 | 203 | 204 | static uint8_t 205 | cs_to_nucleus_op_type(ppc_op_type op) 206 | { 207 | switch(op) { 208 | case PPC_OP_REG: 209 | return Operand::OP_TYPE_REG; 210 | case PPC_OP_IMM: 211 | return Operand::OP_TYPE_IMM; 212 | case PPC_OP_MEM: 213 | return Operand::OP_TYPE_MEM; 214 | case PPC_OP_CRX: 215 | case PPC_OP_INVALID: 216 | default: 217 | return Operand::OP_TYPE_NONE; 218 | } 219 | } 220 | 221 | 222 | int 223 | nucleus_disasm_bb_ppc(Binary *bin, DisasmSection *dis, BB *bb) 224 | { 225 | int init, ret, jmp, cflow, indir, cond, call, nop, only_nop, priv, trap, ndisassembled; 226 | csh cs_dis; 227 | cs_mode cs_mode_flags; 228 | cs_insn *cs_ins; 229 | cs_ppc_op *cs_op; 230 | const 
uint8_t *pc; 231 | uint64_t pc_addr, offset; 232 | size_t i, j, n; 233 | Instruction *ins; 234 | Operand *op; 235 | 236 | init = 0; 237 | cs_ins = NULL; 238 | 239 | switch(bin->bits) { 240 | case 64: 241 | cs_mode_flags = (cs_mode)(CS_MODE_BIG_ENDIAN | CS_MODE_64); 242 | break; 243 | case 32: 244 | cs_mode_flags = (cs_mode)(CS_MODE_BIG_ENDIAN); 245 | break; 246 | default: 247 | print_err("unsupported bit width %u for architecture %s", bin->bits, bin->arch_str.c_str()); 248 | goto fail; 249 | } 250 | 251 | if(cs_open(CS_ARCH_PPC, cs_mode_flags, &cs_dis) != CS_ERR_OK) { 252 | print_err("failed to initialize libcapstone"); 253 | goto fail; 254 | } 255 | init = 1; 256 | cs_option(cs_dis, CS_OPT_DETAIL, CS_OPT_ON); 257 | 258 | cs_ins = cs_malloc(cs_dis); 259 | if(!cs_ins) { 260 | print_err("out of memory"); 261 | goto fail; 262 | } 263 | 264 | offset = bb->start - dis->section->vma; 265 | if((bb->start < dis->section->vma) || (offset >= dis->section->size)) { 266 | print_err("basic block address points outside of section '%s'", dis->section->name.c_str()); 267 | goto fail; 268 | } 269 | 270 | pc = dis->section->bytes + offset; 271 | n = dis->section->size - offset; 272 | pc_addr = bb->start; 273 | bb->end = bb->start; 274 | bb->section = dis->section; 275 | ndisassembled = 0; 276 | only_nop = 0; 277 | while(cs_disasm_iter(cs_dis, &pc, &n, &pc_addr, cs_ins)) { 278 | if(cs_ins->id == PPC_INS_INVALID) { 279 | bb->invalid = 1; 280 | bb->end += 1; 281 | break; 282 | } 283 | if(!cs_ins->size) { 284 | break; 285 | } 286 | 287 | trap = is_cs_trap_ins(cs_ins); 288 | nop = is_cs_nop_ins(cs_ins); 289 | ret = is_cs_ret_ins(cs_ins); 290 | jmp = is_cs_unconditional_jmp_ins(cs_ins) || is_cs_conditional_cflow_ins(cs_ins); 291 | cond = is_cs_conditional_cflow_ins(cs_ins); 292 | cflow = is_cs_cflow_ins(cs_ins); 293 | call = is_cs_call_ins(cs_ins); 294 | priv = is_cs_privileged_ins(cs_ins); 295 | indir = is_cs_indirect_ins(cs_ins); 296 | 297 | if(!ndisassembled && nop) only_nop = 1; /* 
group nop instructions together */ 298 | if(!only_nop && nop) break; 299 | if(only_nop && !nop) break; 300 | 301 | ndisassembled++; 302 | 303 | bb->end += cs_ins->size; 304 | bb->insns.push_back(Instruction()); 305 | if(priv) { 306 | bb->privileged = true; 307 | } 308 | if(nop) { 309 | bb->padding = true; 310 | } 311 | if(trap) { 312 | bb->trap = true; 313 | } 314 | 315 | ins = &bb->insns.back(); 316 | ins->id = cs_ins->id; 317 | ins->start = cs_ins->address; 318 | ins->size = cs_ins->size; 319 | ins->mnem = std::string(cs_ins->mnemonic); 320 | ins->op_str = std::string(cs_ins->op_str); 321 | ins->privileged = priv; 322 | ins->trap = trap; 323 | if(nop) ins->flags |= Instruction::INS_FLAG_NOP; 324 | if(ret) ins->flags |= Instruction::INS_FLAG_RET; 325 | if(jmp) ins->flags |= Instruction::INS_FLAG_JMP; 326 | if(cond) ins->flags |= Instruction::INS_FLAG_COND; 327 | if(cflow) ins->flags |= Instruction::INS_FLAG_CFLOW; 328 | if(call) ins->flags |= Instruction::INS_FLAG_CALL; 329 | if(indir) ins->flags |= Instruction::INS_FLAG_INDIRECT; 330 | 331 | for(i = 0; i < cs_ins->detail->ppc.op_count; i++) { 332 | cs_op = &cs_ins->detail->ppc.operands[i]; 333 | ins->operands.push_back(Operand()); 334 | op = &ins->operands.back(); 335 | op->type = cs_to_nucleus_op_type(cs_op->type); 336 | if(op->type == Operand::OP_TYPE_IMM) { 337 | op->ppc_value.imm = cs_op->imm; 338 | } else if(op->type == Operand::OP_TYPE_REG) { 339 | op->ppc_value.reg = (ppc_reg)cs_op->reg; 340 | } else if(op->type == Operand::OP_TYPE_MEM) { 341 | op->ppc_value.mem.base = cs_op->mem.base; 342 | op->ppc_value.mem.disp = cs_op->mem.disp; 343 | } 344 | } 345 | 346 | if(cflow) { 347 | for(j = 0; j < cs_ins->detail->ppc.op_count; j++) { 348 | cs_op = &cs_ins->detail->ppc.operands[j]; 349 | if(cs_op->type == PPC_OP_IMM) { 350 | ins->target = cs_op->imm; 351 | } 352 | } 353 | } 354 | 355 | /* XXX: Some relocations entries point to symbols in sections 356 | * that are ignored by Nucleus, e.g. 
calls to external functions. 357 | * We ignore such calls directly at disasm level. */ 358 | if(call && ins->target == ins->start) { 359 | ins->flags &= ~Instruction::INS_FLAG_CALL; 360 | ins->flags &= ~Instruction::INS_FLAG_CFLOW; 361 | } 362 | 363 | if(cflow) { 364 | /* end of basic block */ 365 | break; 366 | } 367 | } 368 | 369 | if(!ndisassembled) { 370 | bb->invalid = 1; 371 | bb->end += 1; /* ensure forward progress */ 372 | } 373 | 374 | ret = ndisassembled; 375 | goto cleanup; 376 | 377 | fail: 378 | ret = -1; 379 | 380 | cleanup: 381 | if(cs_ins) { 382 | cs_free(cs_ins, 1); 383 | } 384 | if(init) { 385 | cs_close(&cs_dis); 386 | } 387 | return ret; 388 | } 389 | -------------------------------------------------------------------------------- /disasm-ppc.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DISASM_PPC_H 2 | #define NUCLEUS_DISASM_PPC_H 3 | 4 | #include "disasm.h" 5 | 6 | int nucleus_disasm_bb_ppc(Binary *bin, DisasmSection *dis, BB *bb); 7 | 8 | #endif /* NUCLEUS_DISASM_PPC_H */ 9 | -------------------------------------------------------------------------------- /disasm-x86.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "disasm-x86.h" 4 | #include "log.h" 5 | 6 | 7 | static int 8 | is_cs_nop_ins(cs_insn *ins) 9 | { 10 | switch(ins->id) { 11 | case X86_INS_NOP: 12 | case X86_INS_FNOP: 13 | return 1; 14 | default: 15 | return 0; 16 | } 17 | } 18 | 19 | 20 | static int 21 | is_cs_semantic_nop_ins(cs_insn *ins) 22 | { 23 | cs_x86 *x86; 24 | 25 | /* XXX: to make this truly platform-independent, we need some real 26 | * semantic analysis, but for now checking known cases is sufficient */ 27 | 28 | x86 = &ins->detail->x86; 29 | switch(ins->id) { 30 | case X86_INS_MOV: 31 | /* mov reg,reg */ 32 | if((x86->op_count == 2) 33 | && (x86->operands[0].type == X86_OP_REG) 34 | && (x86->operands[1].type == X86_OP_REG) 35 | && 
(x86->operands[0].reg == x86->operands[1].reg)) { 36 | return 1; 37 | } 38 | return 0; 39 | case X86_INS_XCHG: 40 | /* xchg reg,reg */ 41 | if((x86->op_count == 2) 42 | && (x86->operands[0].type == X86_OP_REG) 43 | && (x86->operands[1].type == X86_OP_REG) 44 | && (x86->operands[0].reg == x86->operands[1].reg)) { 45 | return 1; 46 | } 47 | return 0; 48 | case X86_INS_LEA: 49 | /* lea reg,[reg + 0x0] */ 50 | if((x86->op_count == 2) 51 | && (x86->operands[0].type == X86_OP_REG) 52 | && (x86->operands[1].type == X86_OP_MEM) 53 | && (x86->operands[1].mem.segment == X86_REG_INVALID) 54 | && (x86->operands[1].mem.base == x86->operands[0].reg) 55 | && (x86->operands[1].mem.index == X86_REG_INVALID) 56 | /* mem.scale is irrelevant since index is not used */ 57 | && (x86->operands[1].mem.disp == 0)) { 58 | return 1; 59 | } 60 | /* lea reg,[reg + eiz*x + 0x0] */ 61 | if((x86->op_count == 2) 62 | && (x86->operands[0].type == X86_OP_REG) 63 | && (x86->operands[1].type == X86_OP_MEM) 64 | && (x86->operands[1].mem.segment == X86_REG_INVALID) 65 | && (x86->operands[1].mem.base == x86->operands[0].reg) 66 | && (x86->operands[1].mem.index == X86_REG_EIZ) 67 | /* mem.scale is irrelevant since index is the zero-register */ 68 | && (x86->operands[1].mem.disp == 0)) { 69 | return 1; 70 | } 71 | return 0; 72 | default: 73 | return 0; 74 | } 75 | } 76 | 77 | 78 | static int 79 | is_cs_trap_ins(cs_insn *ins) 80 | { 81 | switch(ins->id) { 82 | case X86_INS_INT3: 83 | case X86_INS_UD2: 84 | return 1; 85 | default: 86 | return 0; 87 | } 88 | } 89 | 90 | 91 | static int 92 | is_cs_cflow_group(uint8_t g) 93 | { 94 | return (g == CS_GRP_JUMP) || (g == CS_GRP_CALL) || (g == CS_GRP_RET) || (g == CS_GRP_IRET); 95 | } 96 | 97 | 98 | static int 99 | is_cs_cflow_ins(cs_insn *ins) 100 | { 101 | size_t i; 102 | 103 | for(i = 0; i < ins->detail->groups_count; i++) { 104 | if(is_cs_cflow_group(ins->detail->groups[i])) { 105 | return 1; 106 | } 107 | } 108 | 109 | return 0; 110 | } 111 | 112 | 113 | static 
int 114 | is_cs_call_ins(cs_insn *ins) 115 | { 116 | switch(ins->id) { 117 | case X86_INS_CALL: 118 | case X86_INS_LCALL: 119 | return 1; 120 | default: 121 | return 0; 122 | } 123 | } 124 | 125 | 126 | static int 127 | is_cs_ret_ins(cs_insn *ins) 128 | { 129 | switch(ins->id) { 130 | case X86_INS_RET: 131 | case X86_INS_RETF: 132 | return 1; 133 | default: 134 | return 0; 135 | } 136 | } 137 | 138 | 139 | static int 140 | is_cs_unconditional_jmp_ins(cs_insn *ins) 141 | { 142 | switch(ins->id) { 143 | case X86_INS_JMP: 144 | return 1; 145 | default: 146 | return 0; 147 | } 148 | } 149 | 150 | 151 | static int 152 | is_cs_conditional_cflow_ins(cs_insn *ins) 153 | { 154 | switch(ins->id) { 155 | case X86_INS_JAE: 156 | case X86_INS_JA: 157 | case X86_INS_JBE: 158 | case X86_INS_JB: 159 | case X86_INS_JCXZ: 160 | case X86_INS_JECXZ: 161 | case X86_INS_JE: 162 | case X86_INS_JGE: 163 | case X86_INS_JG: 164 | case X86_INS_JLE: 165 | case X86_INS_JL: 166 | case X86_INS_JNE: 167 | case X86_INS_JNO: 168 | case X86_INS_JNP: 169 | case X86_INS_JNS: 170 | case X86_INS_JO: 171 | case X86_INS_JP: 172 | case X86_INS_JRCXZ: 173 | case X86_INS_JS: 174 | return 1; 175 | case X86_INS_JMP: 176 | default: 177 | return 0; 178 | } 179 | } 180 | 181 | 182 | static int 183 | is_cs_privileged_ins(cs_insn *ins) 184 | { 185 | switch(ins->id) { 186 | case X86_INS_HLT: 187 | case X86_INS_IN: 188 | case X86_INS_INSB: 189 | case X86_INS_INSW: 190 | case X86_INS_INSD: 191 | case X86_INS_OUT: 192 | case X86_INS_OUTSB: 193 | case X86_INS_OUTSW: 194 | case X86_INS_OUTSD: 195 | case X86_INS_RDMSR: 196 | case X86_INS_WRMSR: 197 | case X86_INS_RDPMC: 198 | case X86_INS_RDTSC: 199 | case X86_INS_LGDT: 200 | case X86_INS_LLDT: 201 | case X86_INS_LTR: 202 | case X86_INS_LMSW: 203 | case X86_INS_CLTS: 204 | case X86_INS_INVD: 205 | case X86_INS_INVLPG: 206 | case X86_INS_WBINVD: 207 | return 1; 208 | default: 209 | return 0; 210 | } 211 | } 212 | 213 | 214 | static uint8_t 215 | 
cs_to_nucleus_op_type(x86_op_type op) 216 | { 217 | switch(op) { 218 | case X86_OP_REG: 219 | return Operand::OP_TYPE_REG; 220 | case X86_OP_IMM: 221 | return Operand::OP_TYPE_IMM; 222 | case X86_OP_MEM: 223 | return Operand::OP_TYPE_MEM; 224 | #if CS_API_MAJOR < 4 /* X86_OP_FP does not exist in later versions */ 225 | case X86_OP_FP: 226 | return Operand::OP_TYPE_FP; 227 | #endif 228 | case X86_OP_INVALID: 229 | default: 230 | return Operand::OP_TYPE_NONE; 231 | } 232 | } 233 | 234 | 235 | int 236 | nucleus_disasm_bb_x86(Binary *bin, DisasmSection *dis, BB *bb) 237 | { 238 | int init, ret, jmp, cflow, cond, call, nop, only_nop, priv, trap, ndisassembled; 239 | csh cs_dis; 240 | cs_mode cs_mode; 241 | cs_insn *cs_ins; 242 | cs_x86_op *cs_op; 243 | const uint8_t *pc; 244 | uint64_t pc_addr, offset; 245 | size_t i, j, n; 246 | Instruction *ins; 247 | Operand *op; 248 | 249 | init = 0; 250 | cs_ins = NULL; 251 | 252 | switch(bin->bits) { 253 | case 64: 254 | cs_mode = CS_MODE_64; 255 | break; 256 | case 32: 257 | cs_mode = CS_MODE_32; 258 | break; 259 | case 16: 260 | cs_mode = CS_MODE_16; 261 | break; 262 | default: 263 | print_err("unsupported bit width %u for architecture %s", bin->bits, bin->arch_str.c_str()); 264 | goto fail; 265 | } 266 | 267 | if(cs_open(CS_ARCH_X86, cs_mode, &cs_dis) != CS_ERR_OK) { 268 | print_err("failed to initialize libcapstone"); 269 | goto fail; 270 | } 271 | init = 1; 272 | cs_option(cs_dis, CS_OPT_DETAIL, CS_OPT_ON); 273 | cs_option(cs_dis, CS_OPT_SYNTAX, CS_OPT_SYNTAX_INTEL); 274 | 275 | cs_ins = cs_malloc(cs_dis); 276 | if(!cs_ins) { 277 | print_err("out of memory"); 278 | goto fail; 279 | } 280 | 281 | offset = bb->start - dis->section->vma; 282 | if((bb->start < dis->section->vma) || (offset >= dis->section->size)) { 283 | print_err("basic block address points outside of section '%s'", dis->section->name.c_str()); 284 | goto fail; 285 | } 286 | 287 | pc = dis->section->bytes + offset; 288 | n = dis->section->size - offset; 289 | 
pc_addr = bb->start; 290 | bb->end = bb->start; 291 | bb->section = dis->section; 292 | ndisassembled = 0; 293 | only_nop = 0; 294 | while(cs_disasm_iter(cs_dis, &pc, &n, &pc_addr, cs_ins)) { 295 | if(cs_ins->id == X86_INS_INVALID) { 296 | bb->invalid = 1; 297 | bb->end += 1; 298 | break; 299 | } 300 | if(!cs_ins->size) { 301 | break; 302 | } 303 | 304 | trap = is_cs_trap_ins(cs_ins); 305 | nop = is_cs_nop_ins(cs_ins) 306 | /* Visual Studio sometimes places semantic nops at the function start */ 307 | || (is_cs_semantic_nop_ins(cs_ins) && (bin->type != Binary::BIN_TYPE_PE)) 308 | /* Visual Studio uses int3 for padding */ 309 | || (trap && (bin->type == Binary::BIN_TYPE_PE)); 310 | ret = is_cs_ret_ins(cs_ins); 311 | jmp = is_cs_unconditional_jmp_ins(cs_ins) || is_cs_conditional_cflow_ins(cs_ins); 312 | cond = is_cs_conditional_cflow_ins(cs_ins); 313 | cflow = is_cs_cflow_ins(cs_ins); 314 | call = is_cs_call_ins(cs_ins); 315 | priv = is_cs_privileged_ins(cs_ins); 316 | 317 | if(!ndisassembled && nop) only_nop = 1; /* group nop instructions together */ 318 | if(!only_nop && nop) break; 319 | if(only_nop && !nop) break; 320 | 321 | ndisassembled++; 322 | 323 | bb->end += cs_ins->size; 324 | bb->insns.push_back(Instruction()); 325 | if(priv) { 326 | bb->privileged = true; 327 | } 328 | if(nop) { 329 | bb->padding = true; 330 | } 331 | if(trap) { 332 | bb->trap = true; 333 | } 334 | 335 | ins = &bb->insns.back(); 336 | ins->start = cs_ins->address; 337 | ins->size = cs_ins->size; 338 | ins->addr_size = cs_ins->detail->x86.addr_size; 339 | ins->mnem = std::string(cs_ins->mnemonic); 340 | ins->op_str = std::string(cs_ins->op_str); 341 | ins->privileged = priv; 342 | ins->trap = trap; 343 | if(nop) ins->flags |= Instruction::INS_FLAG_NOP; 344 | if(ret) ins->flags |= Instruction::INS_FLAG_RET; 345 | if(jmp) ins->flags |= Instruction::INS_FLAG_JMP; 346 | if(cond) ins->flags |= Instruction::INS_FLAG_COND; 347 | if(cflow) ins->flags |= Instruction::INS_FLAG_CFLOW; 348 | 
if(call) ins->flags |= Instruction::INS_FLAG_CALL; 349 | 350 | for(i = 0; i < cs_ins->detail->x86.op_count; i++) { 351 | cs_op = &cs_ins->detail->x86.operands[i]; 352 | ins->operands.push_back(Operand()); 353 | op = &ins->operands.back(); 354 | op->type = cs_to_nucleus_op_type(cs_op->type); 355 | op->size = cs_op->size; 356 | if(op->type == Operand::OP_TYPE_IMM) { 357 | op->x86_value.imm = cs_op->imm; 358 | } else if(op->type == Operand::OP_TYPE_REG) { 359 | op->x86_value.reg = cs_op->reg; 360 | if(cflow) ins->flags |= Instruction::INS_FLAG_INDIRECT; 361 | } else if(op->type == Operand::OP_TYPE_FP) { 362 | #if CS_API_MAJOR < 4 /* cs_op->fp does not exist in later versions */ 363 | op->x86_value.fp = cs_op->fp; 364 | #else 365 | op->x86_value.fp = 0; 366 | #endif 367 | } else if(op->type == Operand::OP_TYPE_MEM) { 368 | op->x86_value.mem.segment = cs_op->mem.segment; 369 | op->x86_value.mem.base = cs_op->mem.base; 370 | op->x86_value.mem.index = cs_op->mem.index; 371 | op->x86_value.mem.scale = cs_op->mem.scale; 372 | op->x86_value.mem.disp = cs_op->mem.disp; 373 | if(cflow) ins->flags |= Instruction::INS_FLAG_INDIRECT; 374 | } 375 | } 376 | 377 | for(i = 0; i < cs_ins->detail->groups_count; i++) { 378 | if(is_cs_cflow_group(cs_ins->detail->groups[i])) { 379 | for(j = 0; j < cs_ins->detail->x86.op_count; j++) { 380 | cs_op = &cs_ins->detail->x86.operands[j]; 381 | if(cs_op->type == X86_OP_IMM) { 382 | ins->target = cs_op->imm; 383 | } 384 | } 385 | } 386 | } 387 | 388 | if(cflow) { 389 | /* end of basic block */ 390 | break; 391 | } 392 | } 393 | 394 | if(!ndisassembled) { 395 | bb->invalid = 1; 396 | bb->end += 1; /* ensure forward progress */ 397 | } 398 | 399 | ret = ndisassembled; 400 | goto cleanup; 401 | 402 | fail: 403 | ret = -1; 404 | 405 | cleanup: 406 | if(cs_ins) { 407 | cs_free(cs_ins, 1); 408 | } 409 | if(init) { 410 | cs_close(&cs_dis); 411 | } 412 | return ret; 413 | } 414 | 
-------------------------------------------------------------------------------- /disasm-x86.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DISASM_X86_H 2 | #define NUCLEUS_DISASM_X86_H 3 | 4 | #include "disasm.h" 5 | 6 | int nucleus_disasm_bb_x86(Binary *bin, DisasmSection *dis, BB *bb); 7 | 8 | #endif /* NUCLEUS_DISASM_X86_H */ 9 | -------------------------------------------------------------------------------- /disasm.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | #include 10 | 11 | #include 12 | 13 | #include "loader.h" 14 | #include "bb.h" 15 | #include "disasm.h" 16 | #include "strategy.h" 17 | #include "util.h" 18 | #include "options.h" 19 | #include "log.h" 20 | 21 | #include "disasm-aarch64.h" 22 | #include "disasm-arm.h" 23 | #include "disasm-mips.h" 24 | #include "disasm-ppc.h" 25 | #include "disasm-x86.h" 26 | 27 | 28 | /******************************************************************************* 29 | ** DisasmSection ** 30 | ******************************************************************************/ 31 | void 32 | DisasmSection::print_BBs(FILE *out) 33 | { 34 | fprintf(out, "
\n\n", 35 | section->name.c_str(), (section->type == Section::SEC_TYPE_CODE) ? "C" : "D", 36 | section->vma, section->size); 37 | sort_BBs(); 38 | for(auto &bb: BBs) { 39 | bb.print(out); 40 | } 41 | } 42 | 43 | 44 | void 45 | DisasmSection::sort_BBs() 46 | { 47 | BBs.sort(BB::comparator); 48 | } 49 | 50 | /******************************************************************************* 51 | ** AddressMap ** 52 | ******************************************************************************/ 53 | void 54 | AddressMap::insert(uint64_t addr) 55 | { 56 | if(!contains(addr)) { 57 | unmapped.push_back(addr); 58 | unmapped_lookup[addr] = unmapped.size()-1; 59 | } 60 | } 61 | 62 | 63 | bool 64 | AddressMap::contains(uint64_t addr) 65 | { 66 | return addrmap.count(addr) || unmapped_lookup.count(addr); 67 | } 68 | 69 | 70 | unsigned 71 | AddressMap::get_addr_type(uint64_t addr) 72 | { 73 | assert(contains(addr)); 74 | if(!contains(addr)) { 75 | return AddressMap::DISASM_REGION_UNMAPPED; 76 | } else { 77 | return addrmap[addr]; 78 | } 79 | } 80 | unsigned AddressMap::addr_type(uint64_t addr) { return get_addr_type(addr); } 81 | 82 | 83 | void 84 | AddressMap::set_addr_type(uint64_t addr, unsigned type) 85 | { 86 | assert(contains(addr)); 87 | if(contains(addr)) { 88 | if(type != AddressMap::DISASM_REGION_UNMAPPED) { 89 | erase_unmapped(addr); 90 | } 91 | addrmap[addr] = type; 92 | } 93 | } 94 | 95 | 96 | void 97 | AddressMap::add_addr_flag(uint64_t addr, unsigned flag) 98 | { 99 | assert(contains(addr)); 100 | if(contains(addr)) { 101 | if(flag != AddressMap::DISASM_REGION_UNMAPPED) { 102 | erase_unmapped(addr); 103 | } 104 | addrmap[addr] |= flag; 105 | } 106 | } 107 | 108 | 109 | size_t 110 | AddressMap::unmapped_count() 111 | { 112 | return unmapped.size(); 113 | } 114 | 115 | 116 | uint64_t 117 | AddressMap::get_unmapped(size_t i) 118 | { 119 | return unmapped[i]; 120 | } 121 | 122 | 123 | void 124 | AddressMap::erase(uint64_t addr) 125 | { 126 | if(addrmap.count(addr)) { 
127 | addrmap.erase(addr); 128 | } 129 | erase_unmapped(addr); 130 | } 131 | 132 | 133 | void 134 | AddressMap::erase_unmapped(uint64_t addr) 135 | { 136 | size_t i; 137 | 138 | if(unmapped_lookup.count(addr)) { 139 | if(unmapped_count() > 1) { 140 | i = unmapped_lookup[addr]; 141 | unmapped[i] = unmapped.back(); 142 | unmapped_lookup[unmapped.back()] = i; 143 | } 144 | unmapped_lookup.erase(addr); 145 | unmapped.pop_back(); 146 | } 147 | } 148 | 149 | /******************************************************************************* 150 | ** Disassembly engine ** 151 | ******************************************************************************/ 152 | static int 153 | init_disasm(Binary *bin, std::list *disasm) 154 | { 155 | size_t i; 156 | uint64_t vma; 157 | Section *sec; 158 | DisasmSection *dis; 159 | 160 | disasm->clear(); 161 | for(i = 0; i < bin->sections.size(); i++) { 162 | sec = &bin->sections[i]; 163 | if((sec->type != Section::SEC_TYPE_CODE) 164 | && !(!options.only_code_sections && (sec->type == Section::SEC_TYPE_DATA))) continue; 165 | 166 | disasm->push_back(DisasmSection()); 167 | dis = &disasm->back(); 168 | 169 | dis->section = sec; 170 | for(vma = sec->vma; vma < (sec->vma+sec->size); vma++) { 171 | dis->addrmap.insert(vma); 172 | } 173 | } 174 | verbose(1, "disassembler initialized"); 175 | 176 | return 0; 177 | } 178 | 179 | 180 | static int 181 | fini_disasm(Binary *bin, std::list *disasm) 182 | { 183 | verbose(1, "disassembly complete"); 184 | 185 | return 0; 186 | } 187 | 188 | 189 | static int 190 | nucleus_disasm_bb(Binary *bin, DisasmSection *dis, BB *bb) 191 | { 192 | switch(bin->arch) { 193 | case Binary::ARCH_AARCH64: 194 | return nucleus_disasm_bb_aarch64(bin, dis, bb); 195 | case Binary::ARCH_ARM: 196 | return nucleus_disasm_bb_arm(bin, dis, bb); 197 | case Binary::ARCH_MIPS: 198 | return nucleus_disasm_bb_mips(bin, dis, bb); 199 | case Binary::ARCH_PPC: 200 | return nucleus_disasm_bb_ppc(bin, dis, bb); 201 | case Binary::ARCH_X86: 
202 | return nucleus_disasm_bb_x86(bin, dis, bb); 203 | default: 204 | print_err("disassembly for architecture %s is not supported", bin->arch_str.c_str()); 205 | return -1; 206 | } 207 | } 208 | 209 | 210 | static int 211 | nucleus_disasm_section(Binary *bin, DisasmSection *dis) 212 | { 213 | int ret; 214 | unsigned i, n; 215 | uint64_t vma; 216 | double s; 217 | BB *mutants; 218 | std::queue Q; 219 | 220 | mutants = NULL; 221 | 222 | if((dis->section->type != Section::SEC_TYPE_CODE) && options.only_code_sections) { 223 | print_warn("skipping non-code section '%s'", dis->section->name.c_str()); 224 | return 0; 225 | } 226 | 227 | verbose(2, "disassembling section '%s'", dis->section->name.c_str()); 228 | 229 | Q.push(NULL); 230 | while(!Q.empty()) { 231 | n = bb_mutate(dis, Q.front(), &mutants); 232 | Q.pop(); 233 | for(i = 0; i < n; i++) { 234 | if(nucleus_disasm_bb(bin, dis, &mutants[i]) < 0) { 235 | goto fail; 236 | } 237 | if((s = bb_score(dis, &mutants[i])) < 0) { 238 | goto fail; 239 | } 240 | } 241 | if((n = bb_select(dis, mutants, n)) < 0) { 242 | goto fail; 243 | } 244 | for(i = 0; i < n; i++) { 245 | if(mutants[i].alive) { 246 | dis->addrmap.add_addr_flag(mutants[i].start, AddressMap::DISASM_REGION_BB_START); 247 | for(auto &ins: mutants[i].insns) { 248 | dis->addrmap.add_addr_flag(ins.start, AddressMap::DISASM_REGION_INS_START); 249 | } 250 | for(vma = mutants[i].start; vma < mutants[i].end; vma++) { 251 | dis->addrmap.add_addr_flag(vma, AddressMap::DISASM_REGION_CODE); 252 | } 253 | dis->BBs.push_back(BB(mutants[i])); 254 | Q.push(&dis->BBs.back()); 255 | } 256 | } 257 | } 258 | 259 | ret = 0; 260 | goto cleanup; 261 | 262 | fail: 263 | ret = -1; 264 | 265 | cleanup: 266 | if(mutants) { 267 | delete[] mutants; 268 | } 269 | return ret; 270 | } 271 | 272 | 273 | int 274 | nucleus_disasm(Binary *bin, std::list *disasm) 275 | { 276 | int ret; 277 | 278 | if(init_disasm(bin, disasm) < 0) { 279 | goto fail; 280 | } 281 | 282 | for(auto &dis: (*disasm)) { 
283 | if(nucleus_disasm_section(bin, &dis) < 0) { 284 | goto fail; 285 | } 286 | } 287 | 288 | if(fini_disasm(bin, disasm) < 0) { 289 | goto fail; 290 | } 291 | 292 | ret = 0; 293 | goto cleanup; 294 | 295 | fail: 296 | ret = -1; 297 | 298 | cleanup: 299 | return ret; 300 | } 301 | 302 | -------------------------------------------------------------------------------- /disasm.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_DISASM_H 2 | #define NUCLEUS_DISASM_H 3 | 4 | #include 5 | 6 | #include 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "bb.h" 13 | #include "dataregion.h" 14 | #include "loader.h" 15 | 16 | class AddressMap { 17 | public: 18 | enum DisasmRegion { 19 | DISASM_REGION_UNMAPPED = 0x0000, 20 | DISASM_REGION_CODE = 0x0001, 21 | DISASM_REGION_DATA = 0x0002, 22 | DISASM_REGION_INS_START = 0x0100, 23 | DISASM_REGION_BB_START = 0x0200, 24 | DISASM_REGION_FUNC_START = 0x0400 25 | }; 26 | 27 | AddressMap() {} 28 | 29 | void insert (uint64_t addr); 30 | bool contains (uint64_t addr); 31 | unsigned get_addr_type (uint64_t addr); 32 | void set_addr_type (uint64_t addr, unsigned type); 33 | void add_addr_flag (uint64_t addr, unsigned flag); 34 | unsigned addr_type (uint64_t addr); 35 | 36 | size_t unmapped_count (); 37 | uint64_t get_unmapped (size_t i); 38 | void erase (uint64_t addr); 39 | void erase_unmapped (uint64_t addr); 40 | 41 | private: 42 | std::map addrmap; 43 | std::vector unmapped; 44 | std::map unmapped_lookup; 45 | }; 46 | 47 | class DisasmSection { 48 | public: 49 | DisasmSection() : section(NULL) {} 50 | 51 | void print_BBs(FILE *out); 52 | 53 | Section *section; 54 | AddressMap addrmap; 55 | std::list BBs; 56 | std::list data; 57 | 58 | private: 59 | void sort_BBs(); 60 | }; 61 | 62 | int nucleus_disasm (Binary *bin, std::list *disasm); 63 | 64 | #endif /* NUCLEUS_DISASM_H */ 65 | 66 | -------------------------------------------------------------------------------- 
/edge.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "edge.h" 4 | 5 | 6 | std::string 7 | Edge::type2str() 8 | { 9 | std::string s; 10 | 11 | switch(this->type) { 12 | case EDGE_TYPE_JMP: 13 | s = "jmp"; 14 | break; 15 | case EDGE_TYPE_JMP_INDIRECT: 16 | s = "ijmp"; 17 | break; 18 | case EDGE_TYPE_CALL: 19 | s = "call"; 20 | break; 21 | case EDGE_TYPE_CALL_INDIRECT: 22 | s = "icall"; 23 | break; 24 | case EDGE_TYPE_RET: 25 | s = "ret"; 26 | break; 27 | case EDGE_TYPE_FALLTHROUGH: 28 | s = "fallthrough"; 29 | break; 30 | default: 31 | s = "none"; 32 | break; 33 | } 34 | 35 | if(this->is_switch) { 36 | s += "/switch"; 37 | } 38 | if(this->offset) { 39 | s += "/+" + std::to_string(this->offset); 40 | } 41 | 42 | return s; 43 | } 44 | 45 | -------------------------------------------------------------------------------- /edge.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_EDGE_H 2 | #define NUCLEUS_EDGE_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | class BB; 9 | 10 | class Edge { 11 | public: 12 | enum EdgeType { 13 | EDGE_TYPE_NONE, 14 | EDGE_TYPE_JMP, 15 | EDGE_TYPE_JMP_INDIRECT, 16 | EDGE_TYPE_CALL, 17 | EDGE_TYPE_CALL_INDIRECT, 18 | EDGE_TYPE_RET, 19 | EDGE_TYPE_FALLTHROUGH 20 | }; 21 | 22 | Edge(Edge::EdgeType type_, BB *src_, BB *dst_) : type(type_), src(src_), dst(dst_), is_switch(false), jmptab(0), offset(0) {} 23 | Edge(Edge::EdgeType type_, BB *src_, BB *dst_, bool is_switch_, uint64_t jmptab_, unsigned offset_) : type(type_), src(src_), dst(dst_), is_switch(is_switch_), jmptab(jmptab_), offset(offset_) {} 24 | 25 | std::string type2str (); 26 | 27 | EdgeType type; 28 | BB *src; 29 | BB *dst; 30 | bool is_switch; 31 | uint64_t jmptab; 32 | unsigned offset; 33 | }; 34 | 35 | #endif /* NUCLEUS_EDGE_H */ 36 | 37 | -------------------------------------------------------------------------------- /endian.cc: 
-------------------------------------------------------------------------------- 1 | #include "endian.h" 2 | 3 | /* Detect host endianness */ 4 | #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ 5 | #define NUCLEUS_HOST_LE 6 | #elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ 7 | #define NUCLEUS_HOST_BE 8 | #endif 9 | 10 | /* Endian swap */ 11 | #define SWAP_16(x) ( \ 12 | (((x) >> 8) & 0x00FF) | (((x) << 8) & 0xFF00) \ 13 | ) 14 | #define SWAP_32(x) ( \ 15 | (((x) >> 24) & 0x000000FF) | (((x) >> 8) & 0x0000FF00) | \ 16 | (((x) << 8) & 0x00FF0000) | (((x) << 24) & 0xFF000000) \ 17 | ) 18 | #define SWAP_64(x) ( \ 19 | (((x) >> 56) & 0x00000000000000FF) | (((x) >> 40) & 0x000000000000FF00) | \ 20 | (((x) >> 24) & 0x0000000000FF0000) | (((x) >> 8) & 0x00000000FF000000) | \ 21 | (((x) << 8) & 0x000000FF00000000) | (((x) << 24) & 0x0000FF0000000000) | \ 22 | (((x) << 40) & 0x00FF000000000000) | (((x) << 56) & 0xFF00000000000000) \ 23 | ) 24 | 25 | 26 | /* Little-Endian reads */ 27 | uint16_t read_le_i16(const uint16_t* data) 28 | { 29 | uint16_t value = *data; 30 | #if defined(NUCLEUS_HOST_LE) 31 | return value; 32 | #elif defined(NUCLEUS_HOST_BE) 33 | return SWAP_16(value); 34 | #endif 35 | } 36 | 37 | uint32_t read_le_i32(const uint32_t* data) 38 | { 39 | uint32_t value = *data; 40 | #if defined(NUCLEUS_HOST_LE) 41 | return value; 42 | #elif defined(NUCLEUS_HOST_BE) 43 | return SWAP_32(value); 44 | #endif 45 | } 46 | 47 | uint64_t read_le_i64(const uint64_t* data) 48 | { 49 | uint64_t value = *data; 50 | #if defined(NUCLEUS_HOST_LE) 51 | return value; 52 | #elif defined(NUCLEUS_HOST_BE) 53 | return SWAP_64(value); 54 | #endif 55 | } 56 | 57 | 58 | /* Big-Endian reads */ 59 | uint16_t read_be_i16(const uint16_t* data) 60 | { 61 | uint16_t value = *data; 62 | #if defined(NUCLEUS_HOST_BE) 63 | return value; 64 | #elif defined(NUCLEUS_HOST_LE) 65 | return SWAP_16(value); 66 | #endif 67 | } 68 | 69 | uint32_t read_be_i32(const uint32_t* data) 70 | { 71 | uint32_t value = *data; 72 
| #if defined(NUCLEUS_HOST_BE) 73 | return value; 74 | #elif defined(NUCLEUS_HOST_LE) 75 | return SWAP_32(value); 76 | #endif 77 | } 78 | 79 | uint64_t read_be_i64(const uint64_t* data) 80 | { 81 | uint64_t value = *data; 82 | #if defined(NUCLEUS_HOST_BE) 83 | return value; 84 | #elif defined(NUCLEUS_HOST_LE) 85 | return SWAP_64(value); 86 | #endif 87 | } 88 | -------------------------------------------------------------------------------- /endian.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_ENDIAN_H 2 | #define NUCLEUS_ENDIAN_H 3 | 4 | #include 5 | 6 | uint16_t read_le_i16(const uint16_t* data); 7 | uint32_t read_le_i32(const uint32_t* data); 8 | uint64_t read_le_i64(const uint64_t* data); 9 | 10 | uint16_t read_be_i16(const uint16_t* data); 11 | uint32_t read_be_i32(const uint32_t* data); 12 | uint64_t read_be_i64(const uint64_t* data); 13 | 14 | #endif /* NUCLEUS_ENDIAN_H */ 15 | -------------------------------------------------------------------------------- /exception.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | 5 | #include "log.h" 6 | #include "exception.h" 7 | 8 | 9 | void 10 | nucleus_terminate() 11 | { 12 | print_err("unhandled exception, terminating..."); 13 | exit(EXIT_FAILURE); 14 | } 15 | 16 | 17 | void 18 | set_exception_handlers() 19 | { 20 | std::set_terminate(nucleus_terminate); 21 | } 22 | 23 | -------------------------------------------------------------------------------- /exception.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_EXCEPTION_H 2 | #define NUCLEUS_EXCEPTION_H 3 | 4 | void set_exception_handlers(); 5 | 6 | #endif /* NUCLEUS_EXCEPTION_H */ 7 | 8 | -------------------------------------------------------------------------------- /export.cc: -------------------------------------------------------------------------------- 1 | #include "bb.h" 2 | 
#include "edge.h"
#include "insn.h"
#include "cfg.h"
#include "log.h"
#include "nucleus.h"
#include "export.h"


/*
 * Write the entry address of every function in cfg that has at least one
 * entry block, as the elements of a Python list: five "0x...," items per row.
 * Only the first entry block of each function is used. A partially filled
 * last row is terminated with a newline so the closing bracket gets its own
 * line.
 */
static void
write_function_entries(FILE *f, CFG *cfg)
{
  size_t count;

  count = 0;
  for(auto &func: cfg->functions) {
    if(func.entry.empty()) continue;
    if(!(count % 5)) fprintf(f, "        ");
    fprintf(f, "0x%jx, ", (uintmax_t)func.entry.front()->start);
    if(!(++count % 5)) fprintf(f, "\n");
  }
  if(count % 5) fprintf(f, "\n");
}


/*
 * Close f and report whether everything written to it reached the file.
 * Returns 0 on success, -1 (after printing an error) if a write or the
 * close itself failed.
 */
static int
close_export(FILE *f, std::string &fname)
{
  int write_failed;

  write_failed = ferror(f);
  if(fclose(f) != 0) write_failed = 1;
  if(write_failed) {
    print_err("failed to write file '%s'", fname.c_str());
    return -1;
  }

  return 0;
}


/*
 * Write an IDAPython script to fname. The script deletes the functions IDA
 * defined in code segments and defines one function per entry address found
 * in cfg.
 *
 * bin and disasm are not used here.
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
int
export_bin2ida(std::string &fname, Binary *bin, std::list<DisasmSection> *disasm, CFG *cfg)
{
  FILE *f;

  f = fopen(fname.c_str(), "w");
  if(!f) {
    print_err("cannot open file '%s' for writing", fname.c_str());
    return -1;
  }

  /* Python is indentation-sensitive: every nesting level of the emitted
   * script is indented by four spaces. */
  fprintf(f, "\"\"\"\n");
  fprintf(f, "Script generated by %s\n", NUCLEUS_VERSION);
  fprintf(f, "\"\"\"\n");
  fprintf(f, "\n");
  fprintf(f, "import idaapi\n");
  fprintf(f, "import idautils\n");
  fprintf(f, "import idc\n");
  fprintf(f, "\n");
  fprintf(f, "idaapi.autoWait()\n");
  fprintf(f, "\n");
  fprintf(f, "def mark_functions():\n");
  fprintf(f, "    functions = [\n");
  write_function_entries(f, cfg);
  fprintf(f, "    ]\n");
  fprintf(f, "    for seg in idautils.Segments():\n");
  fprintf(f, "        if idaapi.segtype(idc.SegStart(seg)) != idaapi.SEG_CODE:\n");
  fprintf(f, "            continue\n");
  fprintf(f, "        for f in idautils.Functions(idc.SegStart(seg), idc.SegEnd(seg)):\n");
  fprintf(f, "            print 'nucleus: deleting function 0x%%x' %% (f)\n");
  fprintf(f, "            idc.DelFunction(f)\n");
  fprintf(f, "    for f in functions:\n");
  fprintf(f, "        print 'nucleus: defining function 0x%%x' %% (f)\n");
  fprintf(f, "        if idc.MakeCode(f):\n");
  fprintf(f, "            idc.MakeFunction(f)\n");
  fprintf(f, "\n");
  fprintf(f, "mark_functions()\n");

  return close_export(f, fname);
}

/*
 * Write a Binary Ninja Python script to fname. The script removes the
 * functions of the current view (bv) and adds one function per entry address
 * found in cfg.
 *
 * bin and disasm are not used here.
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
int
export_bin2binja(std::string &fname, Binary *bin, std::list<DisasmSection> *disasm, CFG *cfg)
{
  FILE *f;

  f = fopen(fname.c_str(), "w");
  if(!f) {
    print_err("cannot open file '%s' for writing", fname.c_str());
    return -1;
  }

  /* Same four-space indentation per nesting level as the IDA script. */
  fprintf(f, "\"\"\"\n");
  fprintf(f, "Script generated by %s\n", NUCLEUS_VERSION);
  fprintf(f, "\"\"\"\n");
  fprintf(f, "\n");
  fprintf(f, "import binaryninja\n");
  fprintf(f, "\n");
  fprintf(f, "def mark_functions():\n");
  fprintf(f, "    functions = [\n");
  write_function_entries(f, cfg);
  fprintf(f, "    ]\n");
  fprintf(f, "    for f in bv.functions:\n");
  fprintf(f, "        bv.remove_function(f)\n");
  fprintf(f, "    for f in functions:\n");
  fprintf(f, "        print 'nucleus: defining function 0x%%x' %% (f)\n");
  fprintf(f, "        bv.add_function(f)\n");
  fprintf(f, "\n");
  fprintf(f, "mark_functions()\n");

  return close_export(f, fname);
}


/*
 * Write the control-flow graph in cfg to fname as a Graphviz digraph: one
 * "bb_<src> -> bb_<dst>" line per outgoing edge of every basic block in
 * cfg->start2bb, labelled with Edge::type2str().
 *
 * Returns 0 on success, -1 if the file cannot be opened or written.
 */
int
export_cfg2dot(std::string &fname, CFG *cfg)
{
  FILE *f;
  BB *bb;

  f = fopen(fname.c_str(), "w");
  if(!f) {
    print_err("cannot open file '%s' for writing", fname.c_str());
    return -1;
  }

  fprintf(f, "digraph G {\n\n");
  for(auto &kv : cfg->start2bb) {
    bb = kv.second;
    for(auto &e : bb->targets) {
      /* NOTE(review): assumes every edge in targets has non-null src and dst -- confirm against the CFG builder */
      fprintf(f, "bb_%jx -> bb_%jx [ label=\"%s\" ];\n",
              (uintmax_t)e.src->start, (uintmax_t)e.dst->start, e.type2str().c_str());
    }
  }
  fprintf(f, "}\n");

  return close_export(f, fname);
}

// --------------------------------------------------------------------------------
// /export.h:
// --------------------------------------------------------------------------------
#ifndef NUCLEUS_EXPORT_H
#define NUCLEUS_EXPORT_H

#include <list>
#include <string>

#include "loader.h"
#include "disasm.h"
#include "cfg.h"

int
export_bin2ida (std::string &fname, Binary *bin, std::list *disasm, CFG *cfg); 12 | int export_bin2binja (std::string &fname, Binary *bin, std::list *disasm, CFG *cfg); 13 | int export_cfg2dot (std::string &fname, CFG *cfg); 14 | 15 | #endif /* NUCLEUS_EXPORT_H */ 16 | 17 | -------------------------------------------------------------------------------- /function.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include 5 | 6 | #include "cfg.h" 7 | #include "bb.h" 8 | #include "util.h" 9 | #include "function.h" 10 | 11 | 12 | uint64_t Function::global_id = 0; 13 | 14 | void 15 | Function::print(FILE *out) 16 | { 17 | size_t i; 18 | unsigned offset; 19 | 20 | if(entry.empty()) { 21 | fprintf(out, "function %ju: start@0x%016jx end@0x%016jx (entry point unknown)\n", id, start, end); 22 | } else { 23 | i = 0; 24 | for(auto entry_bb: entry) { 25 | offset = 0; 26 | for(auto &e: entry_bb->ancestors) { 27 | if(e.type == Edge::EDGE_TYPE_CALL) offset = e.offset; 28 | } 29 | if(i == 0) { 30 | fprintf(out, "function %ju: entry@0x%016jx %ju bytes\n", id, entry_bb->start + offset, (end-entry_bb->start)); 31 | if(entry.size() > 1) { 32 | fprintf(out, "/-- alternative entry points:\n"); 33 | } 34 | } else { 35 | fprintf(out, "/-- 0x%016jx\n", entry_bb->start + offset); 36 | } 37 | i++; 38 | } 39 | } 40 | for(auto &bb: BBs) { 41 | fprintf(out, " BB@0x%016jx\n", bb->start); 42 | } 43 | } 44 | 45 | 46 | void 47 | Function::print_summary(FILE *out) 48 | { 49 | BB *entry_bb; 50 | unsigned offset; 51 | 52 | if(entry.empty()) { 53 | fprintf(out, "0x0\t\t\t%ju\n", end-start); 54 | } else { 55 | entry_bb = entry.front(); 56 | offset = 0; 57 | for(auto &e: entry_bb->ancestors) { 58 | if(e.type == Edge::EDGE_TYPE_CALL) offset = e.offset; 59 | } 60 | fprintf(out, "0x%016jx\t%ju\n", entry_bb->start + offset, (end-entry_bb->start)); 61 | } 62 | } 63 | 64 | 65 | void 66 | Function::find_entry() 67 | { 68 | bool 
reached_directly; 69 | std::list called; 70 | std::list headers; 71 | 72 | /* Entries are sorted by priority as follows: 73 | * (1) Called BBs in order of increasing address 74 | * (2) Ancestor-less BBs in order of increasing address 75 | * (3) Starting address of the function (only if no other entry found) 76 | */ 77 | 78 | for(auto bb: this->BBs) { 79 | if(bb->is_called()) { 80 | called.push_back(bb); 81 | } 82 | } 83 | 84 | called.sort(compare_ptr); 85 | for(auto bb: called) this->entry.push_back(bb); 86 | 87 | for(auto bb: this->BBs) { 88 | reached_directly = false; 89 | for(auto &e: bb->ancestors) { 90 | if(e.offset == 0) reached_directly = true; 91 | } 92 | if(!reached_directly) { 93 | headers.push_back(bb); 94 | } 95 | } 96 | 97 | headers.sort(compare_ptr); 98 | for(auto bb: headers) this->entry.push_back(bb); 99 | 100 | if(this->entry.empty()) { 101 | if(this->cfg->start2bb.count(start)) { 102 | this->entry.push_back(this->cfg->start2bb[start]); 103 | } 104 | } 105 | } 106 | 107 | 108 | void 109 | Function::add_bb(BB *bb) 110 | { 111 | this->BBs.push_back(bb); 112 | if(!this->start || (bb->start < this->start)) { 113 | this->start = bb->start; 114 | } 115 | if(!this->end || (bb->end > this->end)) { 116 | if(!(bb->insns.back().flags & Instruction::INS_FLAG_NOP)) this->end = bb->end; 117 | } 118 | bb->function = this; 119 | } 120 | 121 | -------------------------------------------------------------------------------- /function.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_FUNCTION_H 2 | #define NUCLEUS_FUNCTION_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include "bb.h" 10 | 11 | class CFG; 12 | 13 | class Function { 14 | public: 15 | Function() : cfg(NULL), start(0), end(0) { id = global_id++; } 16 | 17 | void print (FILE *out); 18 | void print_summary (FILE *out); 19 | 20 | void find_entry (); 21 | void add_bb (BB *bb); 22 | 23 | CFG *cfg; 24 | uint64_t id; 25 | uint64_t start; 26 | 
uint64_t end; 27 | std::list entry; 28 | std::list BBs; 29 | 30 | private: 31 | static uint64_t global_id; 32 | }; 33 | 34 | #endif /* NUCLEUS_FUNCTION_H */ 35 | 36 | -------------------------------------------------------------------------------- /insn.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include "edge.h" 4 | #include "insn.h" 5 | 6 | 7 | void 8 | Instruction::print(FILE *out) 9 | { 10 | fprintf(out, " 0x%016jx %s\t%s\n", start, mnem.c_str(), op_str.c_str()); 11 | } 12 | 13 | 14 | Edge::EdgeType 15 | Instruction::edge_type() 16 | { 17 | if(flags & INS_FLAG_JMP) { 18 | return (flags & INS_FLAG_INDIRECT) ? Edge::EDGE_TYPE_JMP_INDIRECT : Edge::EDGE_TYPE_JMP; 19 | } else if(flags & INS_FLAG_CALL) { 20 | return (flags & INS_FLAG_INDIRECT) ? Edge::EDGE_TYPE_CALL_INDIRECT : Edge::EDGE_TYPE_CALL; 21 | } else if(flags & INS_FLAG_RET) { 22 | return Edge::EDGE_TYPE_RET; 23 | } else { 24 | return Edge::EDGE_TYPE_NONE; 25 | } 26 | } 27 | 28 | -------------------------------------------------------------------------------- /insn.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_INSN_H 2 | #define NUCLEUS_INSN_H 3 | 4 | #include 5 | #include 6 | 7 | #include 8 | 9 | #include 10 | #include 11 | 12 | #include "edge.h" 13 | 14 | class Operand { 15 | public: 16 | enum OperandType { 17 | OP_TYPE_NONE = 0, 18 | OP_TYPE_REG = 1, 19 | OP_TYPE_IMM = 2, 20 | OP_TYPE_MEM = 3, 21 | OP_TYPE_FP = 4 22 | }; 23 | 24 | union AArch64Value { 25 | AArch64Value() { reg = ARM64_REG_INVALID; imm = 0; fp = 0; mem.base = 0; mem.index = 0; mem.disp = 0; } 26 | AArch64Value(const AArch64Value &v) { mem.base = v.mem.base; 27 | mem.index = v.mem.index; mem.disp = v.mem.disp; } 28 | 29 | arm64_reg reg; 30 | int32_t imm; 31 | double fp; 32 | arm64_op_mem mem; 33 | }; 34 | 35 | union ARMValue { 36 | ARMValue() { reg = ARM_REG_INVALID; imm = 0; fp = 0; mem.base = 0; mem.index = 0; 
mem.scale = 0; mem.disp = 0; } 37 | ARMValue(const ARMValue &v) { mem.base = v.mem.base; mem.index = v.mem.index; 38 | mem.scale = v.mem.scale; mem.disp = v.mem.disp; } 39 | 40 | arm_reg reg; 41 | int32_t imm; 42 | double fp; 43 | arm_op_mem mem; 44 | }; 45 | 46 | union MIPSValue { 47 | MIPSValue() { reg = MIPS_REG_INVALID; imm = 0; fp = 0; mem.base = 0; mem.disp = 0; } 48 | MIPSValue(const MIPSValue &v) { mem.base = v.mem.base; mem.disp = v.mem.disp; } 49 | 50 | mips_reg reg; 51 | int32_t imm; 52 | double fp; 53 | mips_op_mem mem; 54 | }; 55 | 56 | union PPCValue { 57 | PPCValue() { reg = PPC_REG_INVALID; imm = 0; mem.base = 0; mem.disp = 0; } 58 | PPCValue(const PPCValue &v) { mem.base = v.mem.base; mem.disp = v.mem.disp; } 59 | 60 | ppc_reg reg; 61 | int32_t imm; 62 | ppc_op_mem mem; 63 | }; 64 | 65 | union X86Value { 66 | X86Value() { reg = X86_REG_INVALID; imm = 0; fp = 0; mem.segment = 0; mem.base = 0; mem.index = 0; mem.scale = 0; mem.disp = 0; } 67 | X86Value(const X86Value &v) { mem.segment = v.mem.segment; mem.base = v.mem.base; 68 | mem.index = v.mem.index; mem.scale = v.mem.scale; 69 | mem.disp = v.mem.disp; } 70 | 71 | x86_reg reg; 72 | int64_t imm; 73 | double fp; 74 | x86_op_mem mem; 75 | }; 76 | 77 | Operand() : type(OP_TYPE_NONE), size(0), x86_value() {} 78 | Operand(const Operand &op) : type(op.type), size(op.size), x86_value(op.x86_value) {} 79 | 80 | uint8_t type; 81 | uint8_t size; 82 | 83 | union { 84 | AArch64Value aarch64_value; /* Only set if the arch is aarch64 */ 85 | ARMValue arm_value; /* Only set if the arch is arm */ 86 | MIPSValue mips_value; /* Only set if the arch is mips */ 87 | PPCValue ppc_value; /* Only set if the arch is ppc */ 88 | X86Value x86_value; /* Only set if the arch is x86 */ 89 | }; 90 | }; 91 | 92 | class Instruction { 93 | public: 94 | enum InstructionFlags { 95 | INS_FLAG_CFLOW = 0x001, 96 | INS_FLAG_COND = 0x002, 97 | INS_FLAG_INDIRECT = 0x004, 98 | INS_FLAG_JMP = 0x008, 99 | INS_FLAG_CALL = 0x010, 100 | 
INS_FLAG_RET = 0x020, 101 | INS_FLAG_NOP = 0x040 102 | }; 103 | 104 | Instruction() : id(0), start(0), size(0), addr_size(0), target(0), flags(0), invalid(false), privileged(false), trap(false) {} 105 | Instruction(const Instruction &i) : id(i.id), start(i.start), size(i.size), addr_size(i.addr_size), target(i.target), flags(i.flags), 106 | mnem(i.mnem), op_str(i.op_str), operands(i.operands), invalid(i.invalid), privileged(i.privileged), trap(i.trap) {} 107 | 108 | void print (FILE *out); 109 | Edge::EdgeType edge_type (); 110 | 111 | unsigned int id; 112 | uint64_t start; 113 | uint8_t size; 114 | uint8_t addr_size; 115 | uint64_t target; 116 | unsigned short flags; 117 | std::string mnem; 118 | std::string op_str; 119 | std::vector operands; 120 | bool invalid; 121 | bool privileged; 122 | bool trap; 123 | }; 124 | 125 | class X86Instruction : public Instruction { 126 | public: 127 | static const uint8_t MAX_LEN = 16; 128 | 129 | X86Instruction() : Instruction() {} 130 | X86Instruction(const X86Instruction &i) : Instruction(i) {} 131 | }; 132 | 133 | #endif /* NUCLEUS_INSN_H */ 134 | 135 | -------------------------------------------------------------------------------- /loader.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | 7 | #include 8 | #include 9 | 10 | #include 11 | 12 | #include "log.h" 13 | #include "options.h" 14 | #include "loader.h" 15 | 16 | 17 | const char *binary_types_descr[][2] = { 18 | {"auto", "Try to automatically determine binary format (default)"}, 19 | {"raw" , "Raw binary (memory dump, ROM, network capture, ...)"}, 20 | {"elf" , "Unix ELF"}, 21 | {"pe" , "Windows PE"}, 22 | {NULL , NULL} 23 | }; 24 | 25 | const char *binary_arch_descr[][2] = { 26 | {"auto" , "Try to automatically determine architecture (default)"}, 27 | {"aarch64" , "aarch64 (experimental)"}, 28 | {"arm" , "arm (experimental)"}, 29 | {"mips" , "mips (experimental)"}, 
30 | {"ppc" , "ppc: Specify ppc-32 or ppc-64 (default ppc-64, experimental)"}, 31 | {"x86" , "x86: Specify x86-16, x86-32 or x86-64 (default x86-64)"}, 32 | {NULL , NULL} 33 | }; 34 | 35 | 36 | static bfd* 37 | open_bfd(std::string &fname) 38 | { 39 | static int bfd_inited = 0; 40 | 41 | bfd *bin; 42 | 43 | if(!bfd_inited) { 44 | bfd_init(); 45 | bfd_inited = 1; 46 | } 47 | 48 | bin = bfd_openr(fname.c_str(), NULL); 49 | if(!bin) { 50 | print_err("failed to open binary '%s' (%s)", fname.c_str(), bfd_errmsg(bfd_get_error())); 51 | return NULL; 52 | } 53 | 54 | if(!bfd_check_format(bin, bfd_object)) { 55 | print_err("file '%s' does not look like a binary object (%s), maybe load as raw?", fname.c_str(), bfd_errmsg(bfd_get_error())); 56 | return NULL; 57 | } 58 | 59 | /* Some versions of bfd_check_format pessimistically set a wrong_format 60 | * error before detecting the format, and then neglect to unset it once 61 | * the format has been detected. We unset it manually to prevent problems. 
*/ 62 | bfd_set_error(bfd_error_no_error); 63 | 64 | if(bfd_get_flavour(bin) == bfd_target_unknown_flavour) { 65 | print_err("unrecognized format for binary '%s' (%s)", fname.c_str(), bfd_errmsg(bfd_get_error())); 66 | return NULL; 67 | } 68 | 69 | verbose(2, "binary '%s' has format '%s'", fname.c_str(), bin->xvec->name); 70 | 71 | return bin; 72 | } 73 | 74 | 75 | int 76 | load_symbols_bfd(bfd *bfd_h, Binary *bin) 77 | { 78 | int ret; 79 | long n, nsyms, i; 80 | asymbol **bfd_symtab; 81 | Symbol *sym; 82 | 83 | bfd_symtab = NULL; 84 | 85 | n = bfd_get_symtab_upper_bound(bfd_h); 86 | if(n < 0) { 87 | print_err("failed to read symtab (%s)", bfd_errmsg(bfd_get_error())); 88 | goto fail; 89 | } else if(n) { 90 | bfd_symtab = (asymbol**)malloc(n); 91 | if(!bfd_symtab) { 92 | print_err("out of memory"); 93 | goto fail; 94 | } 95 | nsyms = bfd_canonicalize_symtab(bfd_h, bfd_symtab); 96 | if(nsyms < 0) { 97 | print_err("failed to read symtab (%s)", bfd_errmsg(bfd_get_error())); 98 | goto fail; 99 | } 100 | for(i = 0; i < nsyms; i++) { 101 | if(bfd_symtab[i]->flags & BSF_FUNCTION) { 102 | bin->symbols.push_back(Symbol()); 103 | sym = &bin->symbols.back(); 104 | sym->type |= Symbol::SYM_TYPE_FUNC; 105 | sym->name = std::string(bfd_symtab[i]->name); 106 | sym->addr = bfd_asymbol_value(bfd_symtab[i]); 107 | } 108 | } 109 | } 110 | 111 | ret = 0; 112 | goto cleanup; 113 | 114 | fail: 115 | ret = -1; 116 | 117 | cleanup: 118 | if(bfd_symtab) free(bfd_symtab); 119 | 120 | return ret; 121 | } 122 | 123 | 124 | int 125 | load_dynsym_bfd(bfd *bfd_h, Binary *bin) 126 | { 127 | int ret; 128 | long n, nsyms, i; 129 | asymbol **bfd_dynsym; 130 | Symbol *sym; 131 | 132 | bfd_dynsym = NULL; 133 | 134 | n = bfd_get_dynamic_symtab_upper_bound(bfd_h); 135 | if(n < 0) { 136 | print_err("failed to read dynamic symtab (%s)", bfd_errmsg(bfd_get_error())); 137 | goto fail; 138 | } else if(n) { 139 | bfd_dynsym = (asymbol**)malloc(n); 140 | if(!bfd_dynsym) { 141 | print_err("out of memory"); 142 
| goto fail; 143 | } 144 | nsyms = bfd_canonicalize_dynamic_symtab(bfd_h, bfd_dynsym); 145 | if(nsyms < 0) { 146 | print_err("failed to read dynamic symtab (%s)", bfd_errmsg(bfd_get_error())); 147 | goto fail; 148 | } 149 | for(i = 0; i < nsyms; i++) { 150 | if(bfd_dynsym[i]->flags & BSF_FUNCTION) { 151 | bin->symbols.push_back(Symbol()); 152 | sym = &bin->symbols.back(); 153 | sym->type |= Symbol::SYM_TYPE_FUNC; 154 | sym->name = std::string(bfd_dynsym[i]->name); 155 | sym->addr = bfd_asymbol_value(bfd_dynsym[i]); 156 | } 157 | } 158 | } 159 | 160 | ret = 0; 161 | goto cleanup; 162 | 163 | fail: 164 | ret = -1; 165 | 166 | cleanup: 167 | if(bfd_dynsym) free(bfd_dynsym); 168 | 169 | return ret; 170 | } 171 | 172 | 173 | int 174 | load_dynrelocs_bfd(bfd *bfd_h, Binary *bin) 175 | { 176 | int ret; 177 | long nsyms, nrels, relsize, i; 178 | unsigned int symsize; 179 | asymbol **bfd_symtab = nullptr; 180 | arelent **bfd_relocs = nullptr; 181 | reloc_howto_type *bfd_howto; 182 | 183 | bfd_symtab = nullptr; 184 | bfd_relocs = nullptr; 185 | relsize = bfd_get_dynamic_reloc_upper_bound(bfd_h); 186 | if(relsize == 0) { 187 | return 0; 188 | } 189 | if(relsize < 0) { 190 | print_err("failed to read dynamic relocations size (%s)", bfd_errmsg(bfd_get_error())); 191 | goto fail; 192 | } 193 | nsyms = bfd_read_minisymbols(bfd_h, TRUE, (void**)&bfd_symtab, &symsize); 194 | if(nsyms < 0) { 195 | print_err("failed to read symtab (%s)", bfd_errmsg(bfd_get_error())); 196 | goto fail; 197 | } 198 | 199 | bfd_relocs = (arelent**)malloc(relsize); 200 | nrels = bfd_canonicalize_dynamic_reloc(bfd_h, bfd_relocs, bfd_symtab); 201 | if(nrels < 0) { 202 | print_err("failed to read dynamic relocations (%s)", bfd_errmsg(bfd_get_error())); 203 | goto fail; 204 | } 205 | /* Apply relocations */ 206 | for(i = 0; i < nrels; i++) { 207 | arelent *bfd_reloc = bfd_relocs[i]; 208 | asymbol *bfd_symbol = *(bfd_reloc->sym_ptr_ptr); 209 | bfd_howto = bfd_reloc->howto; 210 | 211 | for(const auto& sec : 
bin->sections) { 212 | /* Apply relocation to data of any executable section within range */ 213 | size_t bytesize = (bfd_howto->bitsize / 8); 214 | if(bfd_reloc->address < sec.vma || 215 | bfd_reloc->address > sec.vma + sec.size - bytesize || 216 | sec.type != Section::SEC_TYPE_CODE) { 217 | continue; 218 | } 219 | /* Compute relocation value */ 220 | bfd_vma relocation = 0; 221 | if(bfd_is_com_section(bfd_symbol->section)) { 222 | relocation = bfd_symbol->value; 223 | } 224 | relocation += bfd_reloc->addend; 225 | relocation >>= (bfd_vma)bfd_howto->rightshift; 226 | relocation <<= (bfd_vma)bfd_howto->bitpos; 227 | 228 | /* Patch data */ 229 | #define APPLY_RELOC(x) \ 230 | ((x & ~bfd_howto->dst_mask) | (((x & bfd_howto->src_mask) + relocation) & bfd_howto->dst_mask)) 231 | 232 | bfd_vma data_offset = bfd_reloc->address * bfd_octets_per_byte(bfd_h); 233 | bfd_byte* data = sec.bytes + (data_offset - sec.vma); 234 | 235 | switch (bfd_howto->size) { 236 | case 0: bfd_put_8 (bfd_h, APPLY_RELOC(bfd_get_8 (bfd_h, data)), data); break; 237 | case 1: bfd_put_16(bfd_h, APPLY_RELOC(bfd_get_16(bfd_h, data)), data); break; 238 | case 2: bfd_put_32(bfd_h, APPLY_RELOC(bfd_get_32(bfd_h, data)), data); break; 239 | case 4: bfd_put_64(bfd_h, APPLY_RELOC(bfd_get_64(bfd_h, data)), data); break; 240 | default: 241 | print_err("unsupported relocation size (%d)", bfd_howto->size); 242 | goto fail; 243 | } 244 | #undef APPLY_RELOC 245 | } 246 | } 247 | 248 | ret = 0; 249 | goto cleanup; 250 | 251 | fail: 252 | ret = -1; 253 | 254 | cleanup: 255 | if(bfd_symtab) free(bfd_symtab); 256 | if(bfd_relocs) free(bfd_relocs); 257 | 258 | return ret; 259 | } 260 | 261 | 262 | int 263 | load_sections_bfd(bfd *bfd_h, Binary *bin) 264 | { 265 | int bfd_flags, sectype; 266 | uint64_t vma, size; 267 | const char *secname; 268 | asection* bfd_sec; 269 | Section *sec; 270 | 271 | for(bfd_sec = bfd_h->sections; bfd_sec; bfd_sec = bfd_sec->next) { 272 | bfd_flags = bfd_get_section_flags(bfd_h, bfd_sec); 
273 | 274 | sectype = Section::SEC_TYPE_NONE; 275 | if(bfd_flags & SEC_CODE) { 276 | sectype |= Section::SEC_TYPE_CODE; 277 | } else if(bfd_flags & SEC_DATA) { 278 | sectype |= Section::SEC_TYPE_DATA; 279 | } else { 280 | continue; 281 | } 282 | 283 | vma = bfd_section_vma(bfd_h, bfd_sec); 284 | size = bfd_section_size(bfd_h, bfd_sec); 285 | secname = bfd_section_name(bfd_h, bfd_sec); 286 | if(!secname) secname = ""; 287 | 288 | bin->sections.push_back(Section()); 289 | sec = &bin->sections.back(); 290 | 291 | sec->binary = bin; 292 | sec->name = std::string(secname); 293 | sec->type = sectype; 294 | sec->vma = vma; 295 | sec->size = size; 296 | sec->bytes = (uint8_t*)malloc(size); 297 | if(!sec->bytes) { 298 | print_err("out of memory"); 299 | return -1; 300 | } 301 | 302 | if(!bfd_get_section_contents(bfd_h, bfd_sec, sec->bytes, 0, size)) { 303 | print_err("failed to read section '%s' (%s)", secname, bfd_errmsg(bfd_get_error())); 304 | return -1; 305 | } 306 | } 307 | 308 | return 0; 309 | } 310 | 311 | 312 | int 313 | load_binary_bfd(std::string &fname, Binary *bin, Binary::BinaryType type) 314 | { 315 | int ret; 316 | bfd *bfd_h; 317 | const bfd_arch_info_type *bfd_info; 318 | 319 | bfd_h = NULL; 320 | 321 | bfd_h = open_bfd(fname); 322 | if(!bfd_h) { 323 | goto fail; 324 | } 325 | 326 | bin->filename = std::string(fname); 327 | bin->entry = bfd_get_start_address(bfd_h); 328 | 329 | bin->type_str = std::string(bfd_h->xvec->name); 330 | switch(bfd_h->xvec->flavour) { 331 | case bfd_target_elf_flavour: 332 | bin->type = Binary::BIN_TYPE_ELF; 333 | break; 334 | case bfd_target_coff_flavour: 335 | bin->type = Binary::BIN_TYPE_PE; 336 | break; 337 | case bfd_target_unknown_flavour: 338 | default: 339 | print_err("unsupported binary type (%s)", bfd_h->xvec->name); 340 | goto fail; 341 | } 342 | 343 | bfd_info = bfd_get_arch_info(bfd_h); 344 | bin->arch_str = std::string(bfd_info->printable_name); 345 | switch(bfd_info->arch) { 346 | case bfd_arch_i386: 347 | 
switch(bfd_info->mach) { 348 | case bfd_mach_i386_i386: 349 | bin->arch = Binary::ARCH_X86; 350 | bin->bits = 32; 351 | break; 352 | case bfd_mach_x86_64: 353 | bin->arch = Binary::ARCH_X86; 354 | bin->bits = 64; 355 | break; 356 | default: 357 | goto fail_arch; 358 | } 359 | break; 360 | 361 | case bfd_arch_arm: 362 | switch(bfd_info->mach) { 363 | case bfd_mach_arm_5T: 364 | bin->arch = Binary::ARCH_ARM; 365 | bin->bits = 32; 366 | break; 367 | default: 368 | goto fail_arch; 369 | } 370 | break; 371 | 372 | case bfd_arch_aarch64: 373 | switch(bfd_info->mach) { 374 | case bfd_mach_aarch64: 375 | case bfd_mach_aarch64_ilp32: 376 | bin->arch = Binary::ARCH_AARCH64; 377 | bin->bits = 64; 378 | break; 379 | default: 380 | goto fail_arch; 381 | } 382 | break; 383 | 384 | case bfd_arch_mips: 385 | switch(bfd_info->mach) { 386 | case bfd_mach_mips16: 387 | bin->arch = Binary::ARCH_MIPS; 388 | bin->bits = 16; 389 | break; 390 | case bfd_mach_mipsisa32r2: 391 | bin->arch = Binary::ARCH_MIPS; 392 | bin->bits = 32; 393 | break; 394 | case bfd_mach_mipsisa64: 395 | bin->arch = Binary::ARCH_MIPS; 396 | bin->bits = 64; 397 | break; 398 | default: 399 | goto fail_arch; 400 | } 401 | break; 402 | 403 | case bfd_arch_powerpc: 404 | switch(bfd_info->mach) { 405 | case bfd_mach_ppc: 406 | bin->arch = Binary::ARCH_PPC; 407 | bin->bits = 32; 408 | break; 409 | case bfd_mach_ppc64: 410 | bin->arch = Binary::ARCH_PPC; 411 | bin->bits = 64; 412 | break; 413 | default: 414 | goto fail_arch; 415 | } 416 | break; 417 | 418 | default: 419 | fail_arch: 420 | print_err("unsupported architecture (%s, [%u, %u])", bfd_info->printable_name, bfd_info->arch, bfd_info->mach); 421 | goto fail; 422 | } 423 | 424 | /* Symbol handling is best-effort only (they may not even be present) */ 425 | load_symbols_bfd(bfd_h, bin); 426 | load_dynsym_bfd(bfd_h, bin); 427 | 428 | if(load_sections_bfd(bfd_h, bin) < 0) goto fail; 429 | 430 | /* Apply relocations if necessary */ 431 | if (bin->arch == 
Binary::ARCH_PPC) { 432 | load_dynrelocs_bfd(bfd_h, bin); 433 | } 434 | 435 | ret = 0; 436 | goto cleanup; 437 | 438 | fail: 439 | ret = -1; 440 | 441 | cleanup: 442 | if(bfd_h) bfd_close(bfd_h); 443 | 444 | return ret; 445 | } 446 | 447 | 448 | int 449 | load_binary_raw(std::string &fname, Binary *bin, Binary::BinaryType type) 450 | { 451 | int ret; 452 | long fsize; 453 | FILE *f; 454 | Section *sec; 455 | 456 | f = NULL; 457 | 458 | bin->filename = std::string(fname); 459 | bin->type = type; 460 | bin->type_str = std::string("raw"); 461 | 462 | if(options.binary.arch == Binary::ARCH_NONE) { 463 | print_err("cannot determine binary architecture, specify manually"); 464 | goto fail; 465 | } 466 | bin->arch = options.binary.arch; 467 | bin->bits = options.binary.bits; 468 | bin->arch_str = std::string(binary_arch_descr[(int)options.binary.arch][0]); 469 | bin->entry = 0; 470 | 471 | if(!bin->bits) { 472 | switch(bin->arch) { 473 | case Binary::ARCH_X86: 474 | bin->bits = 64; 475 | break; 476 | default: 477 | break; 478 | } 479 | } 480 | 481 | bin->sections.push_back(Section()); 482 | sec = &bin->sections.back(); 483 | 484 | sec->binary = bin; 485 | sec->name = std::string("raw"); 486 | sec->type = Section::SEC_TYPE_CODE; 487 | sec->vma = options.binary.base_vma; 488 | 489 | f = fopen(fname.c_str(), "rb"); 490 | if(!f) { 491 | print_err("failed to open binary '%s' (%s)", fname.c_str(), strerror(errno)); 492 | goto fail; 493 | } 494 | 495 | fseek(f, 0L, SEEK_END); 496 | fsize = ftell(f); 497 | if(fsize <= 0) { 498 | print_err("binary '%s' appears to be empty", fname.c_str()); 499 | goto fail; 500 | } 501 | 502 | sec->size = (uint64_t)fsize; 503 | sec->bytes = (uint8_t*)malloc(fsize); 504 | if(!sec->bytes) { 505 | print_err("out of memory"); 506 | goto fail; 507 | } 508 | 509 | fseek(f, 0L, SEEK_SET); 510 | if(fread(sec->bytes, 1, fsize, f) != (size_t)fsize) { 511 | print_err("failed to read binary '%s'", fname.c_str()); 512 | goto fail; 513 | } 514 | 515 | ret = 0; 
516 | goto cleanup; 517 | 518 | fail: 519 | ret = -1; 520 | 521 | cleanup: 522 | if(f) { 523 | fclose(f); 524 | } 525 | 526 | return ret; 527 | } 528 | 529 | 530 | int 531 | load_binary(std::string &fname, Binary *bin, Binary::BinaryType type) 532 | { 533 | if(type == Binary::BIN_TYPE_RAW) { 534 | return load_binary_raw(fname, bin, type); 535 | } else { 536 | return load_binary_bfd(fname, bin, type); 537 | } 538 | } 539 | 540 | 541 | void 542 | unload_binary(Binary *bin) 543 | { 544 | size_t i; 545 | Section *sec; 546 | 547 | for(i = 0; i < bin->sections.size(); i++) { 548 | sec = &bin->sections[i]; 549 | if(sec->bytes) { 550 | free(sec->bytes); 551 | } 552 | } 553 | } 554 | -------------------------------------------------------------------------------- /loader.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_LOADER_H 2 | #define NUCLEUS_LOADER_H 3 | 4 | #include 5 | #include 6 | #include 7 | 8 | class Binary; 9 | class Section; 10 | class Symbol; 11 | 12 | class Symbol { 13 | public: 14 | enum SymbolType { 15 | SYM_TYPE_UKN = 0x000, 16 | SYM_TYPE_FUNC = 0x001 17 | }; 18 | 19 | Symbol() : type(SYM_TYPE_UKN), name(), addr(0) {} 20 | 21 | unsigned type; 22 | std::string name; 23 | uint64_t addr; 24 | }; 25 | 26 | class Section { 27 | public: 28 | enum SectionType { 29 | SEC_TYPE_NONE = 0, 30 | SEC_TYPE_CODE = 1, 31 | SEC_TYPE_DATA = 2 32 | }; 33 | 34 | Section() : binary(NULL), type(0), vma(0), size(0), bytes(NULL) {} 35 | 36 | bool contains (uint64_t addr) { return (addr >= vma) && (addr-vma < size); } 37 | bool is_import_table () { return name == ".plt"; } 38 | 39 | Binary *binary; 40 | std::string name; 41 | unsigned type; 42 | uint64_t vma; 43 | uint64_t size; 44 | uint8_t *bytes; 45 | }; 46 | 47 | class Binary { 48 | public: 49 | enum BinaryType { 50 | BIN_TYPE_AUTO = 0, 51 | BIN_TYPE_RAW = 1, 52 | BIN_TYPE_ELF = 2, 53 | BIN_TYPE_PE = 3 54 | }; 55 | enum BinaryArch { 56 | ARCH_NONE = 0, 57 | ARCH_AARCH64 = 
1, 58 | ARCH_ARM = 2, 59 | ARCH_MIPS = 3, 60 | ARCH_PPC = 4, 61 | ARCH_X86 = 5 62 | }; 63 | 64 | Binary() : type(0), arch(0), bits(0), entry(0) {} 65 | 66 | std::string filename; 67 | unsigned type; 68 | std::string type_str; 69 | unsigned arch; 70 | std::string arch_str; 71 | unsigned bits; 72 | uint64_t entry; 73 | std::vector
sections; 74 | std::vector symbols; 75 | }; 76 | 77 | int load_binary (std::string &fname, Binary *bin, Binary::BinaryType type); 78 | void unload_binary (Binary *bin); 79 | 80 | #endif /* NUCLEUS_LOADER_H */ 81 | 82 | -------------------------------------------------------------------------------- /log.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | 4 | #include "options.h" 5 | #include "log.h" 6 | 7 | #define ERROUT stderr 8 | 9 | 10 | void 11 | verbose(int level, char const *fmt, ...) 12 | { 13 | va_list args; 14 | 15 | if(options.verbosity >= level) { 16 | va_start(args, fmt); 17 | vprintf(fmt, args); 18 | printf("\n"); 19 | va_end(args); 20 | } 21 | } 22 | 23 | 24 | void 25 | print_warn(char const *fmt, ...) 26 | { 27 | va_list args; 28 | 29 | if(options.warnings) { 30 | va_start(args, fmt); 31 | fprintf(ERROUT, "WARNING: "); 32 | vfprintf(ERROUT, fmt, args); 33 | fprintf(ERROUT, "\n"); 34 | va_end(args); 35 | } 36 | } 37 | 38 | 39 | void 40 | print_err(char const *fmt, ...) 
41 | { 42 | va_list args; 43 | 44 | va_start(args, fmt); 45 | fprintf(ERROUT, "ERROR: "); 46 | vfprintf(ERROUT, fmt, args); 47 | fprintf(ERROUT, "\n"); 48 | va_end(args); 49 | } 50 | 51 | -------------------------------------------------------------------------------- /log.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_LOG_H 2 | #define NUCLEUS_LOG_H 3 | 4 | void verbose (int level, char const *fmt, ...); 5 | void print_warn (char const *fmt, ...); 6 | void print_err (char const *fmt, ...); 7 | 8 | #endif /* NUCLEUS_LOG_H */ 9 | 10 | -------------------------------------------------------------------------------- /nucleus.cc: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | #include 4 | #include 5 | 6 | #include "nucleus.h" 7 | #include "disasm.h" 8 | #include "cfg.h" 9 | #include "loader.h" 10 | #include "util.h" 11 | #include "exception.h" 12 | #include "options.h" 13 | #include "export.h" 14 | #include "log.h" 15 | 16 | 17 | int 18 | main(int argc, char *argv[]) 19 | { 20 | size_t i; 21 | Binary bin; 22 | Section *sec; 23 | Symbol *sym; 24 | std::list disasm; 25 | CFG cfg; 26 | 27 | set_exception_handlers(); 28 | 29 | if(parse_options(argc, argv) < 0) { 30 | return 1; 31 | } 32 | 33 | if(load_binary(options.binary.filename, &bin, options.binary.type) < 0) { 34 | return 1; 35 | } 36 | 37 | verbose(1, "loaded binary '%s' %s/%s (%u bits) entry@0x%016jx", 38 | bin.filename.c_str(), 39 | bin.type_str.c_str(), bin.arch_str.c_str(), 40 | bin.bits, bin.entry); 41 | for(i = 0; i < bin.sections.size(); i++) { 42 | sec = &bin.sections[i]; 43 | verbose(1, " 0x%016jx %-8ju %-20s %s", 44 | sec->vma, sec->size, sec->name.c_str(), 45 | sec->type == Section::SEC_TYPE_CODE ? 
"CODE" : "DATA"); 46 | } 47 | if(bin.symbols.size() > 0) { 48 | verbose(1, "scanned symbol tables"); 49 | for(i = 0; i < bin.symbols.size(); i++) { 50 | sym = &bin.symbols[i]; 51 | verbose(1, " %-40s 0x%016jx %s", 52 | sym->name.c_str(), sym->addr, 53 | (sym->type & Symbol::SYM_TYPE_FUNC) ? "FUNC" : ""); 54 | } 55 | } 56 | 57 | if(nucleus_disasm(&bin, &disasm) < 0) { 58 | return 1; 59 | } 60 | 61 | if(cfg.make_cfg(&bin, &disasm) < 0) { 62 | return 1; 63 | } 64 | 65 | if(options.summarize_functions) { 66 | cfg.print_function_summaries(stdout); 67 | } else { 68 | fprintf(stdout, "\n"); 69 | for(auto &dis: disasm) { 70 | dis.print_BBs(stdout); 71 | } 72 | cfg.print_functions(stdout); 73 | } 74 | 75 | if(!options.exports.ida.empty()) { 76 | (void)export_bin2ida(options.exports.ida, &bin, &disasm, &cfg); 77 | } 78 | if(!options.exports.binja.empty()) { 79 | (void)export_bin2binja(options.exports.binja, &bin, &disasm, &cfg); 80 | } 81 | if(!options.exports.dot.empty()) { 82 | (void)export_cfg2dot(options.exports.dot, &cfg); 83 | } 84 | 85 | unload_binary(&bin); 86 | 87 | return 0; 88 | } 89 | 90 | -------------------------------------------------------------------------------- /nucleus.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_H 2 | #define NUCLEUS_H 3 | 4 | #define NUCLEUS_VERSION "nucleus disassembler v0.65" 5 | #define NUCLEUS_CREDITS "Copyright (C) 2016, 2017 Dennis Andriesse, Vrije Universiteit Amsterdam" 6 | 7 | #endif /* NUCLEUS_H */ 8 | 9 | -------------------------------------------------------------------------------- /options.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | 6 | #include "nucleus.h" 7 | #include "util.h" 8 | #include "strategy.h" 9 | #include "loader.h" 10 | #include "options.h" 11 | 12 | 13 | struct options options; 14 | 15 | 16 | void 17 | print_usage(char *prog) 18 | { 19 | int i; 20 | 
extern const char *strategy_functions_doc[]; 21 | extern const char *binary_types_descr[][2]; 22 | extern const char *binary_arch_descr[][2]; 23 | 24 | printf(NUCLEUS_VERSION"\n"); 25 | printf(NUCLEUS_CREDITS"\n"); 26 | printf("\n%s [-vwhtafbDpgi] -e -d \n", prog); 27 | printf(" -e \n"); 28 | printf(" : target binary\n"); 29 | printf(" -d \n"); 30 | printf(" : select disassembly strategy\n"); 31 | for(i = 0; strategy_functions[i]; i++) { 32 | printf(" %-12s %s\n", strategy_functions[i], strategy_functions_doc[i]); 33 | } 34 | printf(" -t \n"); 35 | printf(" : hint on binary format (may be ignored)\n"); 36 | for(i = 0; binary_types_descr[i][0]; i++) { 37 | printf(" %-12s %s\n", binary_types_descr[i][0], binary_types_descr[i][1]); 38 | } 39 | printf(" -a \n"); 40 | printf(" : disassemble as specified instruction architecture (only for raw binaries)\n"); 41 | for(i = 0; binary_arch_descr[i][0]; i++) { 42 | printf(" %-12s %s\n", binary_arch_descr[i][0], binary_arch_descr[i][1]); 43 | } 44 | printf(" -f : produce list of function entry points and sizes\n"); 45 | printf(" -b \n"); 46 | printf(" : binary base vma (only for raw binaries)\n"); 47 | printf(" -D : disassemble data sections as code\n"); 48 | printf(" -p : allow privileged instructions\n"); 49 | printf(" -g \n"); 50 | printf(" : export CFG to graphviz dot file\n"); 51 | printf(" -i \n"); 52 | printf(" : export binary info to IDA Pro script\n"); 53 | printf(" -n \n"); 54 | printf(" : export binary info to Binary Ninja script\n"); 55 | printf(" -v : verbose\n"); 56 | printf(" -w : disable warnings\n"); 57 | printf(" -h : help\n"); 58 | printf("\nConfiguration used in paper 'Compiler-Agnostic Function Detection in Binaries':\n"); 59 | printf(" %s -d linear -f -e \n", prog); 60 | printf("\n"); 61 | } 62 | 63 | 64 | int 65 | parse_options(int argc, char *argv[]) 66 | { 67 | int i, opt; 68 | char optstr[] = "vwhd:t:a:fb:Dpg:i:n:e:"; 69 | extern const char *binary_types_descr[][2]; 70 | extern const char 
*binary_arch_descr[][2]; 71 | std::string s; 72 | 73 | options.verbosity = 0; 74 | options.warnings = 1; 75 | options.only_code_sections = 1; 76 | options.allow_privileged = 0; 77 | options.summarize_functions = 0; 78 | 79 | options.nucleuspath.real = str_realpath(std::string(argv[0])); 80 | options.nucleuspath.dir = str_realpath_dir(std::string(argv[0])); 81 | options.nucleuspath.base = str_realpath_base(std::string(argv[0])); 82 | 83 | options.binary.type = Binary::BIN_TYPE_AUTO; 84 | options.binary.arch = Binary::ARCH_NONE; 85 | options.binary.base_vma = 0; 86 | 87 | options.strategy_function.score_function = NULL; 88 | options.strategy_function.mutate_function = NULL; 89 | options.strategy_function.select_function = NULL; 90 | 91 | opterr = 0; 92 | while((opt = getopt(argc, argv, optstr)) != -1) { 93 | switch(opt) { 94 | case 'v': 95 | options.verbosity++; 96 | break; 97 | 98 | case 'w': 99 | options.warnings = 0; 100 | break; 101 | 102 | case 'e': 103 | options.binary.filename = std::string(optarg); 104 | break; 105 | 106 | case 't': 107 | for(i = 0; binary_types_descr[i][0]; i++) { 108 | if(!strcmp(optarg, binary_types_descr[i][0])) { 109 | options.binary.type = (Binary::BinaryType)i; 110 | break; 111 | } 112 | } 113 | if(!binary_types_descr[i][0]) { 114 | printf("ERROR: Unrecognized binary format '%s'\n", optarg); 115 | print_usage(argv[0]); 116 | return -1; 117 | } 118 | break; 119 | 120 | case 'a': 121 | s = std::string(optarg); 122 | s = s.substr(0, s.find('-')); 123 | for(i = 0; binary_arch_descr[i][0]; i++) { 124 | if(!strcmp(s.c_str(), binary_arch_descr[i][0])) { 125 | options.binary.arch = (Binary::BinaryArch)i; 126 | break; 127 | } 128 | } 129 | s = std::string(optarg); 130 | if(s.find('-') != std::string::npos) { 131 | s = s.substr(s.find('-')+1); 132 | } 133 | options.binary.bits = strtoul(s.c_str(), NULL, 0); 134 | if(!binary_arch_descr[i][0]) { 135 | printf("ERROR: Unrecognized binary architecture '%s'\n", optarg); 136 | print_usage(argv[0]); 137 
| return -1; 138 | } 139 | break; 140 | 141 | case 'f': 142 | options.summarize_functions = 1; 143 | break; 144 | 145 | case 'b': 146 | options.binary.base_vma = strtoul(optarg, NULL, 0); 147 | if(!options.binary.base_vma) { 148 | printf("ERROR: Invalid binary base address %s\n", optarg); 149 | return -1; 150 | } 151 | break; 152 | 153 | case 'D': 154 | options.only_code_sections = 0; 155 | break; 156 | 157 | case 'p': 158 | options.allow_privileged = 1; 159 | break; 160 | 161 | case 'g': 162 | options.exports.dot = std::string(optarg); 163 | break; 164 | 165 | case 'i': 166 | options.exports.ida = std::string(optarg); 167 | break; 168 | 169 | case 'n': 170 | options.exports.binja = std::string(optarg); 171 | break; 172 | 173 | case 'd': 174 | options.strategy_function.name = std::string(optarg); 175 | break; 176 | 177 | case 'h': 178 | default: 179 | print_usage(argv[0]); 180 | return -1; 181 | } 182 | } 183 | 184 | if(options.binary.filename.empty()) { 185 | print_usage(argv[0]); 186 | return -1; 187 | } 188 | 189 | if(options.strategy_function.name.empty()) { 190 | printf("ERROR: No strategy function specified\n"); 191 | print_usage(argv[0]); 192 | return -1; 193 | } else if(load_bb_strategy_functions() < 0) { 194 | print_usage(argv[0]); 195 | return -1; 196 | } 197 | 198 | return 0; 199 | } 200 | 201 | -------------------------------------------------------------------------------- /options.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_OPTIONS_H 2 | #define NUCLEUS_OPTIONS_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | #include "bb.h" 9 | #include "loader.h" 10 | #include "disasm.h" 11 | 12 | struct options { 13 | int verbosity; 14 | int warnings; 15 | int only_code_sections; 16 | int allow_privileged; 17 | int summarize_functions; 18 | 19 | struct { 20 | std::string real; 21 | std::string dir; 22 | std::string base; 23 | } nucleuspath; 24 | 25 | struct { 26 | std::string ida; 27 | std::string binja; 28 | 
std::string dot; 29 | } exports; 30 | 31 | struct { 32 | std::string filename; 33 | Binary::BinaryType type; 34 | Binary::BinaryArch arch; 35 | unsigned bits; 36 | uint64_t base_vma; 37 | } binary; 38 | 39 | struct { 40 | std::string name; 41 | double (*score_function) (DisasmSection*, BB*); 42 | unsigned (*mutate_function) (DisasmSection*, BB*, BB**); 43 | int (*select_function) (DisasmSection*, BB*, unsigned); 44 | } strategy_function; 45 | }; 46 | extern struct options options; 47 | 48 | int parse_options (int argc, char *argv[]); 49 | 50 | #endif /* NUCLEUS_OPTIONS_H */ 51 | 52 | -------------------------------------------------------------------------------- /strategy.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | 5 | #include 6 | #include 7 | #include 8 | #include 9 | 10 | #include "loader.h" 11 | #include "bb.h" 12 | #include "insn.h" 13 | #include "dataregion.h" 14 | #include "disasm.h" 15 | #include "strategy.h" 16 | #include "util.h" 17 | #include "options.h" 18 | #include "log.h" 19 | 20 | typedef double (*bb_score_function_t) (DisasmSection*, BB*); 21 | typedef unsigned (*bb_mutate_function_t) (DisasmSection*, BB*, BB**); 22 | typedef int (*bb_select_function_t) (DisasmSection*, BB*, unsigned); 23 | 24 | 25 | /******************************************************************************* 26 | ** strategy function: linear ** 27 | ******************************************************************************/ 28 | double 29 | bb_score_linear(DisasmSection *dis, BB *bb) 30 | { 31 | bb->score = 1.0; 32 | return bb->score; 33 | } 34 | 35 | 36 | unsigned 37 | bb_mutate_linear(DisasmSection *dis, BB *parent, BB **mutants) 38 | { 39 | if(!parent) { 40 | try { 41 | (*mutants) = new BB[1]; 42 | } catch(std::bad_alloc &e) { 43 | print_err("out of memory"); 44 | return 0; 45 | } 46 | /* start disassembling at the start of the section */ 47 | (**mutants).set(dis->section->vma, 0); 48 | 
} else if(dis->section->contains(parent->end)) { 49 | /* next BB is directly after the current BB */ 50 | (**mutants).set(parent->end, 0); 51 | } else { 52 | (**mutants).set(0, 0); 53 | return 0; 54 | } 55 | 56 | return 1; 57 | } 58 | 59 | 60 | int 61 | bb_select_linear(DisasmSection *dis, BB *mutants, unsigned len) 62 | { 63 | unsigned i; 64 | 65 | for(i = 0; i < len; i++) { 66 | mutants[i].alive = true; 67 | } 68 | 69 | return len; 70 | } 71 | /******************************************************************************* 72 | ** strategy function: recursive ** 73 | ******************************************************************************/ 74 | double 75 | bb_score_recursive(DisasmSection *dis, BB *bb) 76 | { 77 | bb->score = 1.0; 78 | return bb->score; 79 | } 80 | 81 | 82 | unsigned 83 | bb_queue_recursive(DisasmSection *dis, BB *parent, BB **mutants, unsigned n, const unsigned max_mutants) 84 | { 85 | uint64_t target; 86 | 87 | for(auto &ins: parent->insns) { 88 | target = ins.target; 89 | if(target && dis->section->contains(target) 90 | && !(dis->addrmap.addr_type(target) & AddressMap::DISASM_REGION_BB_START)) { 91 | /* recursively queue the target BB for disassembly */ 92 | (*mutants)[n++].set(target, 0); 93 | } 94 | if((n+1) == max_mutants) break; 95 | } 96 | if((parent->insns.back().flags & Instruction::INS_FLAG_COND) 97 | || (parent->insns.back().flags & Instruction::INS_FLAG_CALL)) { 98 | /* queue fall-through block of conditional jump or call */ 99 | if(((n+1) < max_mutants) && dis->section->contains(parent->end) 100 | && !(dis->addrmap.addr_type(parent->end) & AddressMap::DISASM_REGION_BB_START)) { 101 | (*mutants)[n++].set(parent->end, 0); 102 | } 103 | } 104 | 105 | return n; 106 | } 107 | 108 | 109 | unsigned 110 | bb_mutate_recursive(DisasmSection *dis, BB *parent, BB **mutants) 111 | { 112 | unsigned i, n; 113 | const unsigned max_mutants = 4096; 114 | std::vector *symbols; 115 | 116 | /* XXX: This strategy may yield overlapping BBs. 
Also, the current 117 | * implementation is very basic and yields low coverage. For normal 118 | * use the linear strategy is recommended. */ 119 | 120 | n = 0; 121 | if(!parent) { 122 | try { 123 | (*mutants) = new BB[max_mutants]; 124 | } catch(std::bad_alloc &e) { 125 | print_err("out of memory"); 126 | return 0; 127 | } 128 | 129 | /* first guess for BBs are the entry point and function symbols if available, 130 | * or the section start address otherwise */ 131 | if(dis->section->contains(dis->section->binary->entry)) { 132 | (*mutants)[n++].set(dis->section->binary->entry, 0); 133 | } 134 | symbols = &dis->section->binary->symbols; 135 | for(i = 0; i < symbols->size(); i++) { 136 | if((symbols->at(i).type & Symbol::SYM_TYPE_FUNC) && ((n+1) < max_mutants) 137 | && dis->section->contains(symbols->at(i).addr)) { 138 | (*mutants)[n++].set(symbols->at(i).addr, 0); 139 | } 140 | } 141 | if(n == 0) { 142 | (*mutants)[n++].set(dis->section->vma, 0); 143 | } 144 | 145 | return n; 146 | } else { 147 | n = bb_queue_recursive(dis, parent, mutants, n, max_mutants); 148 | if(n == 0) { 149 | /* no recursive targets found, resort to heuristics */ 150 | if(dis->section->contains(parent->end) && !(dis->addrmap.addr_type(parent->end) & AddressMap::DISASM_REGION_BB_START)) { 151 | /* guess next BB directly after parent */ 152 | (*mutants)[n++].set(parent->end, 0); 153 | } 154 | } 155 | } 156 | 157 | return n; 158 | } 159 | 160 | 161 | int 162 | bb_select_recursive(DisasmSection *dis, BB *mutants, unsigned len) 163 | { 164 | unsigned i; 165 | 166 | for(i = 0; i < len; i++) { 167 | mutants[i].alive = true; 168 | } 169 | 170 | return len; 171 | } 172 | /******************************************************************************* 173 | ** dispatch functions ** 174 | ******************************************************************************/ 175 | const char *strategy_functions[] = { 176 | "linear", 177 | "recursive", 178 | NULL 179 | }; 180 | 181 | const char 
*strategy_functions_doc[] = { 182 | /* linear */ "Linear disassembly", 183 | /* recursive */ "Recursive disassembly (incomplete implementation, not recommended)", 184 | NULL 185 | }; 186 | 187 | void *bb_strategy_functions[][4] = { 188 | { (void*)bb_score_linear , (void*)bb_mutate_linear , (void*)bb_select_linear }, 189 | { (void*)bb_score_recursive , (void*)bb_mutate_recursive , (void*)bb_select_recursive }, 190 | { NULL, NULL, NULL } 191 | }; 192 | 193 | 194 | static int 195 | get_strategy_function_idx() 196 | { 197 | int i; 198 | 199 | i = 0; 200 | while(strategy_functions[i]) { 201 | if(options.strategy_function.name.compare(strategy_functions[i]) == 0) { 202 | return i; 203 | } 204 | i++; 205 | } 206 | 207 | return -1; 208 | } 209 | 210 | 211 | int 212 | load_bb_strategy_functions() 213 | { 214 | int i; 215 | std::string func; 216 | 217 | func = options.strategy_function.name; 218 | i = get_strategy_function_idx(); 219 | if(i >= 0) { 220 | options.strategy_function.score_function = (bb_score_function_t)bb_strategy_functions[i][0]; 221 | options.strategy_function.mutate_function = (bb_mutate_function_t)bb_strategy_functions[i][1]; 222 | options.strategy_function.select_function = (bb_select_function_t)bb_strategy_functions[i][2]; 223 | } else { 224 | goto fail; 225 | } 226 | 227 | return 0; 228 | 229 | fail: 230 | print_err("unknown strategy function '%s'", func.c_str()); 231 | return -1; 232 | } 233 | 234 | 235 | double 236 | bb_score(DisasmSection *dis, BB *bb) 237 | { 238 | if(!options.strategy_function.score_function) { 239 | if(load_bb_strategy_functions() < 0) return -1.0; 240 | } 241 | 242 | return options.strategy_function.score_function(dis, bb); 243 | } 244 | 245 | 246 | unsigned 247 | bb_mutate(DisasmSection *dis, BB *parent, BB **mutants) 248 | { 249 | if(!options.strategy_function.mutate_function) { 250 | if(load_bb_strategy_functions() < 0) return 0; 251 | } 252 | 253 | return options.strategy_function.mutate_function(dis, parent, mutants); 254 | 
} 255 | 256 | 257 | int 258 | bb_select(DisasmSection *dis, BB *mutants, unsigned len) 259 | { 260 | if(!options.strategy_function.select_function) { 261 | if(load_bb_strategy_functions() < 0) return 0; 262 | } 263 | 264 | return options.strategy_function.select_function(dis, mutants, len); 265 | } 266 | 267 | -------------------------------------------------------------------------------- /strategy.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_MUTATE_H 2 | #define NUCLEUS_MUTATE_H 3 | 4 | #include 5 | 6 | #include "disasm.h" 7 | 8 | extern const char *strategy_functions[]; 9 | 10 | int load_bb_strategy_functions (); 11 | 12 | double bb_score (DisasmSection *dis, BB *bb); 13 | unsigned bb_mutate (DisasmSection *dis, BB *parent, BB **mutants); 14 | int bb_select (DisasmSection *dis, BB *mutants, unsigned len); 15 | 16 | #endif /* NUCLEUS_MUTATE_H */ 17 | 18 | -------------------------------------------------------------------------------- /testout: -------------------------------------------------------------------------------- 1 | 0x0000000008048674 35 2 | 0x0000000008048870 4 3 | 0x0000000008048880 43 4 | 0x0000000008048940 280 5 | 0x0000000008048a60 33 6 | 0x0000000008048a90 249 7 | 0x0000000008048b90 31 8 | 0x0000000008048bb0 35 9 | 0x0000000008048be0 35 10 | 0x0000000008048c30 95 11 | 0x0000000008048c90 7 12 | 0x0000000008048ca0 46 13 | 0x0000000008048cd0 744 14 | 0x0000000008048fc0 161 15 | 0x0000000008049070 270 16 | 0x0000000008049180 326 17 | 0x0000000008049480 169 18 | 0x0000000008049530 369 19 | 0x00000000080496b0 275 20 | 0x00000000080497d0 384 21 | 0x0000000008049950 151 22 | 0x00000000080499f0 3324 23 | 0x000000000804b390 213 24 | 0x000000000804b4b0 373 25 | 0x000000000804b630 314 26 | 0x000000000804b770 397 27 | 0x000000000804b900 262 28 | 0x000000000804ba10 341 29 | 0x000000000804bb70 441 30 | 0x000000000804bd30 409 31 | 0x000000000804bed0 377 32 | 0x000000000804c050 41 33 | 0x000000000804c080 
28 34 | 0x000000000804c0a0 123 35 | 0x000000000804c120 65 36 | 0x000000000804c230 5 37 | 0x000000000804c240 156 38 | 0x000000000804c2e0 323 39 | 0x000000000804c430 847 40 | 0x000000000804c780 758 41 | 0x000000000804ca80 523 42 | 0x000000000804cc90 527 43 | 0x000000000804cea0 197 44 | 0x000000000804cf70 195 45 | 0x000000000804d040 213 46 | 0x000000000804d120 172 47 | 0x000000000804d2b0 167 48 | 0x000000000804d4a0 84 49 | 0x000000000804d550 1211 50 | 0x000000000804dab0 10 51 | 0x000000000804e2c0 346 52 | 0x000000000804e5a0 77 53 | 0x000000000804e610 33 54 | 0x000000000804e640 251 55 | 0x000000000804e740 438 56 | 0x000000000804e980 192 57 | 0x000000000804ea40 217 58 | 0x000000000804eb20 695 59 | 0x000000000804ede0 96 60 | 0x000000000804f040 300 61 | 0x000000000804f250 83 62 | 0x000000000804f2b0 47 63 | 0x000000000804f4b0 88 64 | 0x000000000804f7e0 1545 65 | 0x0000000008050270 10 66 | 0x0000000008050280 94 67 | 0x00000000080502e0 1006 68 | 0x00000000080506ce 4 69 | 0x00000000080506e0 728 70 | 0x0000000008050a30 52 71 | 0x0000000008048840 34 72 | 0x0000000008048910 43 73 | 0x00000000080488f0 30 74 | 0x0000000008048c10 30 75 | 0x00000000080492d0 429 76 | 0x000000000804a6f0 3222 77 | 0x000000000804b470 56 78 | 0x000000000804c170 188 79 | 0x000000000804d1d0 86 80 | 0x000000000804d230 115 81 | 0x000000000804d360 58 82 | 0x000000000804d3a0 81 83 | 0x000000000804d400 15 84 | 0x000000000804d410 70 85 | 0x000000000804d460 35 86 | 0x000000000804d490 7 87 | 0x000000000804d500 68 88 | 0x000000000804da10 147 89 | 0x000000000804dac0 192 90 | 0x000000000804db80 1758 91 | 0x000000000804e260 83 92 | 0x000000000804e5f0 20 93 | 0x000000000804e900 123 94 | 0x000000000804ee40 101 95 | 0x000000000804eeb0 385 96 | 0x000000000804f170 149 97 | 0x000000000804f210 49 98 | 0x000000000804f2e0 344 99 | 0x000000000804f440 112 100 | 0x000000000804f510 110 101 | 0x000000000804f580 606 102 | 0x000000000804fdf0 1126 103 | 0x0000000008050260 6 104 | 0x00000000080509c0 93 105 | 0x0000000008050a20 2 106 | 
0x0000000008050a64 20 107 | -------------------------------------------------------------------------------- /util.cc: -------------------------------------------------------------------------------- 1 | #include 2 | #include 3 | #include 4 | #include 5 | #include 6 | #include 7 | 8 | #include 9 | #include 10 | 11 | 12 | /******************************************************************************* 13 | ** string utils ** 14 | ******************************************************************************/ 15 | std::string 16 | str_realpath(std::string s) 17 | { 18 | char real[PATH_MAX+1]; 19 | 20 | if(!realpath(s.c_str(), real)) { 21 | return ""; 22 | } 23 | return std::string(real); 24 | } 25 | 26 | 27 | std::string 28 | str_realpath_dir(std::string s) 29 | { 30 | char real[PATH_MAX+1], *dir; 31 | 32 | if(!realpath(s.c_str(), real)) { 33 | return ""; 34 | } 35 | 36 | dir = dirname(real); 37 | return std::string(dir); 38 | } 39 | 40 | 41 | std::string 42 | str_realpath_base(std::string s) 43 | { 44 | char real[PATH_MAX+1], *base; 45 | 46 | if(!realpath(s.c_str(), real)) { 47 | return ""; 48 | } 49 | 50 | base = basename(real); 51 | return std::string(base); 52 | } 53 | 54 | 55 | std::string 56 | str_getenv(std::string env) 57 | { 58 | char *e; 59 | 60 | e = getenv(env.c_str()); 61 | return e ? 
std::string(e) : ""; 62 | } 63 | 64 | /******************************************************************************* 65 | ** rand functions ** 66 | ******************************************************************************/ 67 | uint64_t 68 | rand64() 69 | { 70 | std::random_device rd; 71 | std::mt19937 gen(rd()); 72 | std::uniform_int_distribution dis(0, 0xffffffffffffffff); 73 | 74 | return dis(gen); 75 | } 76 | 77 | 78 | uint64_t 79 | xorshift128plus() 80 | { 81 | uint64_t x, y; 82 | static uint64_t s[2]; 83 | static int inited = 0; 84 | 85 | if(!inited) { 86 | s[0] = rand64(); 87 | s[1] = rand64(); 88 | inited = 1; 89 | } 90 | 91 | x = s[0]; 92 | y = s[1]; 93 | 94 | s[0] = y; 95 | x ^= x << 23; 96 | s[1] = x ^ y ^ (x >> 17) ^ (y >> 26); 97 | 98 | return s[1] + y; 99 | } 100 | 101 | 102 | uint64_t 103 | fast_rand64() 104 | { 105 | return xorshift128plus(); 106 | } 107 | 108 | -------------------------------------------------------------------------------- /util.h: -------------------------------------------------------------------------------- 1 | #ifndef NUCLEUS_UTIL_H 2 | #define NUCLEUS_UTIL_H 3 | 4 | #include 5 | 6 | #include 7 | 8 | std::string str_realpath (std::string s); 9 | std::string str_realpath_dir (std::string s); 10 | std::string str_realpath_base (std::string s); 11 | std::string str_getenv (std::string env); 12 | 13 | uint64_t rand64 (); 14 | uint64_t xorshift128plus (); 15 | uint64_t fast_rand64 (); 16 | 17 | template bool compare_ptr (const T *const& a, const T *const& b) { return (*a) < (*b); } 18 | 19 | #endif /* NUCLEUS_UTIL_H */ 20 | 21 | --------------------------------------------------------------------------------