&sec, section_offset off, 8 | unsigned cu_addr_size, taddr cu_low_pc) 9 | : sec(sec->slice(off, ~0, format::unknown, cu_addr_size)), 10 | base_addr(cu_low_pc) 11 | { 12 | } 13 | 14 | rangelist::rangelist(const initializer_list > &ranges) 15 | { 16 | synthetic.reserve(ranges.size() * 2 + 2); 17 | for (auto &range : ranges) { 18 | synthetic.push_back(range.first); 19 | synthetic.push_back(range.second); 20 | } 21 | synthetic.push_back(0); 22 | synthetic.push_back(0); 23 | 24 | sec = make_shared

( 25 | section_type::ranges, (const char*)synthetic.data(), 26 | synthetic.size() * sizeof(taddr), format::unknown, 27 | sizeof(taddr)); 28 | 29 | base_addr = 0; 30 | } 31 | 32 | rangelist::iterator 33 | rangelist::begin() const 34 | { 35 | if (sec) 36 | return iterator(sec, base_addr); 37 | return end(); 38 | } 39 | 40 | rangelist::iterator 41 | rangelist::end() const 42 | { 43 | return iterator(); 44 | } 45 | 46 | bool 47 | rangelist::contains(taddr addr) const 48 | { 49 | for (auto ent : *this) 50 | if (ent.contains(addr)) 51 | return true; 52 | return false; 53 | } 54 | 55 | rangelist::iterator::iterator(const std::shared_ptr

&sec, taddr base_addr) 56 | : sec(sec), base_addr(base_addr), pos(0) 57 | { 58 | // Read in the first entry 59 | ++(*this); 60 | } 61 | 62 | rangelist::iterator & 63 | rangelist::iterator::operator++() 64 | { 65 | // DWARF4 section 2.17.3 66 | taddr largest_offset = ~(taddr)0; 67 | if (sec->addr_size < sizeof(taddr)) 68 | largest_offset += 1 << (8 * sec->addr_size); 69 | 70 | // Read in entries until we reach a regular entry of an 71 | // end-of-list. Note that pos points to the beginning of the 72 | // entry *following* the current entry, so that's where we 73 | // start. 74 | cursor cur(sec, pos); 75 | while (true) { 76 | entry.low = cur.address(); 77 | entry.high = cur.address(); 78 | 79 | if (entry.low == 0 && entry.high == 0) { 80 | // End of list 81 | sec.reset(); 82 | pos = 0; 83 | break; 84 | } else if (entry.low == largest_offset) { 85 | // Base address change 86 | base_addr = entry.high; 87 | } else { 88 | // Regular entry. Adjust by base address. 89 | entry.low += base_addr; 90 | entry.high += base_addr; 91 | pos = cur.get_section_offset(); 92 | break; 93 | } 94 | } 95 | 96 | return *this; 97 | } 98 | 99 | DWARFPP_END_NAMESPACE 100 | -------------------------------------------------------------------------------- /src/disasm/MaximalBlock.h: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 
8 | 9 | #pragma once 10 | 11 | #include "MCInst.h" 12 | #include "BasicBlock.h" 13 | #include "BranchData.h" 14 | #include 15 | 16 | namespace disasm { 17 | 18 | /** 19 | * MaximalBlock 20 | */ 21 | class MaximalBlock { 22 | public: 23 | MaximalBlock(); 24 | virtual ~MaximalBlock() = default; 25 | MaximalBlock(const MaximalBlock &src) = default; 26 | MaximalBlock &operator=(const MaximalBlock &src) = default; 27 | MaximalBlock(MaximalBlock &&src) = default; 28 | bool operator==(const MaximalBlock& src) const noexcept; 29 | 30 | /** 31 | * MB is valid when all of its BBs are valid. A BB is valid when it 32 | * has a branch as last instruction. 33 | */ 34 | bool isValid() const; 35 | 36 | const BasicBlock &getBasicBlockAt(const size_t bb_id) const; 37 | BasicBlock *ptrToBasicBlockAt(const unsigned bb_id); 38 | const std::vector &getBasicBlocks() const; 39 | // getting size and memsize of getFragments are provided by the fragment itself. 40 | // providing the same for BBs, however, requires MB intervention! 
41 | size_t getBasicBlockMemSize(const unsigned int bb_id) const; 42 | size_t getBasicBlocksCount() const; 43 | size_t instructionsCount() const; 44 | /* 45 | * return all instructions contained in the MB 46 | */ 47 | const std::vector &getInstructions() const; 48 | std::vector &getInstructionsRef(); 49 | 50 | const std::vector 51 | getInstructionsOf(const BasicBlock &bblock) const; 52 | const std::vector & 53 | getInstructionAddressesOf(const BasicBlock &bblock) const noexcept; 54 | const std::vector & 55 | getInstructionAddressesOf(const BasicBlock *bblock) const noexcept; 56 | const BranchData &branchInfo() const; 57 | void setBranchCondition(bool is_conditional) noexcept; 58 | 59 | size_t id() const; 60 | /* 61 | * return true if the given address falls inside the address space 62 | * covered by MB 63 | */ 64 | bool isWithinAddressSpace(addr_t addr) const; 65 | 66 | addr_t addrOfFirstInst() const; 67 | addr_t addrOfLastInst() const; 68 | addr_t endAddr() const; 69 | bool isAddressOfInstruction(const addr_t inst_addr) const; 70 | bool startOverlapsWith(const MaximalBlock &prev_block) const; 71 | bool startOverlapsWith(const MaximalBlock *prev_block) const; 72 | bool coversAddressSpaceOf(const MaximalBlock &block) const; 73 | bool coversAddressSpaceOf(const MaximalBlock *block) const; 74 | const MCInst *branchInstruction() const noexcept; 75 | // returns true if this block aligns with the first (or second) instruction 76 | // of the given block. 
77 | bool isAppendableBy(const MaximalBlock &block) const noexcept; 78 | 79 | friend class MaximalBlockBuilder; 80 | private: 81 | explicit MaximalBlock(size_t id, const BranchData &branch); 82 | private: 83 | size_t m_id; 84 | addr_t m_end_addr; 85 | BranchData m_branch; 86 | std::vector m_insts; 87 | std::vector m_bblocks; 88 | }; 89 | } 90 | -------------------------------------------------------------------------------- /main.cpp: -------------------------------------------------------------------------------- 1 | #include "binutils/elf/elf++.hh" 2 | #include "disasm/ElfDisassembler.h" 3 | #include "disasm/analysis/SectionDisassemblyAnalyzerARM.h" 4 | #include 5 | #include 6 | 7 | struct ConfigConsts { 8 | const std::string kFile; 9 | const std::string kNoSymbols; 10 | const std::string kSpeculative; 11 | const std::string kText; 12 | 13 | ConfigConsts() : kFile{"file"}, 14 | kNoSymbols{"no-symbols"}, 15 | kSpeculative{"speculative"}, 16 | kText{"text"} { } 17 | }; 18 | 19 | int main(int argc, char **argv) { 20 | ConfigConsts config; 21 | 22 | cmdline::parser cmd_parser; 23 | cmd_parser.add(config.kFile, 24 | 'f', 25 | "Path to an ARM ELF file to be disassembled", 26 | true, 27 | ""); 28 | cmd_parser.add(config.kSpeculative, 's', 29 | "Show all 'valid' disassembly"); 30 | 31 | cmd_parser.add(config.kText, 't', 32 | "Disassemble .text section only"); 33 | 34 | cmd_parser.parse_check(argc, argv); 35 | 36 | auto file_path = cmd_parser.get(config.kFile); 37 | 38 | int fd = open(file_path.c_str(), O_RDONLY); 39 | if (fd < 0) { 40 | fprintf(stderr, "%s: %s\n", argv[1], strerror(errno)); 41 | return 1; 42 | } 43 | 44 | elf::elf elf_file(elf::create_mmap_loader(fd)); 45 | 46 | // We disassmble ARM/Thumb executables only 47 | if ((elf_file.get_hdr().machine) != EM_ARM) { 48 | fprintf(stderr, "%s : Elf file architecture is not ARM!\n", argv[1]); 49 | return 3; 50 | } 51 | 52 | disasm::ElfDisassembler disassembler{elf_file}; 53 | if (cmd_parser.exist(config.kSpeculative)) 
{ 54 | std::cout << "Speculative disassembly of file: " 55 | << file_path << "\n"; 56 | if (cmd_parser.exist(config.kText)) { 57 | auto result = 58 | disassembler.disassembleSectionbyNameSpeculative(".text"); 59 | disasm::SectionDisassemblyAnalyzerARM analyzer{&elf_file, &result}; 60 | analyzer.buildCFG(); 61 | analyzer.refineCFG(); 62 | disassembler.prettyPrintSectionCFG 63 | (&analyzer.getCFG(), 64 | disasm::PrettyPrintConfig::kHideDataNodes); 65 | // disassembler.prettyPrintSwitchTables(&analyzer.getCFG()); 66 | // analyzer.buildCallGraph(); 67 | } else { 68 | disassembler.disassembleCodeSpeculative(); 69 | } 70 | } else if (disassembler.isSymbolTableAvailable()) { 71 | std::cout << "Disassembly using symbol table of file: " 72 | << file_path << "\n"; 73 | if (cmd_parser.exist(config.kText)) { 74 | auto result = disassembler.disassembleSectionbyName(".text"); 75 | disasm::SectionDisassemblyAnalyzerARM analyzer{&elf_file, &result}; 76 | analyzer.buildCFG(); 77 | analyzer.refineCFG(); 78 | disassembler.prettyPrintSectionCFG 79 | (&analyzer.getCFG(), 80 | disasm::PrettyPrintConfig::kDisplayDataNodes); 81 | // disassembler.prettyPrintSwitchTables(&analyzer.getCFG()); 82 | // analyzer.buildCallGraph(); 83 | } else 84 | disassembler.disassembleCodeUsingSymbols(); 85 | } else 86 | std::cout << "Symbol table was not found!!" << "\n"; 87 | return 0; 88 | } 89 | -------------------------------------------------------------------------------- /src/disasm/ElfDisassembler.h: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 
8 | 9 | #pragma once 10 | #include "SectionDisassemblyARM.h" 11 | #include "RawInstAnalyzer.h" 12 | #include "binutils/elf/elf++.hh" 13 | #include "MCParser.h" 14 | #include "MaximalBlockBuilder.h" 15 | 16 | #define EM_ARM 40 // From elf.h 17 | namespace disasm { 18 | 19 | class CFGNode; 20 | class DisassemblyCFG; 21 | class BasicBlock; 22 | 23 | class ARMCodeSymbolVal { 24 | public: 25 | static std::string 26 | kThumb() { return "$t"; } 27 | 28 | static std::string 29 | kARM() { return "$a"; } 30 | 31 | static std::string 32 | kData() { return "$d"; } 33 | }; 34 | 35 | enum class PrettyPrintConfig: unsigned { 36 | kHideDataNodes, 37 | kDisplayDataNodes 38 | }; 39 | /** 40 | * ElfDisassembler 41 | */ 42 | class ElfDisassembler { 43 | public: 44 | /** 45 | * Construct a Elf Disassembler that is initially not valid. Calling 46 | * methods other than valid on this results in undefined behavior. 47 | */ 48 | ElfDisassembler(); 49 | 50 | /** 51 | * Prepares input file for disassembly. 52 | * Precondition: file is a valid ELF file. 
53 | */ 54 | explicit ElfDisassembler(const elf::elf &elf_file); 55 | virtual ~ElfDisassembler() = default; 56 | ElfDisassembler(const ElfDisassembler &src) = delete; 57 | ElfDisassembler &operator=(const ElfDisassembler &src) = delete; 58 | ElfDisassembler(ElfDisassembler &&src) = default; 59 | 60 | bool valid() const { return m_valid; } 61 | void disassembleCodeUsingSymbols() const; 62 | 63 | SectionDisassemblyARM disassembleSectionUsingSymbols 64 | (const elf::section &sec) const; 65 | SectionDisassemblyARM disassembleSectionSpeculative 66 | (const elf::section &sec) const; 67 | std::vector disassembleCodeSpeculative() const; 68 | 69 | SectionDisassemblyARM disassembleSectionbyName 70 | (std::string sec_name) const; 71 | SectionDisassemblyARM disassembleSectionbyNameSpeculative 72 | (std::string sec_name) const; 73 | const std::pair getExecutableRegion(); 74 | bool isSymbolTableAvailable(); 75 | 76 | /** 77 | * Return the type of code at the initial address of executable. 78 | * needed to distinguish ARM/Thumb. 
79 | */ 80 | ISAType getInitialMode() const; 81 | 82 | ISAType getElfMachineArch() const; 83 | 84 | void prettyPrintMaximalBlock 85 | (const MaximalBlock *mblock) const; 86 | void prettyPrintSectionDisassembly 87 | (const SectionDisassemblyARM *sec_disasm) const; 88 | void prettyPrintSectionCFG 89 | (const DisassemblyCFG *sec_cfg, 90 | const PrettyPrintConfig config = PrettyPrintConfig::kHideDataNodes) 91 | const; 92 | void prettyPrintCFGNode(const CFGNode *cfg_node) const; 93 | void prettyPrintValidCFGNode 94 | (const CFGNode *cfg_node, 95 | const PrettyPrintConfig config = PrettyPrintConfig::kHideDataNodes) 96 | const; 97 | void prettyPrintSwitchTables(const DisassemblyCFG *sec_cfg) const; 98 | const RawInstAnalyzer *getMCAnalyzer() const; 99 | 100 | private: 101 | void prettyPrintCapstoneInst 102 | (const csh &handle, cs_insn *inst, bool details_enabled) const; 103 | std::vector> 104 | getCodeSymbolsOfSection(const elf::section &sec) const; 105 | private: 106 | bool m_valid; 107 | mutable RawInstAnalyzer m_analyzer; 108 | const elf::elf *m_elf_file; 109 | }; 110 | } 111 | -------------------------------------------------------------------------------- /src/binutils/dwarf/die_str_map.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | // XXX Make this more readily available? 
9 | namespace std { 10 | template<> 11 | struct hash 12 | { 13 | typedef size_t result_type; 14 | typedef dwarf::DW_TAG argument_type; 15 | result_type operator()(argument_type a) const 16 | { 17 | return (result_type)a; 18 | } 19 | }; 20 | } 21 | 22 | DWARFPP_BEGIN_NAMESPACE 23 | 24 | struct string_hash 25 | { 26 | typedef size_t result_type; 27 | typedef const char *argument_type; 28 | result_type operator()(const char *s) const 29 | { 30 | result_type h = 0; 31 | for (; *s; ++s) 32 | h += 33 * h + *s; 33 | return h; 34 | } 35 | }; 36 | 37 | struct string_eq 38 | { 39 | typedef bool result_type; 40 | typedef const char *first_argument_type; 41 | typedef const char *second_argument_type; 42 | bool operator()(const char *x, const char *y) const 43 | { 44 | return strcmp(x, y) == 0; 45 | } 46 | }; 47 | 48 | struct die_str_map::impl 49 | { 50 | impl(const die &parent, DW_AT attr, 51 | const initializer_list &accept) 52 | : attr(attr), accept(accept.begin(), accept.end()), 53 | pos(parent.begin()), end(parent.end()) { } 54 | 55 | unordered_map str_map; 56 | DW_AT attr; 57 | unordered_set accept; 58 | die::iterator pos, end; 59 | die invalid; 60 | }; 61 | 62 | die_str_map::die_str_map(const die &parent, DW_AT attr, 63 | const initializer_list &accept) 64 | : m(make_shared(parent, attr, accept)) 65 | { 66 | } 67 | 68 | die_str_map 69 | die_str_map::from_type_names(const die &parent) 70 | { 71 | return die_str_map 72 | (parent, DW_AT::name, 73 | // All DWARF type tags (this is everything that ends 74 | // with _type except thrown_type). 
75 | {DW_TAG::array_type, DW_TAG::class_type, 76 | DW_TAG::enumeration_type, DW_TAG::pointer_type, 77 | DW_TAG::reference_type, DW_TAG::string_type, 78 | DW_TAG::structure_type, DW_TAG::subroutine_type, 79 | DW_TAG::union_type, DW_TAG::ptr_to_member_type, 80 | DW_TAG::set_type, DW_TAG::subrange_type, 81 | DW_TAG::base_type, DW_TAG::const_type, 82 | DW_TAG::file_type, DW_TAG::packed_type, 83 | DW_TAG::volatile_type, DW_TAG::restrict_type, 84 | DW_TAG::interface_type, DW_TAG::unspecified_type, 85 | DW_TAG::shared_type, DW_TAG::rvalue_reference_type}); 86 | } 87 | 88 | const die & 89 | die_str_map::operator[](const char *val) const 90 | { 91 | // Do we have this value? 92 | auto it = m->str_map.find(val); 93 | if (it != m->str_map.end()) 94 | return it->second; 95 | // Read more until we find the value or the end 96 | while (m->pos != m->end) { 97 | const die &d = *m->pos; 98 | ++m->pos; 99 | 100 | if (!m->accept.count(d.tag) || !d.has(m->attr)) 101 | continue; 102 | value dval(d[m->attr]); 103 | if (dval.get_type() != value::type::string) 104 | continue; 105 | const char *dstr = dval.as_cstr(); 106 | m->str_map[dstr] = d; 107 | if (strcmp(val, dstr) == 0) 108 | return m->str_map[dstr]; 109 | } 110 | // Not found 111 | return m->invalid; 112 | } 113 | 114 | DWARFPP_END_NAMESPACE 115 | -------------------------------------------------------------------------------- /src/disasm/analysis/ICFGNode.h: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 
8 | 9 | #pragma once 10 | #include 11 | #include 12 | #include 13 | #include "CFGNode.h" 14 | 15 | namespace disasm { 16 | class CFGNode; 17 | 18 | enum class ICFGExitNodeType: unsigned char { 19 | kTailCall, // Tail call to an entry which can be direct or indirect 20 | kOverlap, // direct branch to body of another procedure 21 | kInvalidLR, 22 | kTailCallOrOverlap, 23 | kReturn, 24 | kIndirect 25 | }; 26 | 27 | enum class ICFGProcedureType: unsigned char { 28 | kTail, 29 | kDirectlyCalled, 30 | kExternal, 31 | kIndirectlyCalled, 32 | kInvalid 33 | }; 34 | 35 | /** 36 | * ICFGNode 37 | */ 38 | class ICFGNode { 39 | public: 40 | /** 41 | * Construct a ICFGNode that is initially not valid. Calling 42 | * methods other than operator= and valid on this results in 43 | * undefined behavior. 44 | */ 45 | ICFGNode(); 46 | ICFGNode(addr_t entry_addr, CFGNode *entry_node, ICFGProcedureType type); 47 | ICFGNode(CFGNode *entry_node, ICFGProcedureType type); 48 | virtual ~ICFGNode() = default; 49 | ICFGNode(const ICFGNode &src) = default; 50 | ICFGNode &operator=(const ICFGNode &src) = default; 51 | ICFGNode(ICFGNode &&src) = default; 52 | bool operator==(const ICFGNode &src) const noexcept; 53 | bool operator<(const ICFGNode &src) const noexcept; 54 | 55 | bool isCallerAlreadyExists(const ICFGNode &caller) const noexcept; 56 | bool isCalleeAlreadyExists(const ICFGNode &callee) const noexcept; 57 | void addCaller(const CFGNode *caller) noexcept; 58 | void addCallee(const ICFGNode *callee) const noexcept; 59 | CFGNode *getEntryNode() const noexcept; 60 | std::vector getAllExitNodes() const noexcept; 61 | const std::vector> & 62 | getExitNodes() const noexcept; 63 | /* 64 | * if this node overlaps with another 65 | */ 66 | std::vector getAllCFGNodes() const noexcept; 67 | 68 | bool hasOverlapWithOtherProcedure() const noexcept; 69 | bool isBuilt() const noexcept; 70 | bool isValid() const noexcept; 71 | bool isNonReturnProcedure() const noexcept; 72 | bool isReturnsToCaller() 
const noexcept; 73 | size_t id() const noexcept; 74 | bool isWithinEstimatedAddressSpace(const addr_t addr) const noexcept; 75 | CFGNode *entryNode() const noexcept; 76 | CFGNode *endNode() const noexcept; 77 | addr_t entryAddr() const noexcept; 78 | addr_t endAddr() const noexcept; 79 | addr_t estimatedEndAddr() const noexcept; 80 | const std::string &name() const noexcept; 81 | void setName(const char *name) noexcept; 82 | void setNonReturn(bool non_return) noexcept; 83 | void setReturnsToCaller(bool returns_to_caller) noexcept; 84 | ICFGProcedureType type() const noexcept; 85 | void finalize() noexcept; 86 | friend class DisassemblyCallGraph; 87 | friend class SectionDisassemblyAnalyzerARM; 88 | private: 89 | // a procedure is valid iff it returns to the address designated by caller 90 | // in all of its exit nodes. 91 | ICFGProcedureType m_proc_type; 92 | bool m_valid; 93 | bool m_non_return_proc; 94 | bool m_returns_to_caller; 95 | CFGNode *m_entry_node; 96 | CFGNode *m_end_node; 97 | addr_t m_entry_addr; 98 | addr_t m_end_addr; // actual end address of procedure 99 | addr_t m_estimated_end_addr; // initial overapproximated end address. 100 | unsigned m_lr_store_idx; 101 | bool m_has_overlap; 102 | std::string m_name; 103 | std::vector m_callers; 104 | std::vector m_callees; 105 | // The first node in m_cfg_nodes should be the entry_node 106 | std::vector m_cfg_nodes; 107 | std::vector> m_exit_nodes; 108 | }; 109 | } 110 | -------------------------------------------------------------------------------- /src/disasm/SectionDisassemblyARM.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 
8 | 9 | #include "SectionDisassemblyARM.h" 10 | #include 11 | #include 12 | 13 | namespace disasm { 14 | 15 | SectionDisassemblyARM::SectionDisassemblyARM() : m_valid(false) { 16 | } 17 | 18 | SectionDisassemblyARM::SectionDisassemblyARM 19 | (const elf::section *section) : 20 | m_valid{false}, 21 | m_isa{ISAType::kThumb}, 22 | m_section{section} { 23 | } 24 | 25 | SectionDisassemblyARM::SectionDisassemblyARM 26 | (const elf::section *section, ISAType isa) : 27 | m_valid{false}, 28 | m_isa{isa}, 29 | m_section{section} { 30 | } 31 | 32 | const std::string 33 | SectionDisassemblyARM::sectionName() const { 34 | return m_section->get_name(); 35 | } 36 | 37 | addr_t 38 | SectionDisassemblyARM::secStartAddr() const { 39 | return m_section->get_hdr().addr; 40 | } 41 | 42 | addr_t SectionDisassemblyARM::secEndAddr() const { 43 | return m_section->get_hdr().addr + m_section->get_hdr().size; 44 | } 45 | 46 | size_t 47 | SectionDisassemblyARM::sectionSize() const { 48 | return m_section->size(); 49 | } 50 | 51 | const uint8_t *SectionDisassemblyARM::ptrToData() const { 52 | return static_cast(m_section->data()); 53 | } 54 | 55 | void SectionDisassemblyARM::add(const MaximalBlock &max_block) { 56 | assert(m_max_blocks.size() == max_block.id() 57 | && "invalid index of maximal block"); 58 | m_max_blocks.push_back(max_block); 59 | } 60 | 61 | void SectionDisassemblyARM::add(MaximalBlock &&max_block) { 62 | m_max_blocks.emplace_back(max_block); 63 | } 64 | 65 | const MaximalBlock &SectionDisassemblyARM::back() const { 66 | return m_max_blocks.back(); 67 | } 68 | 69 | addr_t SectionDisassemblyARM::virtualAddrOf(const uint8_t *ptr) const { 70 | assert(ptr < ptrToData() + sectionSize() 71 | && ptrToData() <= ptr 72 | && "Invalid pointer !!!"); 73 | return secStartAddr() + (ptr - ptrToData()); 74 | } 75 | 76 | const uint8_t *SectionDisassemblyARM::physicalAddrOf 77 | (const addr_t virtual_addr) const { 78 | // assert(secStartAddr() <= virtual_addr 79 | // && virtual_addr < 
secEndAddr() 80 | // && "Invalid virtual address !!!"); 81 | return ptrToData() + (virtual_addr - secStartAddr()); 82 | } 83 | 84 | std::vector &SectionDisassemblyARM::getMaximalBlocks() { 85 | return m_max_blocks; 86 | } 87 | 88 | bool SectionDisassemblyARM::isLast(const MaximalBlock *max_block) const { 89 | return max_block->id() == m_max_blocks.size() - 1;; 90 | } 91 | 92 | bool SectionDisassemblyARM::isFirst(const MaximalBlock *max_block) const { 93 | return max_block->id() == 0; 94 | } 95 | 96 | const MaximalBlock &SectionDisassemblyARM::maximalBlockAt(size_t index) const { 97 | return m_max_blocks[index]; 98 | } 99 | 100 | MaximalBlock *SectionDisassemblyARM::ptrToMaximalBlockAt(size_t index) { 101 | return &(*(m_max_blocks.begin() + index)); 102 | } 103 | 104 | std::vector::const_iterator SectionDisassemblyARM::cbegin() const { 105 | return m_max_blocks.cbegin(); 106 | } 107 | 108 | std::vector::const_iterator SectionDisassemblyARM::cend() const { 109 | return m_max_blocks.cend(); 110 | } 111 | 112 | bool SectionDisassemblyARM::isWithinSectionAddressSpace(const addr_t &addr) const { 113 | return m_section->get_hdr().addr <= addr && 114 | addr < m_section->get_hdr().addr + m_section->get_hdr().size; 115 | } 116 | 117 | size_t SectionDisassemblyARM::maximalBlockCount() const { 118 | return m_max_blocks.size(); 119 | } 120 | 121 | ISAType SectionDisassemblyARM::getISA() const { 122 | return m_isa; 123 | } 124 | 125 | size_t SectionDisassemblyARM::size() const noexcept { 126 | return m_max_blocks.size(); 127 | } 128 | 129 | void SectionDisassemblyARM::reserve(size_t maximal_block_count) { 130 | m_max_blocks.reserve(maximal_block_count); 131 | } 132 | } 133 | -------------------------------------------------------------------------------- /src/disasm/analysis/DisassemblyAnalysisHelperARM.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 
3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 8 | 9 | #include "DisassemblyAnalysisHelperARM.h" 10 | #include "CFGNode.h" 11 | 12 | namespace disasm { 13 | 14 | DisassemblyAnalysisHelperARM::DisassemblyAnalysisHelperARM() : 15 | m_isa{ISAType::kThumb} { 16 | } 17 | 18 | DisassemblyAnalysisHelperARM::DisassemblyAnalysisHelperARM(ISAType isa) : 19 | m_isa{isa} { 20 | } 21 | 22 | std::vector 23 | DisassemblyAnalysisHelperARM::getPCRelativeLoadInstructions 24 | (const CFGNode *cfg_node) const noexcept { 25 | // XXX: assuming pc-relative loads can happen only in LDR, VLDR, and LDRD 26 | auto predicate = [](const MCInst *inst) -> bool { 27 | if ((inst->id() == ARM_INS_LDR || 28 | inst->id() == ARM_INS_VLDR) && 29 | inst->detail().arm.operands[1].mem.base == ARM_REG_PC) { 30 | return true; 31 | } 32 | return false; 33 | }; 34 | return cfg_node->getCandidateInstructionsSatisfying(predicate); 35 | } 36 | 37 | addr_t DisassemblyAnalysisHelperARM::recoverLDRSwitchBaseAddr 38 | (const CFGNode &node) const { 39 | const auto &switch_inst = node.maximalBlock()->branchInstruction(); 40 | if (switch_inst->detail().arm.operands[1].mem.base == ARM_REG_PC) { 41 | if (switch_inst->addr() % 4 == 0) { 42 | return switch_inst->addr() + 4; 43 | } else { 44 | return switch_inst->addr() + 6; 45 | } 46 | } else { 47 | for (const auto &inst:node.maximalBlock()->getInstructions()) { 48 | if (inst.id() == ARM_INS_ADR 49 | && (inst.detail().arm.operands[0].reg 50 | == switch_inst->detail().arm.operands[1].mem.base)) { 51 | addr_t base = 52 | inst.addr() + inst.detail().arm.operands[1].imm + 4; 53 | if (base % 4 == 0) { 54 | return base; 55 | } else { 56 | return base - 2; 57 | } 58 | } else if (inst.id() == ARM_INS_ADDW 59 | && (inst.detail().arm.operands[0].reg 60 | == 
switch_inst->detail().arm.operands[1].mem.base)) { 61 | addr_t base = 62 | inst.addr() + inst.detail().arm.operands[2].imm + 4; 63 | if (base % 4 == 0) { 64 | return base; 65 | } else { 66 | return base - 2; 67 | } 68 | } 69 | } 70 | return 0; 71 | } 72 | } 73 | 74 | unsigned DisassemblyAnalysisHelperARM::getLRStackStoreIndex 75 | (const CFGNode *cfg_node) const noexcept { 76 | auto predicate = [](const MCInst *inst) -> bool { 77 | return inst->id() == ARM_INS_PUSH; 78 | }; 79 | auto stack_pushes = cfg_node->getCandidateInstructionsSatisfying(predicate); 80 | // LR is normally the last one to be saved 81 | for (const auto inst_ptr: stack_pushes) { 82 | for (int i = (inst_ptr->detail().arm.op_count - 1); 83 | i > -1; --i) { 84 | if (inst_ptr->detail().arm.operands[i].reg == ARM_REG_LR) { 85 | return (unsigned) i + 1; 86 | } 87 | } 88 | } 89 | return 0; 90 | } 91 | 92 | bool DisassemblyAnalysisHelperARM::isReturnToCaller 93 | (const MCInst *inst) const noexcept { 94 | if (inst->id() == ARM_INS_B || inst->id() == ARM_INS_BX) { 95 | if (inst->detail().arm.operands[0].reg == ARM_REG_LR) { 96 | return true; 97 | } 98 | } 99 | if (inst->id() == ARM_INS_POP) { 100 | for (int i = inst->detail().arm.op_count - 1; 0 <= i; --i) { 101 | if (inst->detail().arm.operands[i].reg == ARM_REG_PC) { 102 | return true; 103 | } 104 | } 105 | } 106 | // TODO: ldr pc, [sp], imm is another type of return calls? 
107 | return false; 108 | } 109 | 110 | bool DisassemblyAnalysisHelperARM::isIndirectTailCall 111 | (const MCInst *inst) const noexcept { 112 | if (inst->id() == ARM_INS_B || inst->id() == ARM_INS_BX) { 113 | if (inst->detail().arm.operands[0].reg != ARM_REG_LR) { 114 | return true; 115 | } 116 | } 117 | return false; 118 | } 119 | } 120 | -------------------------------------------------------------------------------- /src/disasm/analysis/ICFGNode.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 8 | 9 | #include "ICFGNode.h" 10 | #include 11 | 12 | namespace disasm { 13 | 14 | ICFGNode::ICFGNode(): m_entry_node{nullptr} { 15 | } 16 | 17 | ICFGNode::ICFGNode(addr_t entry_addr, 18 | CFGNode *entry_node, 19 | ICFGProcedureType type) : 20 | m_proc_type{type}, 21 | m_valid{false}, 22 | m_non_return_proc{false}, 23 | m_returns_to_caller{false}, 24 | m_entry_node{entry_node}, 25 | m_end_node{nullptr}, 26 | m_entry_addr{entry_addr}, 27 | m_end_addr{0}, 28 | m_estimated_end_addr{0}, 29 | m_lr_store_idx{0}, 30 | m_has_overlap{false} { 31 | 32 | if (m_entry_node == nullptr) { 33 | m_proc_type = ICFGProcedureType::kExternal; 34 | } else { 35 | entry_node->m_role_in_procedure = CFGNodeRoleInProcedure::kEntry; 36 | entry_node->m_procedure_id = entry_addr; 37 | // XXX this should typically be the case. 
38 | entry_node->m_candidate_start_addr = entry_addr; 39 | m_end_node = m_entry_node; 40 | m_end_addr = m_entry_node->maximalBlock()->endAddr(); 41 | } 42 | std::ostringstream out; 43 | out << "proc_" << std::hex << m_entry_addr; 44 | m_name = out.str(); 45 | } 46 | 47 | ICFGNode::ICFGNode(CFGNode *entry_node, ICFGProcedureType type) : 48 | m_proc_type{type}, 49 | m_valid{false}, 50 | m_non_return_proc{false}, 51 | m_returns_to_caller{false}, 52 | m_entry_node{entry_node}, 53 | m_end_node{nullptr}, 54 | m_entry_addr{entry_node->getCandidateStartAddr()}, 55 | m_end_addr{0}, 56 | m_estimated_end_addr{0}, 57 | m_lr_store_idx{0}, 58 | m_has_overlap{false} { 59 | 60 | entry_node->m_role_in_procedure = CFGNodeRoleInProcedure::kEntry; 61 | entry_node->m_procedure_id = entry_node->getCandidateStartAddr(); 62 | m_end_node = m_entry_node; 63 | m_end_addr = m_entry_node->maximalBlock()->endAddr(); 64 | std::ostringstream out; 65 | out << "proc_" << std::hex << m_entry_addr; 66 | m_name = out.str(); 67 | } 68 | 69 | size_t ICFGNode::id() const noexcept { 70 | return m_entry_addr; 71 | } 72 | 73 | bool ICFGNode::isWithinEstimatedAddressSpace(const addr_t addr) const noexcept { 74 | return addr < m_estimated_end_addr 75 | && addr >= m_entry_addr; 76 | } 77 | 78 | void ICFGNode::addCaller(const CFGNode *caller) noexcept { 79 | m_callers.push_back(caller); 80 | } 81 | 82 | bool ICFGNode::operator==(const ICFGNode &src) const noexcept { 83 | return this->m_entry_addr == src.m_entry_addr; 84 | } 85 | 86 | bool ICFGNode::operator<(const ICFGNode &src) const noexcept { 87 | return this->m_entry_addr < src.m_entry_addr; 88 | } 89 | 90 | CFGNode *ICFGNode::entryNode() const noexcept { 91 | return m_entry_node; 92 | } 93 | 94 | CFGNode *ICFGNode::endNode() const noexcept { 95 | return m_end_node; 96 | } 97 | 98 | bool ICFGNode::isBuilt() const noexcept { 99 | return m_valid; 100 | } 101 | 102 | bool ICFGNode::isValid() const noexcept { 103 | return m_entry_node != nullptr; 104 | } 105 | 
106 | addr_t ICFGNode::entryAddr() const noexcept { 107 | return m_entry_addr; 108 | } 109 | 110 | addr_t ICFGNode::endAddr() const noexcept { 111 | return m_end_addr; 112 | } 113 | 114 | addr_t ICFGNode::estimatedEndAddr() const noexcept { 115 | return m_estimated_end_addr; 116 | } 117 | 118 | void ICFGNode::setName(const char *name) noexcept { 119 | // XXX: assuming name points to well-formed .dynstr section 120 | m_name = name; 121 | } 122 | 123 | void ICFGNode::setNonReturn(bool non_return) noexcept { 124 | m_non_return_proc = non_return; 125 | } 126 | 127 | void ICFGNode::setReturnsToCaller(bool returns_to_caller) noexcept { 128 | m_returns_to_caller = returns_to_caller; 129 | } 130 | 131 | const std::string &ICFGNode::name() const noexcept { 132 | return m_name; 133 | } 134 | 135 | ICFGProcedureType ICFGNode::type() const noexcept { 136 | return m_proc_type; 137 | } 138 | 139 | void ICFGNode::finalize() noexcept { 140 | m_valid = true; 141 | } 142 | 143 | bool ICFGNode::isNonReturnProcedure() const noexcept { 144 | return m_non_return_proc; 145 | } 146 | 147 | bool ICFGNode::isReturnsToCaller() const noexcept { 148 | return m_returns_to_caller; 149 | } 150 | 151 | const std::vector> & 152 | ICFGNode::getExitNodes() const noexcept { 153 | return m_exit_nodes; 154 | } 155 | } 156 | -------------------------------------------------------------------------------- /src/disasm/MaximalBlock.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 
8 | 9 | #include "MaximalBlock.h" 10 | #include 11 | 12 | namespace disasm { 13 | 14 | size_t 15 | MaximalBlock::getBasicBlockMemSize(const unsigned int bb_id) const { 16 | assert(bb_id <= m_bblocks.size() 17 | && "Invalid Basic Block Id!!"); 18 | return m_bblocks[bb_id].size(); 19 | } 20 | 21 | bool MaximalBlock::isValid() const { 22 | if (m_bblocks.size() == 0) 23 | return false; 24 | 25 | for (const BasicBlock &block: m_bblocks) 26 | if (!block.isValid()) 27 | return false; 28 | return true; 29 | } 30 | 31 | const std::vector 32 | MaximalBlock::getInstructionsOf(const BasicBlock &bblock) const { 33 | std::vector result; 34 | 35 | auto current = bblock.startAddr(); 36 | for (auto iter = m_insts.cbegin(); iter < m_insts.cend(); ++iter) { 37 | if ((*iter).addr() == current) { 38 | result.push_back(&(*iter)); 39 | current += (*iter).size(); 40 | } 41 | } 42 | return result; 43 | } 44 | 45 | const std::vector & 46 | MaximalBlock::getInstructionAddressesOf(const BasicBlock &bblock) const noexcept { 47 | return bblock.m_inst_addrs; 48 | } 49 | 50 | const std::vector & 51 | MaximalBlock::getInstructionAddressesOf(const BasicBlock *bblock) const noexcept { 52 | return bblock->m_inst_addrs; 53 | } 54 | 55 | addr_t MaximalBlock::addrOfFirstInst() const { 56 | return m_insts.front().addr(); 57 | } 58 | 59 | addr_t MaximalBlock::addrOfLastInst() const { 60 | return m_insts.back().addr(); 61 | } 62 | 63 | const BasicBlock & 64 | MaximalBlock::getBasicBlockAt(const size_t bb_id) const { 65 | return m_bblocks[bb_id]; 66 | } 67 | 68 | size_t 69 | MaximalBlock::getBasicBlocksCount() const { 70 | return m_bblocks.size(); 71 | } 72 | 73 | const std::vector & 74 | MaximalBlock::getBasicBlocks() const { 75 | return m_bblocks; 76 | } 77 | 78 | MaximalBlock::MaximalBlock() { 79 | } 80 | 81 | MaximalBlock::MaximalBlock(size_t id, const BranchData &branch) : 82 | m_id{id}, 83 | m_branch{branch} { 84 | } 85 | 86 | size_t MaximalBlock::id() const { 87 | return m_id; 88 | } 89 | 90 | size_t 
MaximalBlock::instructionsCount() const { 91 | return m_insts.size(); 92 | } 93 | 94 | 95 | bool MaximalBlock::isWithinAddressSpace(addr_t addr) const { 96 | return addrOfFirstInst() <= addr 97 | && addr < endAddr(); 98 | } 99 | 100 | const std::vector &MaximalBlock::getInstructions() const { 101 | return m_insts; 102 | } 103 | 104 | std::vector &MaximalBlock::getInstructionsRef() { 105 | return m_insts; 106 | } 107 | 108 | const BranchData &MaximalBlock::branchInfo() const { 109 | return m_branch; 110 | } 111 | 112 | void MaximalBlock::setBranchCondition(bool is_conditional) noexcept { 113 | m_branch.m_conditional_branch = is_conditional; 114 | } 115 | 116 | addr_t MaximalBlock::endAddr() const { 117 | return (m_end_addr); 118 | } 119 | 120 | bool MaximalBlock::startOverlapsWith(const MaximalBlock &prev_block) const { 121 | return addrOfFirstInst() < prev_block.endAddr(); 122 | } 123 | 124 | bool MaximalBlock::startOverlapsWith(const MaximalBlock *prev_block) const { 125 | return addrOfFirstInst() < prev_block->endAddr();; 126 | } 127 | 128 | bool MaximalBlock::coversAddressSpaceOf(const MaximalBlock &block) const { 129 | return addrOfFirstInst() < block.addrOfFirstInst() 130 | && endAddr() > block.endAddr(); 131 | } 132 | 133 | bool MaximalBlock::coversAddressSpaceOf(const MaximalBlock *block) const { 134 | return addrOfFirstInst() < block->addrOfFirstInst() 135 | && endAddr() > block->endAddr(); 136 | } 137 | 138 | bool MaximalBlock::isAddressOfInstruction(const addr_t inst_addr) const { 139 | if (inst_addr < addrOfFirstInst() || inst_addr > addrOfLastInst()) { 140 | return false; 141 | } 142 | for (auto it = m_insts.cbegin(); it < m_insts.cend(); ++it) { 143 | if ((*it).addr() == inst_addr) { 144 | return true; 145 | } 146 | } 147 | return false; 148 | } 149 | 150 | BasicBlock *MaximalBlock::ptrToBasicBlockAt(const unsigned bb_id) { 151 | return &(*(m_bblocks.begin() + bb_id)); 152 | } 153 | 154 | bool MaximalBlock::operator==(const MaximalBlock &src) const 
noexcept { 155 | return this->id() == src.id(); 156 | } 157 | 158 | const MCInst *MaximalBlock::branchInstruction() const noexcept { 159 | return &(m_insts.back()); 160 | } 161 | 162 | bool MaximalBlock::isAppendableBy(const MaximalBlock &block) const noexcept { 163 | return m_end_addr == block.m_insts[0].addr() 164 | || (block.m_insts.size() > 1 && m_end_addr == block.m_insts[1].addr()); 165 | } 166 | } 167 | -------------------------------------------------------------------------------- /src/disasm/analysis/SectionDisassemblyAnalyzerARM.h: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 8 | 9 | #pragma once 10 | #include "DisassemblyCFG.h" 11 | #include "DisassemblyCallGraph.h" 12 | #include "DisassemblyAnalysisHelperARM.h" 13 | #include "PLTProcedureMap.h" 14 | #include 15 | #include 16 | 17 | namespace disasm { 18 | 19 | class SectionDisassemblyARM; 20 | class RawInstAnalyzer; 21 | 22 | /** 23 | * SectionDisassemblyAnalyzerARM 24 | */ 25 | class SectionDisassemblyAnalyzerARM { 26 | public: 27 | SectionDisassemblyAnalyzerARM() = delete; 28 | SectionDisassemblyAnalyzerARM 29 | (elf::elf *elf_file, SectionDisassemblyARM *sec_disasm); 30 | virtual ~SectionDisassemblyAnalyzerARM() = default; 31 | SectionDisassemblyAnalyzerARM 32 | (const SectionDisassemblyAnalyzerARM &src) = default; 33 | SectionDisassemblyAnalyzerARM 34 | &operator=(const SectionDisassemblyAnalyzerARM &src) = default; 35 | SectionDisassemblyAnalyzerARM 36 | (SectionDisassemblyAnalyzerARM &&src) = default; 37 | 38 | void buildCFG(); 39 | void refineCFG(); 40 | void buildCallGraph(); 41 | /* 42 | * Search in CFG to find direct successor 43 | */ 
44 | CFGNode *findImmediateSuccessor(const CFGNode &cfg_node) noexcept; 45 | /* 46 | * Search in CFG to find remote successor matching target 47 | */ 48 | CFGNode *findRemoteSuccessor(addr_t target) noexcept; 49 | 50 | void RefineMaximalBlocks(const std::vector &known_code_addrs); 51 | bool isValidCodeAddr(addr_t addr) const noexcept; 52 | const DisassemblyCFG &getCFG() const noexcept; 53 | 54 | /* 55 | * returns the sum of instruction count of all predecessors in addition to 56 | * instruction count of current node. 57 | */ 58 | size_t calculateNodeWeight(const CFGNode *node) const noexcept; 59 | 60 | /* 61 | * returns the sum of instruction count of all predecessors that are 62 | * not of type data in addition to instruction count of given basic block. 63 | */ 64 | size_t calculateBasicBlockWeight 65 | (const CFGNode &node, const BasicBlock &basic_block) const noexcept; 66 | 67 | private: 68 | /* 69 | * Finds a valid basic block in and invalidates all direct predecessors that 70 | * do not target it. 71 | */ 72 | void resolveValidBasicBlock(CFGNode &node); 73 | void addConditionalBranchToCFG(CFGNode &node); 74 | void resolveSpaceOverlap(CFGNode &node); 75 | void resolveCFGConflicts 76 | (CFGNode &node, const std::vector &valid_predecessors); 77 | void recoverSwitchStatements(); 78 | void identifyPCRelativeLoadData(); 79 | bool isConditionalBranchAffectedByNodeOverlap 80 | (const CFGNode &node) const noexcept; 81 | private: 82 | // switch table related methods 83 | /* 84 | * returns true if given node is definitely not a switch statement. 
85 | */ 86 | bool isNotSwitchStatement(const CFGNode &node) const noexcept; 87 | struct SwitchTableData { 88 | SwitchTableData() = default; 89 | SwitchTableData 90 | (CFGNode *node, unsigned char table_type, addr_t table_end) : 91 | m_node{node}, 92 | m_table_type{table_type}, 93 | m_table_end{table_end} { 94 | } 95 | CFGNode *m_node; 96 | unsigned char m_table_type; 97 | addr_t m_table_end; 98 | }; 99 | using SwitchData = SectionDisassemblyAnalyzerARM::SwitchTableData; 100 | SwitchData recoverTBBSwitchTable(CFGNode &node); 101 | SwitchData recoverTBHSwitchTable(CFGNode &node); 102 | SwitchData recoverLDRSwitchTable(CFGNode &node); 103 | void switchTableCleanUp(SwitchTableData &table_data) noexcept; 104 | int recoverLimitOfSwitchTable(const CFGNode &node) const noexcept; 105 | 106 | private: 107 | // call graph related methods 108 | using AddrCFGNodePairVec = std::vector>; 109 | using AddrICFGNodeMap = std::unordered_map; 110 | void buildProcedure(ICFGNode &proc_node) noexcept; 111 | void traverseProcedureNode 112 | (ICFGNode &proc_node, 113 | CFGNode *cfg_node, 114 | CFGNode *predecessor) noexcept; 115 | void recoverDirectCalledProcedures() noexcept; 116 | addr_t validateProcedure(const ICFGNode &proc) noexcept; 117 | CFGNode *findSwitchTableTarget 118 | (addr_t target_addr); 119 | void addCallReturnRelation(CFGNode &node); 120 | 121 | private: 122 | elf::elf *m_elf_file; 123 | SectionDisassemblyARM *m_sec_disasm; 124 | DisassemblyAnalysisHelperARM m_analyzer; 125 | addr_t m_exec_addr_start; 126 | addr_t m_exec_addr_end; 127 | DisassemblyCFG m_sec_cfg; 128 | DisassemblyCallGraph m_call_graph; 129 | PLTProcedureMap m_plt_map; 130 | }; 131 | } 132 | -------------------------------------------------------------------------------- /src/binutils/elf/to_string.cc: -------------------------------------------------------------------------------- 1 | // Automatically generated by make at Mon Jul 13 12:29:09 CEST 2015 2 | // DO NOT EDIT 3 | 4 | #include "data.hh" 5 | 
#include "to_hex.hh"

ELFPP_BEGIN_NAMESPACE

// Name lookups for the ELF enumerations.  Each switch returns the
// enumerator's name; enumerators whose case only breaks, and values
// with no case at all, fall through to a hexadecimal rendering
// prefixed with the type name.  The flag types (shf, pf) instead
// join the names of all set flags with '|' and append the
// hexadecimal rendering of whatever bits are left over.

std::string
to_string(elfclass v)
{
        switch (v) {
        case elfclass::_32: return "32";
        case elfclass::_64: return "64";
        }
        return "(elfclass)0x" + to_hex((int)v);
}

std::string
to_string(elfdata v)
{
        switch (v) {
        case elfdata::lsb: return "lsb";
        case elfdata::msb: return "msb";
        }
        return "(elfdata)0x" + to_hex((int)v);
}

std::string
to_string(elfosabi v)
{
        switch (v) {
        case elfosabi::sysv: return "sysv";
        case elfosabi::hpux: return "hpux";
        case elfosabi::standalone: return "standalone";
        }
        return "(elfosabi)0x" + to_hex((int)v);
}

std::string
to_string(et v)
{
        switch (v) {
        case et::none: return "none";
        case et::rel: return "rel";
        case et::exec: return "exec";
        case et::dyn: return "dyn";
        case et::core: return "core";
        case et::loos: break;
        case et::hios: break;
        case et::loproc: break;
        case et::hiproc: break;
        }
        return "(et)0x" + to_hex((int)v);
}

std::string
to_string(sht v)
{
        switch (v) {
        case sht::null: return "null";
        case sht::progbits: return "progbits";
        case sht::symtab: return "symtab";
        case sht::strtab: return "strtab";
        case sht::rela: return "rela";
        case sht::hash: return "hash";
        case sht::dynamic: return "dynamic";
        case sht::note: return "note";
        case sht::nobits: return "nobits";
        case sht::rel: return "rel";
        case sht::shlib: return "shlib";
        case sht::dynsym: return "dynsym";
        case sht::loos: break;
        case sht::hios: break;
        case sht::loproc: break;
        case sht::hiproc: break;
        }
        return "(sht)0x" + to_hex((int)v);
}

std::string
to_string(shf v)
{
        std::string res;
        if ((v & shf::write) == shf::write) { res += "write|"; v &= ~shf::write; }
        if ((v & shf::alloc) == shf::alloc) { res += "alloc|"; v &= ~shf::alloc; }
        if ((v & shf::execinstr) == shf::execinstr) { res += "execinstr|"; v &= ~shf::execinstr; }
        if ((v & shf::maskos) == shf::maskos) { res += "maskos|"; v &= ~shf::maskos; }
        if ((v & shf::maskproc) == shf::maskproc) { res += "maskproc|"; v &= ~shf::maskproc; }
        if (res.empty() || v != (shf)0) res += "(shf)0x" + to_hex((int)v);
        else res.pop_back();
        return res;
}

std::string
to_string(pt v)
{
        switch (v) {
        case pt::null: return "null";
        case pt::load: return "load";
        case pt::dynamic: return "dynamic";
        case pt::interp: return "interp";
        case pt::note: return "note";
        case pt::shlib: return "shlib";
        case pt::phdr: return "phdr";
        case pt::loos: break;
        case pt::hios: break;
        case pt::loproc: break;
        case pt::hiproc: break;
        case pt::gnu_eh_frame: return "gnu_eh_frame";
        case pt::gnu_stack: return "gnu_stack";
        case pt::gnu_relro: return "gnu_relro";
        }
        return "(pt)0x" + to_hex((int)v);
}

std::string
to_string(pf v)
{
        std::string res;
        if ((v & pf::x) == pf::x) { res += "x|"; v &= ~pf::x; }
        if ((v & pf::w) == pf::w) { res += "w|"; v &= ~pf::w; }
        if ((v & pf::r) == pf::r) { res += "r|"; v &= ~pf::r; }
        if ((v & pf::maskos) == pf::maskos) { res += "maskos|"; v &= ~pf::maskos; }
        if ((v & pf::maskproc) == pf::maskproc) { res += "maskproc|"; v &= ~pf::maskproc; }
        if (res.empty() || v != (pf)0) res += "(pf)0x" + to_hex((int)v);
        else res.pop_back();
        return res;
}

std::string
to_string(stb v)
{
        switch (v) {
        case stb::local: return "local";
        case stb::global: return "global";
        case stb::weak: return "weak";
        case stb::loos: break;
        case stb::hios: break;
        case stb::loproc: break;
        case stb::hiproc: break;
        }
        return "(stb)0x" + to_hex((int)v);
}

std::string
to_string(stt v)
{
        switch (v) {
        case stt::notype: return "notype";
        case stt::object: return "object";
        case stt::func: return "func";
        case stt::section: return "section";
        case stt::file: return "file";
        case stt::loos: break;
        case stt::hios: break;
        case stt::loproc: break;
        case stt::hiproc: break;
        }
        return "(stt)0x" + to_hex((int)v);
}

ELFPP_END_NAMESPACE

--------------------------------------------------------------------------------
/src/binutils/dwarf/small_vector.hh:
--------------------------------------------------------------------------------
#ifndef _DWARFPP_SMALL_VECTOR_HH_
#define _DWARFPP_SMALL_VECTOR_HH_

DWARFPP_BEGIN_NAMESPACE

/**
 * A vector-like class that only heap allocates above a specified
 * size.
 *
 * Up to Min elements live in a buffer embedded in the object
 * itself; reserve() switches to a heap buffer once more room is
 * requested.  Element access (operator[], at, revat, front, back)
 * and pop_back perform no bounds checking.
 *
 * NOTE(review): the template parameter list was lost when this
 * listing was extracted; <class T, unsigned Min> is reconstructed
 * from how T and Min are used below -- confirm against upstream.
 */
template<class T, unsigned Min>
class small_vector
{
public:
        typedef T value_type;
        typedef value_type& reference;
        typedef const value_type& const_reference;
        typedef size_t size_type;

        /** Construct an empty vector that uses the inline buffer. */
        small_vector()
                : base((T*)buf), end(base), cap((T*)&buf[sizeof(T[Min])])
        {
        }

        /** Construct a copy of o (element-wise copy construction). */
        small_vector(const small_vector &o)
                : base((T*)buf), end(base), cap((T*)&buf[sizeof(T[Min])])
        {
                *this = o;
        }

        /**
         * Construct by taking over the contents of o.  o is left
         * empty and using its own inline buffer.
         */
        small_vector(small_vector &&o)
                : base((T*)buf), end(base), cap((T*)&buf[sizeof(T[Min])])
        {
                if ((char*)o.base == o.buf) {
                        // Elements are inline, so there is no heap
                        // buffer to steal; move them one by one.
                        // They fit: both inline buffers hold Min.
                        size_type osize = o.size();
                        for (size_type i = 0; i < osize; i++)
                                new (&base[i]) T(std::move(o.base[i]));
                        end = base + osize;
                        o.clear();
                } else {
                        // Elements are external; swap pointers
                        base = o.base;
                        end = o.end;
                        cap = o.cap;

                        o.base = (T*)o.buf;
                        o.end = o.base;
                        o.cap = (T*)&o.buf[sizeof(T[Min])];
                }
        }

        /** Destroy all elements and release the heap buffer, if any. */
        ~small_vector()
        {
                clear();
                if ((char*)base != buf)
                        delete[] (char*)base;
        }

        /**
         * Replace the contents with copies of o's elements.
         * Assigning a vector to itself is a no-op.
         */
        small_vector &operator=(const small_vector &o)
        {
                // Without this guard clear() would destroy the very
                // elements we are about to copy from.
                if (this == &o)
                        return *this;

                size_type osize = o.size();
                clear();
                reserve(osize);
                // Advance end per element so the vector stays
                // consistent if a copy constructor throws.
                for (size_type i = 0; i < osize; i++) {
                        new (end) T(o[i]);
                        end++;
                }
                return *this;
        }

        /** Number of elements currently stored. */
        size_type size() const
        {
                return end - base;
        }

        /** True if no elements are stored. */
        bool empty() const
        {
                return base == end;
        }

        /**
         * Ensure there is room for at least n elements.  Capacity
         * grows by doubling; existing elements are transferred to
         * the new heap buffer and the old ones destroyed.
         */
        void reserve(size_type n)
        {
                if (n <= (size_type)(cap - base))
                        return;

                size_type target = cap - base;
                if (target == 0)
                        target = 1;
                while (target < n)
                        target <<= 1;

                // sizeof(T) * target rather than sizeof(T[target]):
                // the latter is a variable-length array type, which
                // is not standard C++.
                char *newbuf = new char[sizeof(T) * target];
                T *src = base, *dest = (T*)newbuf;
                for (; src < end; src++, dest++) {
                        new (dest) T(std::move(*src));
                        // Destroy the element left behind in the old
                        // buffer, not the one just constructed.
                        src->~T();
                }
                if ((char*)base != buf)
                        delete[] (char*)base;
                base = (T*)newbuf;
                end = dest;
                cap = base + target;
        }

        /** Unchecked element access. */
        reference operator[](size_type n)
        {
                return base[n];
        }

        const_reference operator[](size_type n) const
        {
                return base[n];
        }

        /** Element access; unlike std::vector::at this is unchecked. */
        reference at(size_type n)
        {
                return base[n];
        }

        const_reference at(size_type n) const
        {
                return base[n];
        }

        /**
         * "Reverse at".  revat(0) is equivalent to back().  revat(1)
         * is the element before back.  Etc.
         */
        reference revat(size_type n)
        {
                return *(end - 1 - n);
        }

        const_reference revat(size_type n) const
        {
                return *(end - 1 - n);
        }

        /** First element; the vector must not be empty. */
        reference front()
        {
                return base[0];
        }

        const_reference front() const
        {
                return base[0];
        }

        /** Last element; the vector must not be empty. */
        reference back()
        {
                return *(end-1);
        }

        const_reference back() const
        {
                return *(end-1);
        }

        /** Append a copy of x, growing the storage if needed. */
        void push_back(const T& x)
        {
                reserve(size() + 1);
                new (end) T(x);
                end++;
        }

        /** Append x by move, growing the storage if needed. */
        void push_back(T&& x)
        {
                reserve(size() + 1);
                new (end) T(std::move(x));
                end++;
        }

        /** Destroy the last element; the vector must not be empty. */
        void pop_back()
        {
                end--;
                end->~T();
        }

        /** Destroy all elements; the current capacity is kept. */
        void clear()
        {
                for (T* p = base; p < end; ++p)
                        p->~T();
                end = base;
        }

private:
        // Inline storage for the first Min elements.  alignas(T)
        // because a plain char array carries no alignment guarantee
        // for the T objects constructed in it.
        alignas(T) char buf[sizeof(T[Min])];
        // base/end delimit the live elements; cap is one past the
        // last slot of the current buffer (inline or heap).
        T *base, *end, *cap;
};

DWARFPP_END_NAMESPACE

#endif

--------------------------------------------------------------------------------
/src/disasm/analysis/CFGNode.h:
--------------------------------------------------------------------------------
//===------------------------------------------------------------*- C++ -*-===//
//
// This file is distributed under BSD License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// Copyright (c) 2016 University of Kaiserslautern.
8 | 9 | #pragma once 10 | #include "disasm/common.h" 11 | #include "disasm/MaximalBlock.h" 12 | #include "CFGEdge.h" 13 | #include 14 | 15 | namespace disasm { 16 | class ICFGNode; 17 | 18 | enum class CFGNodeType: unsigned char { 19 | kData = 1, 20 | kUnknown = 2, 21 | kCode = 4 22 | }; 23 | 24 | enum class CFGNodeRoleInProcedure: unsigned char { 25 | kUnknown, 26 | kEntry, 27 | kCall, 28 | kExit, // call or tail call that exits the section (e.g, to plt) 29 | kBody, 30 | }; 31 | 32 | enum class NodeTraversalStatus: unsigned char { 33 | kUnvisited, 34 | kVisited, 35 | kFinished 36 | }; 37 | /** 38 | * CFGNode 39 | */ 40 | class CFGNode { 41 | public: 42 | /** 43 | * Construct a CFGNode that is initially not valid. Calling 44 | * methods other than operator= on this results in 45 | * undefined behavior. 46 | */ 47 | CFGNode(); 48 | CFGNode(MaximalBlock *current_block); 49 | virtual ~CFGNode() = default; 50 | CFGNode(const CFGNode &src) = default; 51 | CFGNode &operator=(const CFGNode &src) = default; 52 | CFGNode(CFGNode &&src) = default; 53 | bool operator==(const CFGNode &src) const noexcept; 54 | 55 | const MaximalBlock *maximalBlock() const noexcept; 56 | MaximalBlock *maximalBlockPtr() const noexcept; 57 | const CFGNode *getOverlapNode() const; 58 | size_t id() const noexcept; 59 | addr_t procedure_id() const noexcept; 60 | 61 | void addRemotePredecessor(CFGNode *predecessor, addr_t target_addr); 62 | void addImmediatePredecessor(CFGNode *predecessor, addr_t target_addr); 63 | /* 64 | * should be set only for conditional branches 65 | */ 66 | void setImmediateSuccessor(CFGNode *successor); 67 | /* 68 | * should be valid only for conditional branches 69 | */ 70 | const CFGNode *immediateSuccessor() const; 71 | /* 72 | * should be set for direct branches (conditional/unconditional) 73 | */ 74 | void setRemoteSuccessor(CFGNode *successor); 75 | const CFGNode *remoteSuccessor() const; 76 | 77 | const std::vector &getDirectPredecessors() const noexcept; 78 | const 
std::vector &getIndirectPredecessors() const noexcept; 79 | const std::vector &getIndirectSuccessors() const noexcept; 80 | bool hasOverlapWithOtherNode() const noexcept; 81 | bool isCandidateStartAddressSet() const noexcept; 82 | bool isProcedureEntry() const noexcept; 83 | /* 84 | * return the sequence of instructions in valid basic block starting from 85 | * the candidate start address. Throws exception in case valid basic block not set. 86 | */ 87 | std::vector getCandidateInstructions() const; 88 | std::vector getCandidateInstructionsSatisfying 89 | (std::function predicate) const; 90 | size_t getCountOfCandidateInstructions() const noexcept; 91 | addr_t getCandidateStartAddr() const noexcept; 92 | void setCandidateStartAddr(addr_t candidate_start) noexcept; 93 | void setType(const CFGNodeType type); 94 | void setToDataAndInvalidatePredecessors(); 95 | void resetCandidateStartAddress(); 96 | CFGNodeType getType() const; 97 | bool isData() const; 98 | bool isCode() const; 99 | bool isSwitchBranchTarget() const noexcept; 100 | /* 101 | * returns true if the branch instruction belongs to the call_group of 102 | * ARM which is BL and BLX. 103 | */ 104 | bool isCall() const noexcept; 105 | void setIsCall(bool value) noexcept; 106 | /* 107 | * returns true if immediate predecessor is a PossibleCall 108 | */ 109 | bool isAlignedToPredecessor() const noexcept; 110 | const CFGNode *getAlignedPredecessor() const noexcept; 111 | /* 112 | * returns a valid value only after recovering switch tables. 
113 | */ 114 | bool isSwitchStatement() const noexcept; 115 | bool isCandidateStartAddressValid(addr_t candidate_addr) const noexcept; 116 | bool isAssignedToProcedure() const noexcept; 117 | bool isImmediateSuccessorSet() const noexcept; 118 | CFGNodeRoleInProcedure roleInProcedure() const noexcept; 119 | addr_t getMinTargetAddrOfValidPredecessor() const noexcept; 120 | bool hasPredecessors() const noexcept; 121 | bool isAppendableBy(const CFGNode *cfg_node) const; 122 | CFGNode *getReturnSuccessorNode() const noexcept; 123 | friend class SectionDisassemblyAnalyzerARM; 124 | friend class ICFGNode; 125 | private: 126 | void setMaximalBlock(MaximalBlock *maximal_block) noexcept; 127 | CFGNode *getOverlapNodePtr() const noexcept; 128 | void setAsReturnNodeFrom(CFGNode &cfg_node); 129 | void setAsSwitchCaseFor(CFGNode *cfg_node, const addr_t target_addr); 130 | private: 131 | CFGNodeType m_type; 132 | bool m_is_call; 133 | NodeTraversalStatus m_traversal_status; 134 | CFGNodeRoleInProcedure m_role_in_procedure; 135 | addr_t m_candidate_start_addr; 136 | CFGNode *m_overlap_node; 137 | CFGNode *m_node_appendable_by_this; 138 | addr_t m_procedure_id; // acts as an id for a procedure 139 | CFGNode *m_immediate_successor; 140 | CFGNode *m_remote_successor; 141 | MaximalBlock *m_max_block; 142 | std::vector m_direct_preds; 143 | std::vector m_indirect_preds; 144 | std::vector m_indirect_succs; 145 | }; 146 | } 147 | -------------------------------------------------------------------------------- /src/disasm/analysis/PLTProcedureMap.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 
8 | 9 | #include "PLTProcedureMap.h" 10 | #include 11 | #include 12 | 13 | namespace disasm { 14 | 15 | PLTProcedureMap::PLTProcedureMap(const elf::elf *elf_file) : 16 | m_elf_file{elf_file} { 17 | 18 | std::vector dyn_func_names; 19 | // ELF standard: sections and segments have no specified order 20 | for (const auto &sec : m_elf_file->sections()) { 21 | if (sec.get_name() == ".dynsym") { 22 | auto dynsymtab = sec.as_symtab(); 23 | size_t len; 24 | for (auto sym : dynsymtab) { 25 | dyn_func_names.push_back(sym.get_name(&len)); 26 | } 27 | break; 28 | } 29 | } 30 | 31 | for (const auto &sec : m_elf_file->sections()) { 32 | if (sec.get_name() == ".rel.plt") { 33 | for (const Elf32_Rel 34 | *rel_iter = static_cast (sec.data()); 35 | rel_iter < reinterpret_cast 36 | (static_cast(sec.data()) + sec.size()); 37 | ++rel_iter) { 38 | auto func_name = dyn_func_names[ELF32_M_SYM(rel_iter->r_info)]; 39 | m_got_proc_name_map.insert({rel_iter->r_offset, func_name}); 40 | } 41 | break; 42 | } 43 | } 44 | for (const auto &sec : m_elf_file->sections()) { 45 | if (sec.get_name() == ".plt") { 46 | m_start_plt_addr = sec.get_hdr().addr; 47 | m_start_plt_code_ptr = static_cast(sec.data()); 48 | m_end_plt_addr = m_start_plt_addr + sec.get_hdr().size; 49 | m_parser.initialize(CS_ARCH_ARM, CS_MODE_ARM, m_end_plt_addr); 50 | break; 51 | } 52 | } 53 | } 54 | 55 | const char *PLTProcedureMap::getName(addr_t proc_entry_addr) const noexcept { 56 | auto res_find_entry = m_addr_got_map.find(proc_entry_addr); 57 | if (res_find_entry != m_addr_got_map.end()) { 58 | auto res_find_proc_name = 59 | m_got_proc_name_map.find((*res_find_entry).second.first); 60 | return (*res_find_proc_name).second; 61 | } 62 | return nullptr; 63 | } 64 | 65 | std::pair PLTProcedureMap::addProcedure 66 | (addr_t proc_entry_addr) noexcept { 67 | // first check if procedure is already found 68 | auto res_addr_got_return = m_addr_got_map.find(proc_entry_addr); 69 | if (res_addr_got_return != m_addr_got_map.end()) { 70 | 
auto res_got_name = 71 | m_got_proc_name_map.find((*res_addr_got_return).second.first); 72 | return {(*res_got_name).second, (*res_addr_got_return).second.second}; 73 | } 74 | addr_t got_offset = calculateGotOffset(proc_entry_addr); 75 | auto res_got_name = m_got_proc_name_map.find(got_offset); 76 | assert(res_got_name != m_got_proc_name_map.end() 77 | && "Invalid GOT offset calculated!"); 78 | bool non_returning = isNonReturnProcedure((*res_got_name).second); 79 | m_addr_got_map.insert({proc_entry_addr, {got_offset, non_returning}}); 80 | return {(*res_got_name).second, non_returning}; 81 | } 82 | 83 | bool PLTProcedureMap::isNonReturnProcedure(addr_t proc_entry_addr) noexcept { 84 | auto res_find_entry = m_addr_got_map.find(proc_entry_addr); 85 | if (res_find_entry != m_addr_got_map.end()) { 86 | return (*res_find_entry).second.second; 87 | } 88 | return false; 89 | } 90 | 91 | bool PLTProcedureMap::isNonReturnProcedure(const char *proc_name) const noexcept { 92 | // compares procedure name with well-known non-returning procedures 93 | if (strcmp(proc_name, "__assert_fail") == 0) { 94 | return true; 95 | } 96 | if (strcmp(proc_name, "__stack_chk_fail") == 0) { 97 | return true; 98 | } 99 | if (strcmp(proc_name, "_exit") == 0) { 100 | return true; 101 | } 102 | if (strcmp(proc_name, "abort") == 0) { 103 | return true; 104 | } 105 | if (strcmp(proc_name, "exit") == 0) { 106 | return true; 107 | } 108 | return false; 109 | } 110 | 111 | addr_t PLTProcedureMap::calculateGotOffset(addr_t proc_entry_addr) const noexcept { 112 | const uint8_t *code_ptr = 113 | m_start_plt_code_ptr - m_start_plt_addr + proc_entry_addr; 114 | cs_insn inst; 115 | cs_detail detail; 116 | inst.detail = &detail; 117 | size_t size = 12; 118 | m_parser.disasm2(&code_ptr, &size, &proc_entry_addr, &inst); 119 | if (inst.id != ARM_INS_ADD) { 120 | // handling inline veneer that performs a state mode change only 121 | m_parser.disasm2(&code_ptr, &size, &proc_entry_addr, &inst); 122 | size = 8; 123 
| } 124 | assert(inst.id == ARM_INS_ADD && "Invalid PLT entry!!"); 125 | // This instruction should actually be ADR 126 | addr_t result = inst.address + 8; // PC value 127 | m_parser.disasm2(&code_ptr, &size, &proc_entry_addr, &inst); 128 | assert(inst.id == ARM_INS_ADD && "Invalid PLT entry!!"); 129 | result += detail.arm.operands[2].imm; 130 | m_parser.disasm2(&code_ptr, &size, &proc_entry_addr, &inst); 131 | assert(inst.id == ARM_INS_LDR && "Invalid PLT entry!!"); 132 | result += detail.arm.operands[1].mem.disp; 133 | return result; 134 | } 135 | 136 | bool PLTProcedureMap::isWithinPLTSection(addr_t addr) const noexcept { 137 | return m_start_plt_addr <= addr && addr < m_end_plt_addr; 138 | } 139 | } 140 | -------------------------------------------------------------------------------- /src/binutils/dwarf/cursor.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | #include 4 | #include 5 | 6 | using namespace std; 7 | 8 | DWARFPP_BEGIN_NAMESPACE 9 | 10 | int64_t 11 | cursor::sleb128() 12 | { 13 | // Appendix C 14 | uint64_t result = 0; 15 | unsigned shift = 0; 16 | while (pos < sec->end) { 17 | uint8_t byte = *(uint8_t*)(pos++); 18 | result |= (uint64_t)(byte & 0x7f) << shift; 19 | shift += 7; 20 | if ((byte & 0x80) == 0) { 21 | if (shift < sizeof(result)*8 && (byte & 0x40)) 22 | result |= -((uint64_t)1 << shift); 23 | return result; 24 | } 25 | } 26 | underflow(); 27 | return 0; 28 | } 29 | 30 | shared_ptr

31 | cursor::subsection() 32 | { 33 | // Section 7.4 34 | const char *begin = pos; 35 | section_length length = fixed(); 36 | format fmt; 37 | if (length < 0xfffffff0) { 38 | fmt = format::dwarf32; 39 | length += sizeof(uword); 40 | } else if (length == 0xffffffff) { 41 | length = fixed(); 42 | fmt = format::dwarf64; 43 | length += sizeof(uword) + sizeof(uint64_t); 44 | } else { 45 | throw format_error("initial length has reserved value"); 46 | } 47 | pos = begin + length; 48 | return make_shared

(sec->type, begin, length, fmt); 49 | } 50 | 51 | void 52 | cursor::skip_initial_length() 53 | { 54 | switch (sec->fmt) { 55 | case format::dwarf32: 56 | pos += sizeof(uword); 57 | break; 58 | case format::dwarf64: 59 | pos += sizeof(uword) + sizeof(uint64_t); 60 | break; 61 | default: 62 | throw logic_error("cannot skip initial length with unknown format"); 63 | } 64 | } 65 | 66 | section_offset 67 | cursor::offset() 68 | { 69 | switch (sec->fmt) { 70 | case format::dwarf32: 71 | return fixed(); 72 | case format::dwarf64: 73 | return fixed(); 74 | default: 75 | throw logic_error("cannot read offset with unknown format"); 76 | } 77 | } 78 | 79 | void 80 | cursor::string(std::string &out) 81 | { 82 | size_t size; 83 | const char *p = this->cstr(&size); 84 | out.resize(size); 85 | memmove(&out.front(), p, size); 86 | } 87 | 88 | const char * 89 | cursor::cstr(size_t *size_out) 90 | { 91 | // Scan string size 92 | const char *p = pos; 93 | while (pos < sec->end && *pos) 94 | pos++; 95 | if (pos == sec->end) 96 | throw format_error("unterminated string"); 97 | if (size_out) 98 | *size_out = pos - p; 99 | pos++; 100 | return p; 101 | } 102 | 103 | void 104 | cursor::skip_form(DW_FORM form) 105 | { 106 | section_offset tmp; 107 | 108 | // Section 7.5.4 109 | switch (form) { 110 | case DW_FORM::addr: 111 | pos += sec->addr_size; 112 | break; 113 | case DW_FORM::sec_offset: 114 | case DW_FORM::ref_addr: 115 | case DW_FORM::strp: 116 | switch (sec->fmt) { 117 | case format::dwarf32: 118 | pos += 4; 119 | break; 120 | case format::dwarf64: 121 | pos += 8; 122 | break; 123 | case format::unknown: 124 | throw logic_error("cannot read form with unknown format"); 125 | } 126 | break; 127 | 128 | // size+data forms 129 | case DW_FORM::block1: 130 | tmp = fixed(); 131 | pos += tmp; 132 | break; 133 | case DW_FORM::block2: 134 | tmp = fixed(); 135 | pos += tmp; 136 | break; 137 | case DW_FORM::block4: 138 | tmp = fixed(); 139 | pos += tmp; 140 | break; 141 | case DW_FORM::block: 
142 | case DW_FORM::exprloc: 143 | tmp = uleb128(); 144 | pos += tmp; 145 | break; 146 | 147 | // fixed-length forms 148 | case DW_FORM::flag_present: 149 | break; 150 | case DW_FORM::flag: 151 | case DW_FORM::data1: 152 | case DW_FORM::ref1: 153 | pos += 1; 154 | break; 155 | case DW_FORM::data2: 156 | case DW_FORM::ref2: 157 | pos += 2; 158 | break; 159 | case DW_FORM::data4: 160 | case DW_FORM::ref4: 161 | pos += 4; 162 | break; 163 | case DW_FORM::data8: 164 | case DW_FORM::ref_sig8: 165 | pos += 8; 166 | break; 167 | 168 | // variable-length forms 169 | case DW_FORM::sdata: 170 | case DW_FORM::udata: 171 | case DW_FORM::ref_udata: 172 | while (pos < sec->end && (*(uint8_t*)pos & 0x80)) 173 | pos++; 174 | pos++; 175 | break; 176 | case DW_FORM::string: 177 | while (pos < sec->end && *pos) 178 | pos++; 179 | pos++; 180 | break; 181 | 182 | case DW_FORM::indirect: 183 | skip_form((DW_FORM)uleb128()); 184 | break; 185 | 186 | default: 187 | throw format_error("unknown form " + to_string(form)); 188 | } 189 | } 190 | 191 | void 192 | cursor::underflow() 193 | { 194 | throw underflow_error("cannot read past end of DWARF section"); 195 | } 196 | 197 | DWARFPP_END_NAMESPACE 198 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Spedi 2 | 3 | Spedi is a speculative disassembler for the variable-size Thumb ISA. 4 | Given an ELF file as input, Spedi can: 5 | 6 | - Recover correct assembly instructions. 7 | - Recover targets of switch jumps tables. 8 | - Identify functions in the binary and their call graph. 9 | 10 | Spedi works directly on the binary without using symbol information. We found Spedi 11 | to outperform IDA Pro in our experiments. 12 | 13 | # Idea 14 | 15 | Spedi recovers all possible Basic Blocks (BBs) available in the binary. BBs that 16 | share the same jump instruction are grouped in one Maximal Block (MB). 
Then, MBs 17 | are refined using overlap and CFG conflict analysis. 18 | Details can be found in our CASES'16 paper "*Speculative disassembly of binary code*". 19 | The paper is available [here]. 20 | 21 | # Result summary 22 | 23 | 24 | Instructions

25 | 26 | 27 | Spedi (almost) perfectly recovers assembly instructions from our benchmark binaries 28 | with a 99.96% average. In comparison, IDA Pro has an average of 95.83%, skewed by its 29 | relatively poor performance on the *sha* benchmark. 30 | 31 | 32 | Callgraph

33 | 34 | 35 | Spedi precisely recovers 97.46% of functions on average. That is, it identifies 36 | both the correct start address and the correct end address. Compare that to the 40.53% average achieved by IDA Pro. 37 | 38 | 39 | Disassembly time

40 | 41 | 42 | A nice property of our technique is that it's also fast and scales well with increased 43 | benchmark size. For example, Spedi disassembles *du* (50K instructions) in about 150 ms. 44 | Note that there is good room for further optimizations. 45 | 46 | # Citing 47 | 48 | To cite Spedi in an academic work, please use: 49 | 50 | ``` 51 | @inproceedings{BenKhadraSK2016, 52 | author = {Ben Khadra, M. Ammar and Stoffel, Dominik and Kunz, Wolfgang}, 53 | title = {Speculative Disassembly of Binary Code}, 54 | booktitle = {Proceedings of the International Conference on Compilers, Architectures and Synthesis for Embedded Systems}, 55 | year = {2016}, 56 | location = {Pittsburgh, Pennsylvania}, 57 | articleno = {16}, 58 | doi = {10.1145/2968455.2968505}, 59 | acmid = {2968505}, 60 | publisher = {ACM}, 61 | } 62 | ``` 63 | # Usage 64 | Build the project and try it on one of the binaries in our benchmark suite 65 | available in this [repository]. 66 | 67 | The following command will instruct `spedi` to speculatively disassemble 68 | the `.text` section: 69 | 70 | ```sh 71 | $ ./spedi -t -s -f $FILE > speculative.inst 72 | ``` 73 | 74 | Use the following command to disassemble the `.text` section 75 | based on ARM code mapping symbols, which provide the ground truth about correct instructions: 76 | 77 | ```sh 78 | $ ./spedi -t -f $FILE > correct.inst 79 | ``` 80 | The easiest way to compare both outputs is by using: 81 | 82 | ```sh 83 | $ diff -y correct.inst speculative.inst |less 84 | ``` 85 | 86 | Currently, you need to manually modify `main.cpp` to show results related to 87 | switch-table and call-graph recovery. 88 | 89 | # Road map 90 | 91 | This tool is an academic proof-of-concept. Currently, it's not on our priority list. 92 | However, there are certain features that we have in mind for the future, namely: 93 | 94 | - **Mixed-mode ARM/Thumb disassembly**. The general idea of speculative disassembly 95 | provides the framework to do this. 
96 | Basically, one needs to speculatively disassemble a code region 97 | twice: once in Thumb mode and a second time in ARM mode. Later, mode 98 | switching instructions (mainly `bx` and `blx`) should be analyzed. This [paper] 99 | provides some related details. 100 | - **Support for x86/x64**. Thumb and similar RISC-like ISAs have limited variability. 101 | Basically, instructions can be either 2 or 4 bytes wide. We need to push the 102 | challenge even further by supporting x86/x64. To this end, overlap analysis might 103 | span more than two MBs, which complicates things. The current Maximal Block data structure 104 | is not efficient enough to do this. 105 | - **Refactorings**. The code is tightly coupled with our ELF reader. Also, it is 106 | specific to the Thumb ISA. We need to make it more modular to support other ISAs and binary formats. 107 | - **ELF Reader**: Our ELF reader is based on [libelfin]. We inherit some memory 108 | leak issues. Additionally, the reader might crash on binaries with DWARF 109 | debug info. These issues need to be addressed either upstream or directly here. 110 | 111 | # Related work 112 | 113 | Recently, Andriesse et al. have been working on `Nucleus`, a tool for function 114 | identification in x64 binaries. Their paper "*Compiler-Agnostic Function Detection in Binaries*" 115 | was accepted at IEEE Euro S&P 2017. They use more or less the same function identification 116 | techniques implemented in Spedi. If you are interested in x64 support, you can 117 | have a look at their [tool]. 118 | 119 | Note, however, that their tool is based on the assumption that recent x64 120 | compilers allocate jump-table data in the `.rodata` section. 121 | That makes instruction recovery significantly easier since it can be done reliably with 122 | a linear sweep. In comparison, Spedi handles the more general case of mixed code/data 123 | using speculative disassembly. 
124 | 125 | 126 | # Dependencies 127 | 128 | This project depends on [Capstone] disassembly library (v3.0.4). 129 | 130 | [here]: 131 | [Capstone]: 132 | [repository]: 133 | [paper]: 134 | [libelfin]: 135 | [tool]: 136 | -------------------------------------------------------------------------------- /src/binutils/dwarf/die.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | using namespace std; 4 | 5 | DWARFPP_BEGIN_NAMESPACE 6 | 7 | die::die(const unit *cu) 8 | : cu(cu), abbrev(nullptr) 9 | { 10 | } 11 | 12 | const unit & 13 | die::get_unit() const 14 | { 15 | return *cu; 16 | } 17 | 18 | section_offset 19 | die::get_section_offset() const 20 | { 21 | return cu->get_section_offset() + offset; 22 | } 23 | 24 | void 25 | die::read(section_offset off) 26 | { 27 | cursor cur(cu->data(), off); 28 | 29 | offset = off; 30 | 31 | abbrev_code acode = cur.uleb128(); 32 | if (acode == 0) { 33 | abbrev = nullptr; 34 | next = cur.get_section_offset(); 35 | return; 36 | } 37 | abbrev = &cu->get_abbrev(acode); 38 | 39 | tag = abbrev->tag; 40 | 41 | // XXX We can pre-compute almost all of this work in the 42 | // abbrev_entry. 43 | attrs.clear(); 44 | attrs.reserve(abbrev->attributes.size()); 45 | for (auto &attr : abbrev->attributes) { 46 | attrs.push_back(cur.get_section_offset()); 47 | cur.skip_form(attr.form); 48 | } 49 | next = cur.get_section_offset(); 50 | } 51 | 52 | bool 53 | die::has(DW_AT attr) const 54 | { 55 | if (!abbrev) 56 | return false; 57 | // XXX Totally lame 58 | for (auto &a : abbrev->attributes) 59 | if (a.name == attr) 60 | return true; 61 | return false; 62 | } 63 | 64 | value 65 | die::operator[](DW_AT attr) const 66 | { 67 | // XXX We can pre-compute almost all of this work in the 68 | // abbrev_entry. 
69 | if (abbrev) { 70 | int i = 0; 71 | for (auto &a : abbrev->attributes) { 72 | if (a.name == attr) 73 | return value(cu, a.name, a.form, a.type, attrs[i]); 74 | i++; 75 | } 76 | } 77 | throw out_of_range("DIE does not have attribute " + to_string(attr)); 78 | } 79 | 80 | value 81 | die::resolve(DW_AT attr) const 82 | { 83 | // DWARF4 section 2.13, DWARF4 section 3.3.8 84 | 85 | // DWARF4 is unclear about what to do when there's both a 86 | // DW_AT::specification and a DW_AT::abstract_origin. 87 | // Conceptually, though, a concrete inlined instance cannot 88 | // itself complete an external function that wasn't first 89 | // completed by its abstract instance, so we first try to 90 | // resolve abstract_origin, then we resolve specification. 91 | 92 | // XXX This traverses the abbrevs at least twice and 93 | // potentially several more times 94 | 95 | if (has(attr)) 96 | return (*this)[attr]; 97 | 98 | if (has(DW_AT::abstract_origin)) { 99 | die ao = (*this)[DW_AT::abstract_origin].as_reference(); 100 | if (ao.has(attr)) 101 | return ao[attr]; 102 | if (ao.has(DW_AT::specification)) { 103 | die s = ao[DW_AT::specification].as_reference(); 104 | if (s.has(attr)) 105 | return s[attr]; 106 | } 107 | } else if (has(DW_AT::specification)) { 108 | die s = (*this)[DW_AT::specification].as_reference(); 109 | if (s.has(attr)) 110 | return s[attr]; 111 | } 112 | 113 | return value(); 114 | } 115 | 116 | die::iterator 117 | die::begin() const 118 | { 119 | if (!abbrev || !abbrev->children) 120 | return end(); 121 | return iterator(cu, next); 122 | } 123 | 124 | die::iterator::iterator(const unit *cu, section_offset off) 125 | : d(cu) 126 | { 127 | d.read(off); 128 | } 129 | 130 | die::iterator & 131 | die::iterator::operator++() 132 | { 133 | if (!d.abbrev) 134 | return *this; 135 | 136 | if (!d.abbrev->children) { 137 | // The DIE has no children, so its successor follows 138 | // immediately 139 | d.read(d.next); 140 | } else if (d.has(DW_AT::sibling)) { 141 | // They 
made it easy on us. Follow the sibling 142 | // pointer. XXX Probably worth optimizing 143 | d = d[DW_AT::sibling].as_reference(); 144 | } else { 145 | // It's a hard-knock life. We have to iterate through 146 | // the children to find the next DIE. 147 | // XXX Particularly unfortunate if the user is doing a 148 | // DFS, since this will result in N^2 behavior. Maybe 149 | // a small cache of terminator locations in the CU? 150 | iterator sub(d.cu, d.next); 151 | while (sub->abbrev) 152 | ++sub; 153 | d.read(sub->next); 154 | } 155 | 156 | return *this; 157 | } 158 | 159 | const vector > 160 | die::attributes() const 161 | { 162 | vector > res; 163 | 164 | if (!abbrev) 165 | return res; 166 | 167 | // XXX Quite slow, especially when using this to traverse an 168 | // entire DIE tree since each DIE will produce a new vector 169 | // (whereas other vectors get reused). Might be worth a 170 | // custom iterator. 171 | int i = 0; 172 | for (auto &a : abbrev->attributes) { 173 | res.push_back(make_pair(a.name, value(cu, a.name, a.form, a.type, attrs[i]))); 174 | i++; 175 | } 176 | return res; 177 | } 178 | 179 | bool 180 | die::operator==(const die &o) const 181 | { 182 | return cu == o.cu && offset == o.offset; 183 | } 184 | 185 | bool 186 | die::operator!=(const die &o) const 187 | { 188 | return !(*this == o); 189 | } 190 | 191 | DWARFPP_END_NAMESPACE 192 | 193 | size_t 194 | std::hash::operator()(const dwarf::die &a) const 195 | { 196 | return hash()(a.cu) ^ 197 | hash()(a.get_unit_offset()); 198 | } 199 | -------------------------------------------------------------------------------- /src/binutils/dwarf/abbrev.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | using namespace std; 4 | 5 | DWARFPP_BEGIN_NAMESPACE 6 | 7 | static value::type 8 | resolve_type(DW_AT name, DW_FORM form) 9 | { 10 | switch (form) { 11 | case DW_FORM::addr: 12 | return value::type::address; 13 | 14 | case DW_FORM::block: 
15 | case DW_FORM::block1: 16 | case DW_FORM::block2: 17 | case DW_FORM::block4: 18 | // Prior to DWARF 4, exprlocs didn't have their own 19 | // form and were represented as blocks. 20 | // XXX Should this be predicated on version? 21 | switch (name) { 22 | case DW_AT::location: 23 | case DW_AT::byte_size: 24 | case DW_AT::bit_offset: 25 | case DW_AT::bit_size: 26 | case DW_AT::string_length: 27 | case DW_AT::lower_bound: 28 | case DW_AT::return_addr: 29 | case DW_AT::bit_stride: 30 | case DW_AT::upper_bound: 31 | case DW_AT::count: 32 | case DW_AT::data_member_location: 33 | case DW_AT::frame_base: 34 | case DW_AT::segment: 35 | case DW_AT::static_link: 36 | case DW_AT::use_location: 37 | case DW_AT::vtable_elem_location: 38 | case DW_AT::allocated: 39 | case DW_AT::associated: 40 | case DW_AT::data_location: 41 | case DW_AT::byte_stride: 42 | return value::type::exprloc; 43 | default: 44 | return value::type::block; 45 | } 46 | 47 | case DW_FORM::data4: 48 | case DW_FORM::data8: 49 | // Prior to DWARF 4, section offsets didn't have their 50 | // own form and were represented as data4 or data8. 51 | // DWARF 3 clarified that types that accepted both 52 | // constants and section offsets were to treat data4 53 | // and data8 as section offsets and other constant 54 | // forms as constants. 55 | // XXX Should this be predicated on version? 
56 | switch (name) { 57 | case DW_AT::location: 58 | case DW_AT::stmt_list: 59 | case DW_AT::string_length: 60 | case DW_AT::return_addr: 61 | case DW_AT::start_scope: 62 | case DW_AT::data_member_location: 63 | case DW_AT::frame_base: 64 | case DW_AT::macro_info: 65 | case DW_AT::segment: 66 | case DW_AT::static_link: 67 | case DW_AT::use_location: 68 | case DW_AT::vtable_elem_location: 69 | case DW_AT::ranges: 70 | goto sec_offset; 71 | default: 72 | // Fall through 73 | break; 74 | } 75 | case DW_FORM::data1: 76 | case DW_FORM::data2: 77 | return value::type::constant; 78 | case DW_FORM::udata: 79 | return value::type::uconstant; 80 | case DW_FORM::sdata: 81 | return value::type::sconstant; 82 | 83 | case DW_FORM::exprloc: 84 | return value::type::exprloc; 85 | 86 | case DW_FORM::flag: 87 | case DW_FORM::flag_present: 88 | return value::type::flag; 89 | 90 | case DW_FORM::ref1: 91 | case DW_FORM::ref2: 92 | case DW_FORM::ref4: 93 | case DW_FORM::ref8: 94 | case DW_FORM::ref_addr: 95 | case DW_FORM::ref_sig8: 96 | case DW_FORM::ref_udata: 97 | return value::type::reference; 98 | 99 | case DW_FORM::string: 100 | case DW_FORM::strp: 101 | return value::type::string; 102 | 103 | case DW_FORM::indirect: 104 | // There's nothing meaningful we can do 105 | return value::type::invalid; 106 | 107 | case DW_FORM::sec_offset: 108 | sec_offset: 109 | // The type of this form depends on the attribute 110 | switch (name) { 111 | case DW_AT::stmt_list: 112 | return value::type::line; 113 | 114 | case DW_AT::location: 115 | case DW_AT::string_length: 116 | case DW_AT::return_addr: 117 | case DW_AT::data_member_location: 118 | case DW_AT::frame_base: 119 | case DW_AT::segment: 120 | case DW_AT::static_link: 121 | case DW_AT::use_location: 122 | case DW_AT::vtable_elem_location: 123 | return value::type::loclist; 124 | 125 | case DW_AT::macro_info: 126 | return value::type::mac; 127 | 128 | case DW_AT::start_scope: 129 | case DW_AT::ranges: 130 | return value::type::rangelist; 
131 | 132 | default: 133 | throw format_error("DW_FORM_sec_offset not expected for attribute " + 134 | to_string(name)); 135 | } 136 | } 137 | throw format_error("unknown attribute form " + to_string(form)); 138 | } 139 | 140 | attribute_spec::attribute_spec(DW_AT name, DW_FORM form) 141 | : name(name), form(form), type(resolve_type(name, form)) 142 | { 143 | } 144 | 145 | bool 146 | abbrev_entry::read(cursor *cur) 147 | { 148 | attributes.clear(); 149 | 150 | // Section 7.5.3 151 | code = cur->uleb128(); 152 | if (!code) 153 | return false; 154 | 155 | tag = (DW_TAG)cur->uleb128(); 156 | children = cur->fixed() == DW_CHILDREN::yes; 157 | while (1) { 158 | DW_AT name = (DW_AT)cur->uleb128(); 159 | DW_FORM form = (DW_FORM)cur->uleb128(); 160 | if (name == (DW_AT)0 && form == (DW_FORM)0) 161 | break; 162 | attributes.push_back(attribute_spec(name, form)); 163 | } 164 | attributes.shrink_to_fit(); 165 | return true; 166 | } 167 | 168 | DWARFPP_END_NAMESPACE 169 | -------------------------------------------------------------------------------- /src/binutils/dwarf/internal.hh: -------------------------------------------------------------------------------- 1 | #ifndef _DWARFPP_INTERNAL_HH_ 2 | #define _DWARFPP_INTERNAL_HH_ 3 | 4 | #include "dwarf++.hh" 5 | #include "../elf/to_hex.hh" 6 | 7 | #include 8 | #include 9 | #include 10 | #include 11 | 12 | DWARFPP_BEGIN_NAMESPACE 13 | 14 | enum class format 15 | { 16 | unknown, 17 | dwarf32, 18 | dwarf64 19 | }; 20 | 21 | /** 22 | * A single DWARF section or a slice of a section. This also tracks 23 | * dynamic information necessary to decode values in this section. 
24 | */ 25 | struct section 26 | { 27 | section_type type; 28 | const char *begin, *end; 29 | const format fmt; 30 | unsigned addr_size; 31 | 32 | section(section_type type, const void *begin, 33 | section_length length, format fmt = format::unknown, 34 | unsigned addr_size = 0) 35 | : type(type), begin((char*)begin), end((char*)begin + length), 36 | fmt(fmt), addr_size(addr_size) { } 37 | 38 | section(const section &o) = default; 39 | 40 | std::shared_ptr

slice(section_offset start, section_length len, 41 | format fmt = format::unknown, 42 | unsigned addr_size = 0) 43 | { 44 | if (fmt == format::unknown) 45 | fmt = this->fmt; 46 | if (addr_size == 0) 47 | addr_size = this->addr_size; 48 | 49 | return std::make_shared

( 50 | type, begin+start, 51 | std::min(len, (section_length)(end-begin)), 52 | fmt, addr_size); 53 | } 54 | 55 | size_t size() const 56 | { 57 | return end - begin; 58 | } 59 | }; 60 | 61 | /** 62 | * A cursor pointing into a DWARF section. Provides deserialization 63 | * operations and bounds checking. 64 | */ 65 | struct cursor 66 | { 67 | // XXX There's probably a lot of overhead to maintaining the 68 | // shared pointer to the section from this. Perhaps the rule 69 | // should be that all objects keep the dwarf::impl alive 70 | // (directly or indirectly) and that keeps the loader alive, 71 | // so a cursor just needs a regular section*. 72 | 73 | std::shared_ptr

sec; 74 | const char *pos; 75 | 76 | cursor() 77 | : pos(nullptr) { } 78 | cursor(const std::shared_ptr

sec, section_offset offset = 0) 79 | : sec(sec), pos(sec->begin + offset) { } 80 | 81 | /** 82 | * Read a subsection. The cursor must be at an initial 83 | * length. After, the cursor will point just past the end of 84 | * the subsection. The returned section has the appropriate 85 | * DWARF format and begins at the current location of the 86 | * cursor (so this is usually followed by a 87 | * skip_initial_length). 88 | */ 89 | std::shared_ptr

subsection(); 90 | std::int64_t sleb128(); 91 | section_offset offset(); 92 | void string(std::string &out); 93 | const char *cstr(size_t *size_out = nullptr); 94 | 95 | void 96 | ensure(section_offset bytes) 97 | { 98 | if ((section_offset)(sec->end - pos) < bytes || pos >= sec->end) 99 | underflow(); 100 | } 101 | 102 | template 103 | T fixed() 104 | { 105 | ensure(sizeof(T)); 106 | T val = *(T*)pos; 107 | pos += sizeof(T); 108 | return val; 109 | } 110 | 111 | std::uint64_t uleb128() 112 | { 113 | // Appendix C 114 | // XXX Pre-compute all two byte ULEB's 115 | std::uint64_t result = 0; 116 | int shift = 0; 117 | while (pos < sec->end) { 118 | uint8_t byte = *(uint8_t*)(pos++); 119 | result |= (uint64_t)(byte & 0x7f) << shift; 120 | if ((byte & 0x80) == 0) 121 | return result; 122 | shift += 7; 123 | } 124 | underflow(); 125 | return 0; 126 | } 127 | 128 | taddr address() 129 | { 130 | switch (sec->addr_size) { 131 | case 1: 132 | return fixed(); 133 | case 2: 134 | return fixed(); 135 | case 4: 136 | return fixed(); 137 | case 8: 138 | return fixed(); 139 | default: 140 | throw std::runtime_error("address size " + std::to_string(sec->addr_size) + " not supported"); 141 | } 142 | } 143 | 144 | void skip_initial_length(); 145 | void skip_form(DW_FORM form); 146 | 147 | cursor &operator+=(section_offset offset) 148 | { 149 | pos += offset; 150 | return *this; 151 | } 152 | 153 | cursor operator+(section_offset offset) const 154 | { 155 | return cursor(sec, pos + offset); 156 | } 157 | 158 | bool operator<(const cursor &o) const 159 | { 160 | return pos < o.pos; 161 | } 162 | 163 | bool end() const 164 | { 165 | return pos >= sec->end; 166 | } 167 | 168 | bool valid() const 169 | { 170 | return !!pos; 171 | } 172 | 173 | section_offset get_section_offset() const 174 | { 175 | return pos - sec->begin; 176 | } 177 | 178 | private: 179 | cursor(const std::shared_ptr

sec, const char *pos) 180 | : sec(sec), pos(pos) { } 181 | 182 | void underflow(); 183 | }; 184 | 185 | /** 186 | * An attribute specification in an abbrev. 187 | */ 188 | struct attribute_spec 189 | { 190 | DW_AT name; 191 | DW_FORM form; 192 | 193 | // Computed information 194 | value::type type; 195 | 196 | attribute_spec(DW_AT name, DW_FORM form); 197 | }; 198 | 199 | typedef std::uint64_t abbrev_code; 200 | 201 | /** 202 | * An entry in .debug_abbrev. 203 | */ 204 | struct abbrev_entry 205 | { 206 | abbrev_code code; 207 | DW_TAG tag; 208 | bool children; 209 | std::vector attributes; 210 | 211 | abbrev_entry() : code(0) { } 212 | 213 | bool read(cursor *cur); 214 | }; 215 | 216 | /** 217 | * A section header in .debug_pubnames or .debug_pubtypes. 218 | */ 219 | struct name_unit 220 | { 221 | uhalf version; 222 | section_offset debug_info_offset; 223 | section_length debug_info_length; 224 | // Cursor to the first name_entry in this unit. This cursor's 225 | // section is limited to this unit. 226 | cursor entries; 227 | 228 | void read(cursor *cur) 229 | { 230 | // Section 7.19 231 | std::shared_ptr

subsec = cur->subsection(); 232 | cursor sub(subsec); 233 | sub.skip_initial_length(); 234 | version = sub.fixed(); 235 | if (version != 2) 236 | throw format_error("unknown name unit version " + std::to_string(version)); 237 | debug_info_offset = sub.offset(); 238 | debug_info_length = sub.offset(); 239 | entries = sub; 240 | } 241 | }; 242 | 243 | /** 244 | * An entry in a .debug_pubnames or .debug_pubtypes unit. 245 | */ 246 | struct name_entry 247 | { 248 | section_offset offset; 249 | std::string name; 250 | 251 | void read(cursor *cur) 252 | { 253 | offset = cur->offset(); 254 | cur->string(name); 255 | } 256 | }; 257 | 258 | DWARFPP_END_NAMESPACE 259 | 260 | #endif 261 | -------------------------------------------------------------------------------- /src/disasm/RawInstAnalyzer.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 8 | 9 | #include "RawInstAnalyzer.h" 10 | 11 | namespace disasm { 12 | 13 | RawInstAnalyzer::RawInstAnalyzer(ISAType isa) : 14 | m_isa{isa}, 15 | m_inst_width{getMinxInstWidth(isa)} { 16 | } 17 | 18 | bool RawInstAnalyzer::isBranch(const cs_insn *inst) const noexcept { 19 | if (inst->detail == NULL) return false; 20 | 21 | cs_detail *detail = inst->detail; 22 | // assuming that each instruction should belong to at least one group 23 | if (detail->groups[detail->groups_count - 1] == ARM_GRP_JUMP) 24 | return true; 25 | if (inst->id == ARM_INS_POP) { 26 | // pop accepts a register list. 
If pc was among them then this a branch 27 | for (int i = 0; i < detail->arm.op_count; ++i) { 28 | if (detail->arm.operands[i].reg == ARM_REG_PC) return true; 29 | } 30 | } 31 | 32 | if ((detail->arm.operands[0].type == ARM_OP_REG) 33 | && (detail->arm.operands[0].reg == ARM_REG_PC)) { 34 | if (inst->id == ARM_INS_STR) { 35 | return false; 36 | } 37 | return true; 38 | } 39 | return false; 40 | } 41 | 42 | bool RawInstAnalyzer::isValid(const cs_insn *inst) const { 43 | for (int i = 0; i < inst->detail->arm.op_count; ++i) { 44 | if (inst->detail->arm.operands[i].type == ARM_OP_REG) { 45 | if (inst->detail->arm.operands[i].reg == ARM_REG_PC) { 46 | // We do no apply PC, SP restrictions to load instructions 47 | if (ARM_INS_LDA <= inst->id && inst->id <= ARM_INS_LDR) { 48 | continue; 49 | } 50 | // PC usage restrictions based on manual A2-46 51 | // and Table D9.5 details. 52 | 53 | // XXX: use of pc in 16 bit add is deprecated D9.5 54 | switch (inst->id) { 55 | case ARM_INS_ADD: 56 | case ARM_INS_ADDW: 57 | case ARM_INS_ADR: 58 | case ARM_INS_BX: 59 | case ARM_INS_BLX: 60 | case ARM_INS_MOV: 61 | case ARM_INS_SUB: 62 | case ARM_INS_SUBW: 63 | case ARM_INS_SUBS: 64 | case ARM_INS_MOVS: 65 | case ARM_INS_POP: 66 | // XXX more restrictions can be applied here 67 | break; 68 | default: 69 | // TODO: make logging consistent and configurable 70 | // printf("Found invalid pc at 0x%lx, %s, %s\n", 71 | // inst->address, 72 | // inst->mnemonic, 73 | // inst->op_str); 74 | return false; 75 | } 76 | } else if (inst->detail->arm.operands[i].reg == ARM_REG_SP) { 77 | // We do not apply PC, SP restrictions to load instructions 78 | if (ARM_INS_LDA <= inst->id && inst->id <= ARM_INS_LDR) { 79 | continue; 80 | } 81 | // restrictions on SP usage 82 | switch (inst->id) { 83 | case ARM_INS_MOV: 84 | case ARM_INS_ADD: 85 | case ARM_INS_ADDW: 86 | case ARM_INS_SUB: 87 | case ARM_INS_SUBW: 88 | case ARM_INS_CMN: 89 | case ARM_INS_CMP: 90 | break; 91 | default: 92 | // we allow stores to 
use SP 93 | if (!(ARM_INS_STMDA <= inst->id 94 | && inst->id <= ARM_INS_STR)) { 95 | // printf("Found invalid sp at 0x%lx, %s, %s\n", 96 | // inst->address, 97 | // inst->mnemonic, 98 | // inst->op_str); 99 | return false; 100 | } 101 | break; 102 | } 103 | } 104 | } else if (inst->detail->arm.operands[i].type == ARM_OP_CIMM 105 | || inst->detail->arm.operands[i].type == ARM_OP_PIMM) { 106 | // printf("Found invalid co-register at 0x%lx, %s, %s\n", 107 | // inst->address, 108 | // inst->mnemonic, 109 | // inst->op_str); 110 | return false; 111 | } 112 | // else if (!( 113 | // ((ARM_REG_R0 <= inst->detail->arm.operands[i].reg) && 114 | // (inst->detail->arm.operands[i].reg <= ARM_REG_R12)) 115 | // || inst->detail->arm.operands[i].reg == ARM_REG_LR)) { 116 | // // XXX: using unusual registers such as co-processor registers 117 | // // is currently allowed. For example, we do not allow access 118 | // // to registers of system control co-processor (CP15). Note that 119 | // // only some instructions like MRC, MCREQ & MCR can use 120 | // // co-processor registers 121 | // printf("Found invalid register at 0x%lx, %s, %s\n", 122 | // inst->address, 123 | // inst->mnemonic, 124 | // inst->op_str); 125 | // return false; 126 | // } 127 | } 128 | return true; 129 | } 130 | 131 | bool RawInstAnalyzer::isConditional(const cs_insn *inst) const { 132 | return inst->detail->arm.cc != ARM_CC_AL; 133 | } 134 | 135 | ISAInstWidth RawInstAnalyzer::getMinxInstWidth(ISAType isa) const { 136 | switch (isa) { 137 | case ISAType::kx86: 138 | case ISAType::kx86_64: 139 | return ISAInstWidth::kByte; 140 | case ISAType::kThumb: 141 | case ISAType::kTriCore: 142 | return ISAInstWidth::kHWord; 143 | default: 144 | return ISAInstWidth::kWord; 145 | } 146 | } 147 | 148 | const std::string RawInstAnalyzer::conditionCodeToString(const arm_cc &condition) const { 149 | switch (condition) { 150 | case ARM_CC_INVALID: 151 | return "Invalid"; 152 | case ARM_CC_EQ: 153 | return "Equal"; 154 | case 
ARM_CC_NE: 155 | return "Not equal"; 156 | case ARM_CC_HS: 157 | return "Carry set"; 158 | case ARM_CC_LO: 159 | return "Carry clear"; 160 | case ARM_CC_MI: 161 | return "Minus"; 162 | case ARM_CC_PL: 163 | return "Plus"; 164 | case ARM_CC_VS: 165 | return "Overflow"; 166 | case ARM_CC_VC: 167 | return "No overflow"; 168 | case ARM_CC_HI: 169 | return "Unsigned higher"; 170 | case ARM_CC_LS: 171 | return "Unsigned lower or same"; 172 | case ARM_CC_GE: 173 | return "Greater than or equal"; 174 | case ARM_CC_LT: 175 | return "Less than"; 176 | case ARM_CC_GT: 177 | return "Greater than"; 178 | case ARM_CC_LE: 179 | return "Less than or equal"; 180 | case ARM_CC_AL: 181 | return "Always"; 182 | default: 183 | return "Unknown"; 184 | } 185 | } 186 | bool RawInstAnalyzer::isDirectBranch(const cs_insn *inst) const { 187 | if (inst->id == ARM_INS_CBZ || inst->id == ARM_INS_CBNZ) { 188 | return true; 189 | } 190 | if (inst->detail->arm.op_count == 1 191 | && inst->detail->arm.operands[0].type == ARM_OP_IMM) { 192 | return true; 193 | } 194 | return false; 195 | } 196 | void RawInstAnalyzer::setISA(const ISAType isa) { 197 | m_isa = isa; 198 | m_inst_width = getMinxInstWidth(isa); 199 | } 200 | void RawInstAnalyzer::changeModeTo(const ISAType &isa) { 201 | if (isa == ISAType::kARM) { 202 | m_isa = ISAType::kARM; 203 | } else { 204 | m_isa = ISAType::kThumb; 205 | } 206 | } 207 | } 208 | -------------------------------------------------------------------------------- /src/disasm/analysis/DisassemblyCallGraph.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 
8 | 9 | #include "DisassemblyCallGraph.h" 10 | #include 11 | #include 12 | #include 13 | 14 | namespace disasm { 15 | 16 | DisassemblyCallGraph::DisassemblyCallGraph 17 | (addr_t sec_start_addr, addr_t sec_end_addr) : 18 | m_section_start_addr{sec_start_addr}, 19 | m_section_end_addr{sec_end_addr}, 20 | m_call_graph_ordered{false} { 21 | } 22 | 23 | void DisassemblyCallGraph::setSectionStartAddr(addr_t sec_start_addr) noexcept { 24 | m_section_start_addr = sec_start_addr; 25 | } 26 | 27 | void DisassemblyCallGraph::setSectionEndAddr(addr_t sec_end_addr) noexcept { 28 | m_section_end_addr = sec_end_addr; 29 | } 30 | 31 | ICFGNode *DisassemblyCallGraph::insertProcedure 32 | (const addr_t entry_addr, CFGNode *entry_node, ICFGProcedureType type) { 33 | 34 | auto result = m_call_graph_map.insert({entry_addr, nullptr}); 35 | if (result.second) { 36 | if (type == ICFGProcedureType::kExternal) { 37 | m_external_procs.emplace_back 38 | (ICFGNode(entry_addr, entry_node, type)); 39 | return &(m_external_procs.back()); 40 | } else { 41 | m_unmerged_procs.emplace_back 42 | (ICFGNode(entry_addr, entry_node, type)); 43 | return &(m_unmerged_procs.back()); 44 | } 45 | } 46 | return nullptr; 47 | } 48 | 49 | void DisassemblyCallGraph::AddProcedure 50 | (const addr_t entry_addr, 51 | CFGNode *entry_node, 52 | ICFGProcedureType proc_type) { 53 | 54 | auto result = m_call_graph_map.insert({entry_addr, nullptr}); 55 | if (result.second) { 56 | if (proc_type == ICFGProcedureType::kExternal) { 57 | m_external_procs.emplace_back 58 | (ICFGNode(entry_addr, entry_node, proc_type)); 59 | } else { 60 | m_unmerged_procs.emplace_back 61 | (ICFGNode(entry_addr, entry_node, proc_type)); 62 | } 63 | } 64 | } 65 | 66 | ICFGNode DisassemblyCallGraph::createProcedure 67 | (const addr_t entry_addr, CFGNode *entry_node) noexcept { 68 | return ICFGNode(entry_addr, entry_node, ICFGProcedureType::kDirectlyCalled); 69 | } 70 | 71 | void DisassemblyCallGraph::buildCallGraph() noexcept { 72 | for (auto &proc 
: m_unmerged_procs) { 73 | m_main_procs.push_back(proc); 74 | } 75 | m_unmerged_procs.clear(); 76 | // if func doesn't cover next && func has no overlap && func has no internal tail call 77 | // then pass 78 | // if cover next doesn't overlap with this 79 | std::sort(m_main_procs.begin(), m_main_procs.end()); 80 | for (auto proc_iter = m_main_procs.begin(); 81 | proc_iter < m_main_procs.end(); 82 | ++proc_iter) { 83 | for (auto &node_pair : (*proc_iter).m_exit_nodes) { 84 | if (node_pair.first == ICFGExitNodeType::kTailCallOrOverlap) { 85 | if (node_pair.second->remoteSuccessor()->isProcedureEntry()) { 86 | node_pair.first = ICFGExitNodeType::kTailCall; 87 | } else { 88 | node_pair.first = ICFGExitNodeType::kOverlap; 89 | } 90 | } 91 | // If tail call proc (node_pair) 92 | } 93 | prettyPrintProcedure(*proc_iter); 94 | } 95 | // if has invalid node only and node is last 96 | // TODO: restructure call graph 97 | // TODO: add each proc ptr to map, check if tail_calls and overlap persists, 98 | // add caller, callee relation. 99 | m_call_graph_ordered = true; 100 | } 101 | 102 | bool DisassemblyCallGraph::isNonReturnProcedure(const ICFGNode &proc) const noexcept { 103 | if (proc.getExitNodes().size() != 1 104 | || proc.endNode()->remoteSuccessor() != nullptr) { 105 | // procedure is not a non-return procedure 106 | return false; 107 | } 108 | return false; 109 | } 110 | 111 | void DisassemblyCallGraph::checkNonReturnProcedureAndFixCallers 112 | (ICFGNode &proc) const noexcept { 113 | 114 | if (!proc.isReturnsToCaller()) { 115 | for (const auto &type_node_pair : proc.getExitNodes()) { 116 | if (type_node_pair.first != ICFGExitNodeType::kTailCall) { 117 | // indirect branches with unknown destination 118 | return; 119 | } 120 | if (!type_node_pair.second->maximalBlock()->branchInfo().isCall()) { 121 | // branch to procedure that is not well-known non-return procedure 122 | return; 123 | } 124 | } 125 | // TODO: recursively identify non-return procedures? 
126 | proc.setNonReturn(true); 127 | for (auto &cfg_edge : proc.entryNode()->getDirectPredecessors()) { 128 | if (cfg_edge.type() == CFGEdgeType::kDirect 129 | && cfg_edge.node()->isCall()) { 130 | cfg_edge.node()->setIsCall(false); 131 | } 132 | } 133 | } 134 | } 135 | 136 | std::vector &DisassemblyCallGraph::buildInitialCallGraph() noexcept { 137 | assert(m_main_procs.size() == 0 && "Initial call graph is not empty!!"); 138 | m_main_procs.swap(m_unmerged_procs); 139 | std::sort(m_main_procs.begin(), m_main_procs.end()); 140 | // XXX: assuming that there is at least one proc 141 | for (auto &proc : m_external_procs) { 142 | m_call_graph_map.at(proc.entryAddr()) = &proc; 143 | } 144 | for (auto proc_iter = m_main_procs.begin(); 145 | proc_iter < m_main_procs.end() - 1; 146 | ++proc_iter) { 147 | (*proc_iter).m_estimated_end_addr = (*(proc_iter + 1)).m_entry_addr; 148 | m_call_graph_map.insert({(*proc_iter).id(), &(*proc_iter)}); 149 | } 150 | m_main_procs.back().m_estimated_end_addr = m_section_end_addr; 151 | m_call_graph_ordered = true; 152 | return m_main_procs; 153 | } 154 | 155 | void DisassemblyCallGraph::reserve(size_t procedure_count) { 156 | m_unmerged_procs.reserve(procedure_count); 157 | m_main_procs.reserve(procedure_count); 158 | m_call_graph_map.reserve(procedure_count); 159 | } 160 | 161 | void DisassemblyCallGraph::prettyPrintProcedure 162 | (const ICFGNode &proc_node) noexcept { 163 | std::cout << std::endl; 164 | printf("Function 0x%lx 0x%lx\n", 165 | proc_node.entryAddr(), 166 | proc_node.m_end_addr); 167 | for (auto &exitNodePair : proc_node.m_exit_nodes) { 168 | switch (exitNodePair.first) { 169 | case ICFGExitNodeType::kInvalidLR: 170 | printf("Exit_invalid node %lu at: 0x%lx /", 171 | exitNodePair.second->id(), 172 | exitNodePair.second->getCandidateStartAddr()); 173 | break; 174 | case ICFGExitNodeType::kTailCall: 175 | printf("Exit_tail_call node %lu at: 0x%lx /", 176 | exitNodePair.second->id(), 177 | 
exitNodePair.second->getCandidateStartAddr()); 178 | if ((exitNodePair.second->remoteSuccessor() != nullptr) 179 | && !exitNodePair.second->maximalBlock()->branchInfo().isCall()) { 180 | printf("(internal)"); 181 | } 182 | break; 183 | case ICFGExitNodeType::kOverlap: 184 | printf("Exit_overlap node %lu at: 0x%lx /", 185 | exitNodePair.second->id(), 186 | exitNodePair.second->getCandidateStartAddr()); 187 | break; 188 | case ICFGExitNodeType::kTailCallOrOverlap: 189 | printf("Exit_overlap or tail call node %lu at: 0x%lx /", 190 | exitNodePair.second->id(), 191 | exitNodePair.second->getCandidateStartAddr()); 192 | break; 193 | case ICFGExitNodeType::kReturn: 194 | printf("Exit_return node %lu at: 0x%lx /", 195 | exitNodePair.second->id(), 196 | exitNodePair.second->getCandidateStartAddr()); 197 | break; 198 | case ICFGExitNodeType::kIndirect: 199 | printf("Exit_indirect node %lu at: 0x%lx /", 200 | exitNodePair.second->id(), 201 | exitNodePair.second->getCandidateStartAddr()); 202 | break; 203 | } 204 | printf("\n"); 205 | } 206 | printf("Procedure end ...\n"); 207 | } 208 | 209 | addr_t DisassemblyCallGraph::sectionEndAddr() const noexcept { 210 | return m_section_end_addr; 211 | } 212 | } 213 | -------------------------------------------------------------------------------- /src/binutils/dwarf/attrs.cc: -------------------------------------------------------------------------------- 1 | #include "dwarf++.hh" 2 | 3 | using namespace std; 4 | 5 | DWARFPP_BEGIN_NAMESPACE 6 | 7 | #define AT_ANY(name) \ 8 | value at_##name(const die &d) \ 9 | { \ 10 | return d[DW_AT::name]; \ 11 | } \ 12 | static_assert(true, "") 13 | 14 | #define AT_ADDRESS(name) \ 15 | taddr at_##name(const die &d) \ 16 | { \ 17 | return d[DW_AT::name].as_address(); \ 18 | } \ 19 | static_assert(true, "") 20 | 21 | #define AT_ENUM(name, type) \ 22 | type at_##name(const die &d) \ 23 | { \ 24 | return (type)d[DW_AT::name].as_uconstant(); \ 25 | } \ 26 | static_assert(true, "") 27 | 28 | #define 
AT_FLAG(name) \ 29 | bool at_##name(const die &d) \ 30 | { \ 31 | return d[DW_AT::name].as_flag(); \ 32 | } \ 33 | static_assert(true, "") 34 | 35 | #define AT_FLAG_(name) \ 36 | bool at_##name(const die &d) \ 37 | { \ 38 | return d[DW_AT::name##_].as_flag(); \ 39 | } \ 40 | static_assert(true, "") 41 | 42 | #define AT_REFERENCE(name) \ 43 | die at_##name(const die &d) \ 44 | { \ 45 | return d[DW_AT::name].as_reference(); \ 46 | } \ 47 | static_assert(true, "") 48 | 49 | #define AT_STRING(name) \ 50 | string at_##name(const die &d) \ 51 | { \ 52 | return d[DW_AT::name].as_string(); \ 53 | } \ 54 | static_assert(true, "") 55 | 56 | #define AT_UDYNAMIC(name) \ 57 | uint64_t at_##name(const die &d, expr_context *ctx) \ 58 | { \ 59 | return _at_udynamic(DW_AT::name, d, ctx); \ 60 | } \ 61 | static_assert(true, "") 62 | 63 | static uint64_t _at_udynamic(DW_AT attr, const die &d, expr_context *ctx, int depth = 0) 64 | { 65 | // DWARF4 section 2.19 66 | if (depth > 16) 67 | throw format_error("reference depth exceeded for " + to_string(attr)); 68 | 69 | value v(d[attr]); 70 | switch (v.get_type()) { 71 | case value::type::constant: 72 | case value::type::uconstant: 73 | return v.as_uconstant(); 74 | case value::type::reference: 75 | return _at_udynamic(attr, v.as_reference(), ctx, depth + 1); 76 | case value::type::exprloc: 77 | return v.as_exprloc().evaluate(ctx).value; 78 | default: 79 | throw format_error(to_string(attr) + " has unexpected type " + 80 | to_string(v.get_type())); 81 | } 82 | } 83 | 84 | ////////////////////////////////////////////////////////////////// 85 | // 0x0X 86 | // 87 | 88 | AT_REFERENCE(sibling); 89 | // XXX location 90 | AT_STRING(name); 91 | AT_ENUM(ordering, DW_ORD); 92 | AT_UDYNAMIC(byte_size); 93 | AT_UDYNAMIC(bit_offset); 94 | AT_UDYNAMIC(bit_size); 95 | 96 | ////////////////////////////////////////////////////////////////// 97 | // 0x1X 98 | // 99 | 100 | // XXX stmt_list 101 | AT_ADDRESS(low_pc); 102 | taddr 103 | at_high_pc(const die 
&d) 104 | { 105 | value v(d[DW_AT::high_pc]); 106 | switch (v.get_type()) { 107 | case value::type::address: 108 | return v.as_address(); 109 | case value::type::constant: 110 | case value::type::uconstant: 111 | return at_low_pc(d) + v.as_uconstant(); 112 | default: 113 | throw format_error(to_string(DW_AT::high_pc) + " has unexpected type " + 114 | to_string(v.get_type())); 115 | } 116 | } 117 | AT_ENUM(language, DW_LANG); 118 | AT_REFERENCE(discr); 119 | AT_ANY(discr_value); // XXX Signed or unsigned 120 | AT_ENUM(visibility, DW_VIS); 121 | AT_REFERENCE(import); 122 | // XXX string_length 123 | AT_REFERENCE(common_reference); 124 | AT_STRING(comp_dir); 125 | AT_ANY(const_value); 126 | AT_REFERENCE(containing_type); 127 | // XXX default_value 128 | 129 | ////////////////////////////////////////////////////////////////// 130 | // 0x2X 131 | // 132 | 133 | DW_INL at_inline(const die &d) 134 | { 135 | // XXX Missing attribute is equivalent to DW_INL_not_inlined 136 | // (DWARF4 section 3.3.8) 137 | return (DW_INL)d[DW_AT::inline_].as_uconstant(); 138 | } 139 | AT_FLAG(is_optional); 140 | AT_UDYNAMIC(lower_bound); // XXX Language-based default? 
141 | AT_STRING(producer); 142 | AT_FLAG(prototyped); 143 | // XXX return_addr 144 | // XXX start_scope 145 | AT_UDYNAMIC(bit_stride); 146 | AT_UDYNAMIC(upper_bound); 147 | 148 | ////////////////////////////////////////////////////////////////// 149 | // 0x3X 150 | // 151 | 152 | AT_REFERENCE(abstract_origin); 153 | AT_ENUM(accessibility, DW_ACCESS); 154 | // XXX const address_class 155 | AT_FLAG(artificial); 156 | // XXX base_types 157 | AT_ENUM(calling_convention, DW_CC); 158 | AT_UDYNAMIC(count); 159 | expr_result 160 | at_data_member_location(const die &d, expr_context *ctx, taddr base, taddr pc) 161 | { 162 | value v(d[DW_AT::data_member_location]); 163 | switch (v.get_type()) { 164 | case value::type::constant: 165 | case value::type::uconstant: 166 | return {expr_result::type::address, base + v.as_uconstant()}; 167 | case value::type::exprloc: 168 | return v.as_exprloc().evaluate(ctx, base); 169 | case value::type::loclist: 170 | // XXX 171 | throw std::runtime_error("not implemented"); 172 | default: 173 | throw format_error("DW_AT_data_member_location has unexpected type " + 174 | to_string(v.get_type())); 175 | } 176 | } 177 | // XXX decl_column decl_file decl_line 178 | AT_FLAG(declaration); 179 | // XXX discr_list 180 | AT_ENUM(encoding, DW_ATE); 181 | AT_FLAG(external); 182 | 183 | ////////////////////////////////////////////////////////////////// 184 | // 0x4X 185 | // 186 | 187 | // XXX frame_base 188 | die at_friend(const die &d) 189 | { 190 | return d[DW_AT::friend_].as_reference(); 191 | } 192 | AT_ENUM(identifier_case, DW_ID); 193 | // XXX macro_info 194 | AT_REFERENCE(namelist_item); 195 | AT_REFERENCE(priority); // XXX Computed might be useful 196 | // XXX segment 197 | AT_REFERENCE(specification); 198 | // XXX static_link 199 | AT_REFERENCE(type); 200 | // XXX use_location 201 | AT_FLAG(variable_parameter); 202 | // XXX 7.11 The value DW_VIRTUALITY_none is equivalent to the absence 203 | // of the DW_AT_virtuality attribute. 
204 | AT_ENUM(virtuality, DW_VIRTUALITY); 205 | // XXX vtable_elem_location 206 | AT_UDYNAMIC(allocated); 207 | AT_UDYNAMIC(associated); 208 | 209 | ////////////////////////////////////////////////////////////////// 210 | // 0x5X 211 | // 212 | 213 | // XXX data_location 214 | AT_UDYNAMIC(byte_stride); 215 | AT_ADDRESS(entry_pc); 216 | AT_FLAG(use_UTF8); 217 | AT_REFERENCE(extension); 218 | rangelist 219 | at_ranges(const die &d) 220 | { 221 | return d[DW_AT::ranges].as_rangelist(); 222 | } 223 | // XXX trampoline 224 | // XXX const call_column, call_file, call_line 225 | AT_STRING(description); 226 | // XXX const binary_scale 227 | // XXX const decimal_scale 228 | AT_REFERENCE(small); 229 | // XXX const decimal_sign 230 | // XXX const digit_count 231 | 232 | ////////////////////////////////////////////////////////////////// 233 | // 0x6X 234 | // 235 | 236 | AT_STRING(picture_string); 237 | AT_FLAG_(mutable); 238 | AT_FLAG(threads_scaled); 239 | AT_FLAG_(explicit); 240 | AT_REFERENCE(object_pointer); 241 | AT_ENUM(endianity, DW_END); 242 | AT_FLAG(elemental); 243 | AT_FLAG(pure); 244 | AT_FLAG(recursive); 245 | AT_REFERENCE(signature); // XXX Computed might be useful 246 | AT_FLAG(main_subprogram); 247 | // XXX const data_bit_offset 248 | AT_FLAG(const_expr); 249 | AT_FLAG(enum_class); 250 | AT_STRING(linkage_name); 251 | 252 | rangelist 253 | die_pc_range(const die &d) 254 | { 255 | // DWARF4 section 2.17 256 | if (d.has(DW_AT::ranges)) 257 | return at_ranges(d); 258 | taddr low = at_low_pc(d); 259 | taddr high = d.has(DW_AT::high_pc) ? 
at_high_pc(d) : (low + 1); 260 | return rangelist({{low, high}}); 261 | } 262 | 263 | DWARFPP_END_NAMESPACE 264 | -------------------------------------------------------------------------------- /src/disasm/MaximalBlockBuilder.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2015-2016 University of Kaiserslautern. 8 | 9 | #include "MaximalBlockBuilder.h" 10 | #include 11 | #include 12 | #include 13 | #include 14 | 15 | namespace disasm { 16 | 17 | MaximalBlockBuilder::MaximalBlockBuilder() : 18 | m_buildable{false}, 19 | m_bb_idx{0}, 20 | m_max_block_idx{0}, 21 | m_end_addr{0} { 22 | } 23 | 24 | std::vector 25 | MaximalBlockBuilder::appendableBasicBlocksAt(const addr_t addr) const { 26 | // XXX: an instruction can be appendable to multiple basic blocks 27 | // that share the same last fragment. 
28 | std::vector result; 29 | for (auto &bblock : m_bblocks) { 30 | if (bblock.isAppendableAt(addr)) 31 | result.push_back(bblock.id()); 32 | } 33 | return result; 34 | } 35 | 36 | void 37 | MaximalBlockBuilder::createBasicBlockWith(const cs_insn *inst) { 38 | m_bblocks.emplace_back(BasicBlock(m_bb_idx, inst)); 39 | m_insts.emplace_back(MCInst(inst)); 40 | m_end_addr = inst->address + inst->size; 41 | m_bb_idx++; 42 | } 43 | 44 | void 45 | MaximalBlockBuilder::createValidBasicBlockWith(const cs_insn *inst) { 46 | createBasicBlockWith(inst); 47 | m_bblocks.back().m_valid = true; 48 | setBranch(inst); 49 | } 50 | 51 | MaximalBlock MaximalBlockBuilder::buildResultDirectlyAndReset() { 52 | MaximalBlock result{m_max_block_idx, m_branch}; 53 | // one BB & buildable then put in the result 54 | result.m_bblocks.swap(m_bblocks); 55 | result.m_insts.swap(m_insts); 56 | result.m_end_addr = result.m_insts.back().addr() 57 | + result.m_insts.back().size(); 58 | m_bb_idx = 0; 59 | m_end_addr = 0; 60 | m_buildable = false; 61 | m_max_block_idx++; 62 | return result; 63 | } 64 | 65 | MaximalBlock MaximalBlockBuilder::buildResultFromValidBasicBlocks 66 | (const std::vector &valid_blocks) { 67 | MaximalBlock result{m_max_block_idx, m_branch}; 68 | for (auto block : valid_blocks) { 69 | result.m_bblocks.push_back(*block); 70 | } 71 | // move only valid instructions to result 72 | for (auto inst_iter = m_insts.cbegin(); 73 | inst_iter < m_insts.cend(); ++inst_iter) { 74 | bool inst_valid = false; 75 | for (auto valid_block_iter = valid_blocks.cbegin(); 76 | valid_block_iter < valid_blocks.cend() && !inst_valid; 77 | ++valid_block_iter) { 78 | for (auto addr : 79 | (*valid_block_iter)->getInstructionAddresses()) { 80 | if ((*inst_iter).addr() == addr) { 81 | result.m_insts.push_back(*inst_iter); 82 | inst_valid = true; 83 | break; 84 | } 85 | } 86 | } 87 | } 88 | result.m_end_addr = result.m_insts.back().addr() 89 | + result.m_insts.back().size(); 90 | return result; 91 | } 92 | 93 | 
MaximalBlock MaximalBlockBuilder::build() { 94 | if (!m_buildable) { 95 | // return an invalid maximal block! 96 | m_max_block_idx++; 97 | return disasm::MaximalBlock(); 98 | } 99 | if (m_bblocks.size() == 1) { 100 | return buildResultDirectlyAndReset(); 101 | } 102 | // classify BBs to valid and overlap the rest (if found) should be discarded 103 | std::vector valid_blocks; 104 | std::vector overlap_blocks; 105 | 106 | for (auto bblock_iter = m_bblocks.begin(); 107 | bblock_iter < m_bblocks.end(); ++bblock_iter) { 108 | if ((*bblock_iter).isValid()) { 109 | valid_blocks.push_back(&(*bblock_iter)); 110 | } else { 111 | // we keep only potential overlapping BBs 112 | if (m_end_addr - (*bblock_iter).endAddr() <= 2) { 113 | overlap_blocks.push_back(&(*bblock_iter)); 114 | } 115 | } 116 | } 117 | // Case of no overlap 118 | if (overlap_blocks.size() == 0) { 119 | if (valid_blocks.size() == m_bblocks.size()) { 120 | // all basic blocks are valid and should be moved to result 121 | return buildResultDirectlyAndReset(); 122 | } else { 123 | MaximalBlock result = buildResultFromValidBasicBlocks(valid_blocks); 124 | // move only valid BB to result 125 | m_bblocks.clear(); 126 | m_insts.clear(); 127 | m_bb_idx = 0; 128 | m_end_addr = 0; 129 | m_buildable = false; 130 | m_max_block_idx++; 131 | return result; 132 | } 133 | } 134 | // Case of BB overlap then MB should maintain overlap BBs and their instructions. 135 | MaximalBlock result = buildResultFromValidBasicBlocks(valid_blocks); 136 | 137 | if (overlap_blocks.size() + result.m_bblocks.size() == m_bblocks.size() 138 | && result.m_insts.size() == 1) { 139 | // optimization for the case of spurious branch instructions. 
140 | m_insts.pop_back(); 141 | m_bblocks.pop_back(); 142 | m_bb_idx = overlap_blocks.size(); 143 | m_end_addr = m_insts.back().addr() + m_insts.back().size(); 144 | m_buildable = false; 145 | m_max_block_idx++; 146 | return result; 147 | } 148 | 149 | std::vector insts_buffer; 150 | std::vector bb_buffer; 151 | // TODO: if valid block contain only one inst then optimize for that case 152 | // copy all overlap blocks 153 | for (auto block_iter = overlap_blocks.cbegin(); 154 | block_iter < overlap_blocks.cend(); ++block_iter) { 155 | bb_buffer.push_back(*(*block_iter)); 156 | } 157 | // move only overlap instructions to inst buffer 158 | for (auto inst_iter = m_insts.cbegin(); 159 | inst_iter < m_insts.cend(); ++inst_iter) { 160 | bool is_overlap_inst = false; 161 | for (auto overlap_block_iter = overlap_blocks.cbegin(); 162 | overlap_block_iter < overlap_blocks.cend() && !is_overlap_inst; 163 | ++overlap_block_iter) { 164 | for (auto addr : 165 | (*overlap_block_iter)->getInstructionAddresses()) { 166 | if ((*inst_iter).addr() == addr) { 167 | insts_buffer.push_back(*inst_iter); 168 | is_overlap_inst = true; 169 | break; 170 | } 171 | } 172 | } 173 | } 174 | // keep overlap instructions and blocks 175 | m_insts.swap(insts_buffer); 176 | m_bblocks.swap(bb_buffer); 177 | m_end_addr = m_insts.back().addr() + m_insts.back().size(); 178 | assert(result.m_bblocks.size() > 0 179 | && "No Basic Blocks in Maximal Block!!"); 180 | assert(result.m_insts.size() > 0 181 | && "No Instructions in Maximal Block!!"); 182 | m_bb_idx = overlap_blocks.size(); 183 | m_buildable = false; 184 | m_max_block_idx++; 185 | return result; 186 | } 187 | 188 | void MaximalBlockBuilder::append(const cs_insn *inst) { 189 | if (m_bblocks.size() == 0) { 190 | createBasicBlockWith(inst); 191 | return; 192 | } 193 | // get all appendable BBs 194 | bool appendable = false; 195 | for (auto &bblock : m_bblocks) { 196 | if (bblock.isAppendableBy(inst)) { 197 | bblock.append(inst); 198 | appendable = 
true; 199 | } 200 | } 201 | if (appendable) { 202 | m_insts.emplace_back(MCInst(inst)); 203 | } else { 204 | createBasicBlockWith(inst); 205 | } 206 | } 207 | 208 | void MaximalBlockBuilder::appendBranch(const cs_insn *inst) { 209 | m_buildable = true; 210 | 211 | if (m_bblocks.size() == 0) { 212 | createValidBasicBlockWith(inst); 213 | return; 214 | } 215 | bool found_appendable = false; 216 | // get all appendable BBs 217 | for (auto &bblock : m_bblocks) { 218 | if (bblock.isAppendableBy(inst)) { 219 | bblock.append(inst); 220 | // a BB that ends with a branch is valid 221 | bblock.m_valid = true; 222 | found_appendable = true; 223 | } 224 | } 225 | 226 | if (found_appendable) { 227 | m_insts.emplace_back(MCInst(inst)); 228 | m_end_addr = inst->address + inst->size; 229 | } else { 230 | createValidBasicBlockWith(inst); 231 | } 232 | setBranch(inst); 233 | } 234 | 235 | void MaximalBlockBuilder::setBranch(const cs_insn *inst) { 236 | cs_detail *detail = inst->detail; 237 | if (inst->id == ARM_INS_CBZ || inst->id == ARM_INS_CBNZ) { 238 | m_branch.m_conditional_branch = true; 239 | m_branch.m_direct_branch = true; 240 | m_branch.m_is_call = false; 241 | m_branch.m_target = static_cast(detail->arm.operands[1].imm); 242 | return; 243 | } 244 | if (inst->id == ARM_INS_BLX || inst->id == ARM_INS_BL) { 245 | m_branch.m_is_call = true; 246 | } else { 247 | m_branch.m_is_call = false; 248 | } 249 | m_branch.m_conditional_branch = (inst->detail->arm.cc != ARM_CC_AL); 250 | if (inst->detail->arm.op_count == 1 251 | && inst->detail->arm.operands[0].type == ARM_OP_IMM) { 252 | m_branch.m_direct_branch = true; 253 | m_branch.m_target = static_cast(detail->arm.operands[0].imm); 254 | return; 255 | } 256 | m_branch.m_direct_branch = false; 257 | } 258 | 259 | bool MaximalBlockBuilder::isCleanReset() { 260 | return !m_buildable && m_bblocks.size() == 0; 261 | } 262 | 263 | const std::vector 264 | MaximalBlockBuilder::getInstructionAddrsOf(const BasicBlock &bblock) const { 265 | 
return bblock.m_inst_addrs; 266 | } 267 | } 268 | -------------------------------------------------------------------------------- /src/disasm/analysis/CFGNode.cpp: -------------------------------------------------------------------------------- 1 | //===------------------------------------------------------------*- C++ -*-===// 2 | // 3 | // This file is distributed under BSD License. See LICENSE.TXT for details. 4 | // 5 | //===----------------------------------------------------------------------===// 6 | // 7 | // Copyright (c) 2016 University of Kaiserslautern. 8 | 9 | #include "CFGNode.h" 10 | #include 11 | 12 | namespace disasm { 13 | CFGNode::CFGNode() : 14 | m_type{CFGNodeType::kUnknown}, 15 | m_is_call{false}, 16 | m_traversal_status{NodeTraversalStatus::kUnvisited}, 17 | m_role_in_procedure{CFGNodeRoleInProcedure::kUnknown}, 18 | m_candidate_start_addr{0}, 19 | m_overlap_node{nullptr}, 20 | m_node_appendable_by_this{nullptr}, 21 | m_procedure_id{0}, 22 | m_immediate_successor{nullptr}, 23 | m_remote_successor{nullptr}, 24 | m_max_block{nullptr} { 25 | } 26 | 27 | CFGNode::CFGNode(MaximalBlock *current_block) : 28 | m_type{CFGNodeType::kUnknown}, 29 | m_is_call{false}, 30 | m_traversal_status{NodeTraversalStatus::kUnvisited}, 31 | m_role_in_procedure{CFGNodeRoleInProcedure::kUnknown}, 32 | m_candidate_start_addr{0}, 33 | m_overlap_node{nullptr}, 34 | m_node_appendable_by_this{nullptr}, 35 | m_procedure_id{0}, 36 | m_immediate_successor{nullptr}, 37 | m_remote_successor{nullptr}, 38 | m_max_block{current_block} { 39 | } 40 | 41 | void CFGNode::addRemotePredecessor(CFGNode *predecessor, addr_t target_addr) { 42 | m_direct_preds.emplace_back 43 | (CFGEdge(CFGEdgeType::kDirect, predecessor, target_addr)); 44 | } 45 | 46 | void CFGNode::addImmediatePredecessor 47 | (CFGNode *predecessor, addr_t target_addr) { 48 | m_direct_preds.emplace_back 49 | (CFGEdge(CFGEdgeType::kConditional, predecessor, target_addr)); 50 | } 51 | 52 | void 
CFGNode::setImmediateSuccessor(CFGNode *successor) { 53 | m_immediate_successor = successor; 54 | } 55 | 56 | void CFGNode::setRemoteSuccessor(CFGNode *successor) { 57 | m_remote_successor = successor; 58 | } 59 | 60 | const MaximalBlock *CFGNode::maximalBlock() const noexcept { 61 | return m_max_block; 62 | } 63 | 64 | MaximalBlock *CFGNode::maximalBlockPtr() const noexcept { 65 | return m_max_block; 66 | } 67 | 68 | const CFGNode *CFGNode::getOverlapNode() const { 69 | return m_overlap_node; 70 | } 71 | 72 | bool CFGNode::isData() const { 73 | return m_type == CFGNodeType::kData; 74 | } 75 | 76 | bool CFGNode::isCode() const { 77 | return m_type == CFGNodeType::kCode; 78 | } 79 | 80 | void CFGNode::setType(const CFGNodeType type) { 81 | m_type = type; 82 | } 83 | 84 | CFGNodeType CFGNode::getType() const { 85 | return m_type; 86 | } 87 | 88 | std::vector CFGNode::getCandidateInstructions() const { 89 | std::vector result; 90 | addr_t current = m_candidate_start_addr; 91 | for (const auto &inst : m_max_block->getInstructions()) { 92 | if (inst.addr() == current) { 93 | result.push_back(&inst); 94 | current += inst.size(); 95 | } 96 | } 97 | return result; 98 | } 99 | 100 | std::vector CFGNode::getCandidateInstructionsSatisfying 101 | (std::function predicate) const { 102 | std::vector result; 103 | addr_t current = m_candidate_start_addr; 104 | for (const auto &inst : m_max_block->getInstructions()) { 105 | if (inst.addr() == current) { 106 | if (predicate(&inst)) { 107 | result.push_back(&inst); 108 | } 109 | current += inst.size(); 110 | } 111 | } 112 | return result; 113 | } 114 | 115 | addr_t CFGNode::getCandidateStartAddr() const noexcept { 116 | return m_candidate_start_addr; 117 | } 118 | 119 | void CFGNode::setCandidateStartAddr(addr_t candidate_start) noexcept { 120 | // a candidate start address should be set to the first instruction that can 121 | // match it. 
122 | for (const auto &inst : m_max_block->getInstructions()) { 123 | if (candidate_start <= inst.addr()) { 124 | m_candidate_start_addr = inst.addr(); 125 | break; 126 | } 127 | } 128 | } 129 | 130 | const CFGNode *CFGNode::immediateSuccessor() const { 131 | return m_immediate_successor; 132 | } 133 | 134 | const CFGNode *CFGNode::remoteSuccessor() const { 135 | return m_remote_successor; 136 | } 137 | 138 | const std::vector & 139 | CFGNode::getDirectPredecessors() const noexcept { 140 | return m_direct_preds; 141 | } 142 | 143 | const std::vector & 144 | CFGNode::getIndirectPredecessors() const noexcept { 145 | return m_indirect_preds; 146 | } 147 | 148 | const std::vector &CFGNode::getIndirectSuccessors() const noexcept { 149 | return m_indirect_succs; 150 | } 151 | 152 | void CFGNode::setMaximalBlock(MaximalBlock *maximal_block) noexcept { 153 | m_max_block = maximal_block; 154 | } 155 | 156 | size_t CFGNode::id() const noexcept { 157 | return m_max_block->id(); 158 | } 159 | 160 | CFGNode *CFGNode::getOverlapNodePtr() const noexcept { 161 | return m_overlap_node; 162 | } 163 | 164 | bool CFGNode::hasOverlapWithOtherNode() const noexcept { 165 | return m_overlap_node != nullptr; 166 | } 167 | 168 | bool CFGNode::isCandidateStartAddressSet() const noexcept { 169 | return m_candidate_start_addr != 0; 170 | } 171 | 172 | bool CFGNode::isProcedureEntry() const noexcept { 173 | return m_role_in_procedure == CFGNodeRoleInProcedure::kEntry; 174 | } 175 | 176 | bool CFGNode::isCandidateStartAddressValid 177 | (addr_t candidate_addr) const noexcept { 178 | return candidate_addr <= m_max_block->addrOfLastInst(); 179 | } 180 | 181 | void CFGNode::setToDataAndInvalidatePredecessors() { 182 | m_type = CFGNodeType::kData; 183 | for (auto pred_iter = m_direct_preds.begin(); 184 | pred_iter < m_direct_preds.end(); ++pred_iter) { 185 | if (!(*pred_iter).node()->isData() 186 | && ((*pred_iter).type() == CFGEdgeType::kDirect 187 | || (*pred_iter).type() == 
CFGEdgeType::kConditional)) { 188 | // printf("Invalidating predecessors of %lu at %lx: pred %lu\n", 189 | // this->id(), 190 | // this->maximalBlock()->addrOfLastInst(), 191 | // (*pred_iter).node()->id()); 192 | (*pred_iter).node()->setToDataAndInvalidatePredecessors(); 193 | } 194 | } 195 | } 196 | 197 | void CFGNode::resetCandidateStartAddress() { 198 | m_candidate_start_addr = 0; 199 | } 200 | 201 | bool CFGNode::operator==(const CFGNode &src) const noexcept { 202 | return this->id() == src.id(); 203 | } 204 | 205 | bool CFGNode::isAssignedToProcedure() const noexcept { 206 | return m_procedure_id != 0; 207 | } 208 | 209 | bool CFGNode::isCall() const noexcept { 210 | return m_is_call; 211 | } 212 | 213 | void CFGNode::setIsCall(bool value) noexcept { 214 | m_is_call = value; 215 | if (!value) { 216 | m_indirect_succs.clear(); 217 | } 218 | } 219 | 220 | bool CFGNode::isAlignedToPredecessor() const noexcept { 221 | return m_node_appendable_by_this != nullptr; 222 | } 223 | 224 | const CFGNode *CFGNode::getAlignedPredecessor() const noexcept { 225 | return m_node_appendable_by_this; 226 | } 227 | 228 | size_t CFGNode::getCountOfCandidateInstructions() const noexcept { 229 | size_t result = 0; 230 | addr_t current = m_candidate_start_addr; 231 | for (const auto &inst : m_max_block->getInstructions()) { 232 | if (inst.addr() == current) { 233 | current += inst.size(); 234 | result++; 235 | } 236 | } 237 | return result; 238 | } 239 | 240 | void CFGNode::setAsReturnNodeFrom(CFGNode &cfg_node) { 241 | m_node_appendable_by_this = &cfg_node; 242 | cfg_node.m_indirect_succs.emplace_back 243 | (CFGEdge(CFGEdgeType::kReturn, 244 | this, 245 | cfg_node.maximalBlock()->endAddr())); 246 | cfg_node.m_is_call = true; 247 | } 248 | 249 | void CFGNode::setAsSwitchCaseFor(CFGNode *cfg_node, const addr_t target_addr) { 250 | // printf("Node: %lu at %lx Target: %lx\n", cfg_node->id(), 251 | // cfg_node->maximalBlock()->endAddr(), target_addr); 252 | m_indirect_preds.emplace_back 
253 | (CFGEdge(CFGEdgeType::kSwitchTable, cfg_node, target_addr)); 254 | cfg_node->m_indirect_succs.emplace_back 255 | (CFGEdge(CFGEdgeType::kSwitchTable, this, target_addr)); 256 | } 257 | 258 | bool CFGNode::hasPredecessors() const noexcept { 259 | return !(m_node_appendable_by_this == nullptr 260 | && m_indirect_preds.size() == 0 261 | && m_direct_preds.size() == 0); 262 | } 263 | 264 | bool CFGNode::isSwitchStatement() const noexcept { 265 | return m_indirect_succs.size() > 1; 266 | } 267 | 268 | bool CFGNode::isSwitchBranchTarget() const noexcept { 269 | for (const auto &cfg_edge : m_indirect_preds) { 270 | if (cfg_edge.type() == CFGEdgeType::kSwitchTable) 271 | return true; 272 | } 273 | return false; 274 | } 275 | 276 | addr_t CFGNode::getMinTargetAddrOfValidPredecessor() const noexcept { 277 | addr_t minimum_addr = UINT64_MAX; 278 | for (const auto &pred : m_indirect_preds) { 279 | if (pred.targetAddr() < minimum_addr) { 280 | minimum_addr = pred.targetAddr(); 281 | } 282 | } 283 | if (minimum_addr != UINT64_MAX) { 284 | return minimum_addr; 285 | } 286 | for (const auto &pred : m_direct_preds) { 287 | if (pred.targetAddr() < minimum_addr 288 | && pred.node()->getType() != CFGNodeType::kData 289 | && pred.type() != CFGEdgeType::kConditional 290 | && pred.node()->id() != this->id()) { 291 | minimum_addr = pred.targetAddr(); 292 | } 293 | } 294 | if (minimum_addr == UINT64_MAX) { 295 | return 0; 296 | } 297 | return minimum_addr; 298 | } 299 | 300 | bool CFGNode::isImmediateSuccessorSet() const noexcept { 301 | return m_immediate_successor != nullptr; 302 | } 303 | 304 | bool CFGNode::isAppendableBy(const CFGNode *cfg_node) const { 305 | return m_max_block->endAddr() == 306 | cfg_node->maximalBlock()->addrOfFirstInst(); 307 | } 308 | 309 | CFGNode *CFGNode::getReturnSuccessorNode() const noexcept { 310 | if (m_indirect_succs.size() == 1 311 | && m_indirect_succs[0].type() == CFGEdgeType::kReturn) { 312 | return m_indirect_succs[0].node(); 313 | } 314 | return 
nullptr; 315 | } 316 | 317 | CFGNodeRoleInProcedure CFGNode::roleInProcedure() const noexcept { 318 | return m_role_in_procedure; 319 | } 320 | 321 | addr_t CFGNode::procedure_id() const noexcept { 322 | return m_procedure_id; 323 | } 324 | } 325 | -------------------------------------------------------------------------------- /src/binutils/dwarf/value.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | #include 4 | 5 | using namespace std; 6 | 7 | DWARFPP_BEGIN_NAMESPACE 8 | 9 | value::value(const unit *cu, 10 | DW_AT name, DW_FORM form, type typ, section_offset offset) 11 | : cu(cu), form(form), typ(typ), offset(offset) { 12 | if (form == DW_FORM::indirect) 13 | resolve_indirect(name); 14 | } 15 | 16 | section_offset 17 | value::get_section_offset() const 18 | { 19 | return cu->get_section_offset() + offset; 20 | } 21 | 22 | taddr 23 | value::as_address() const 24 | { 25 | if (form != DW_FORM::addr) 26 | throw value_type_mismatch("cannot read " + to_string(typ) + " as address"); 27 | 28 | cursor cur(cu->data(), offset); 29 | return cur.address(); 30 | } 31 | 32 | const void * 33 | value::as_block(size_t *size_out) const 34 | { 35 | // XXX Blocks can contain all sorts of things, including 36 | // references, which couldn't be resolved by callers in the 37 | // current minimal API. 
38 | cursor cur(cu->data(), offset); 39 | switch (form) { 40 | case DW_FORM::block1: 41 | *size_out = cur.fixed(); 42 | break; 43 | case DW_FORM::block2: 44 | *size_out = cur.fixed(); 45 | break; 46 | case DW_FORM::block4: 47 | *size_out = cur.fixed(); 48 | break; 49 | case DW_FORM::block: 50 | case DW_FORM::exprloc: 51 | *size_out = cur.uleb128(); 52 | break; 53 | default: 54 | throw value_type_mismatch("cannot read " + to_string(typ) + " as block"); 55 | } 56 | cur.ensure(*size_out); 57 | return cur.pos; 58 | } 59 | 60 | uint64_t 61 | value::as_uconstant() const 62 | { 63 | cursor cur(cu->data(), offset); 64 | switch (form) { 65 | case DW_FORM::data1: 66 | return cur.fixed(); 67 | case DW_FORM::data2: 68 | return cur.fixed(); 69 | case DW_FORM::data4: 70 | return cur.fixed(); 71 | case DW_FORM::data8: 72 | return cur.fixed(); 73 | case DW_FORM::udata: 74 | return cur.uleb128(); 75 | default: 76 | throw value_type_mismatch("cannot read " + to_string(typ) + " as uconstant"); 77 | } 78 | } 79 | 80 | int64_t 81 | value::as_sconstant() const 82 | { 83 | cursor cur(cu->data(), offset); 84 | switch (form) { 85 | case DW_FORM::data1: 86 | return cur.fixed(); 87 | case DW_FORM::data2: 88 | return cur.fixed(); 89 | case DW_FORM::data4: 90 | return cur.fixed(); 91 | case DW_FORM::data8: 92 | return cur.fixed(); 93 | case DW_FORM::sdata: 94 | return cur.sleb128(); 95 | default: 96 | throw value_type_mismatch("cannot read " + to_string(typ) + " as sconstant"); 97 | } 98 | } 99 | 100 | expr 101 | value::as_exprloc() const 102 | { 103 | cursor cur(cu->data(), offset); 104 | size_t size; 105 | // Prior to DWARF 4, exprlocs were encoded as blocks. 
106 | switch (form) { 107 | case DW_FORM::exprloc: 108 | case DW_FORM::block: 109 | size = cur.uleb128(); 110 | break; 111 | case DW_FORM::block1: 112 | size = cur.fixed(); 113 | break; 114 | case DW_FORM::block2: 115 | size = cur.fixed(); 116 | break; 117 | case DW_FORM::block4: 118 | size = cur.fixed(); 119 | break; 120 | default: 121 | throw value_type_mismatch("cannot read " + to_string(typ) + " as exprloc"); 122 | } 123 | return expr(cu, cur.get_section_offset(), size); 124 | } 125 | 126 | bool 127 | value::as_flag() const 128 | { 129 | switch (form) { 130 | case DW_FORM::flag: { 131 | cursor cur(cu->data(), offset); 132 | return cur.fixed() != 0; 133 | } 134 | case DW_FORM::flag_present: 135 | return true; 136 | default: 137 | throw value_type_mismatch("cannot read " + to_string(typ) + " as flag"); 138 | } 139 | } 140 | 141 | rangelist 142 | value::as_rangelist() const 143 | { 144 | section_offset off = as_sec_offset(); 145 | 146 | // The compilation unit may not have a base address. In this 147 | // case, the first entry in the range list must be a base 148 | // address entry, but we'll just assume 0 for the initial base 149 | // address. 150 | die cudie = cu->root(); 151 | taddr cu_low_pc = cudie.has(DW_AT::low_pc) ? at_low_pc(cudie) : 0; 152 | auto sec = cu->get_dwarf().get_section(section_type::ranges); 153 | auto cusec = cu->data(); 154 | return rangelist(sec, off, cusec->addr_size, cu_low_pc); 155 | } 156 | 157 | die 158 | value::as_reference() const 159 | { 160 | section_offset off; 161 | // XXX Would be nice if we could avoid this. The cursor is 162 | // all overhead here. 
163 | cursor cur(cu->data(), offset); 164 | switch (form) { 165 | case DW_FORM::ref1: 166 | off = cur.fixed(); 167 | break; 168 | case DW_FORM::ref2: 169 | off = cur.fixed(); 170 | break; 171 | case DW_FORM::ref4: 172 | off = cur.fixed(); 173 | break; 174 | case DW_FORM::ref8: 175 | off = cur.fixed(); 176 | break; 177 | case DW_FORM::ref_udata: 178 | off = cur.uleb128(); 179 | break; 180 | 181 | case DW_FORM::ref_addr: { 182 | off = cur.offset(); 183 | // These seem to be extremely rare in practice (I 184 | // haven't been able to get gcc to produce a 185 | // ref_addr), so it's not worth caching this lookup. 186 | const compilation_unit *base_cu = nullptr; 187 | for (auto &file_cu : cu->get_dwarf().compilation_units()) { 188 | if (file_cu.get_section_offset() > off) 189 | break; 190 | base_cu = &file_cu; 191 | } 192 | die d(base_cu); 193 | d.read(off - base_cu->get_section_offset()); 194 | return d; 195 | } 196 | 197 | case DW_FORM::ref_sig8: { 198 | uint64_t sig = cur.fixed(); 199 | try { 200 | return cu->get_dwarf().get_type_unit(sig).type(); 201 | } catch (std::out_of_range &e) { 202 | throw format_error("unknown type signature 0x" + to_hex(sig)); 203 | } 204 | } 205 | 206 | default: 207 | throw value_type_mismatch("cannot read " + to_string(typ) + " as reference"); 208 | } 209 | 210 | die d(cu); 211 | d.read(off); 212 | return d; 213 | } 214 | 215 | void 216 | value::as_string(string &buf) const 217 | { 218 | size_t size; 219 | const char *p = as_cstr(&size); 220 | buf.resize(size); 221 | memmove(&buf.front(), p, size); 222 | } 223 | 224 | string 225 | value::as_string() const 226 | { 227 | size_t size; 228 | const char *s = as_cstr(&size); 229 | return string(s, size); 230 | } 231 | 232 | const char * 233 | value::as_cstr(size_t *size_out) const 234 | { 235 | cursor cur(cu->data(), offset); 236 | switch (form) { 237 | case DW_FORM::string: 238 | return cur.cstr(size_out); 239 | case DW_FORM::strp: { 240 | section_offset off = cur.offset(); 241 | cursor 
scur(cu->get_dwarf().get_section(section_type::str), off); 242 | return scur.cstr(size_out); 243 | } 244 | default: 245 | throw value_type_mismatch("cannot read " + to_string(typ) + " as string"); 246 | } 247 | } 248 | 249 | section_offset 250 | value::as_sec_offset() const 251 | { 252 | // Prior to DWARF 4, sec_offsets were encoded as data4 or 253 | // data8. 254 | cursor cur(cu->data(), offset); 255 | switch (form) { 256 | case DW_FORM::data4: 257 | return cur.fixed(); 258 | case DW_FORM::data8: 259 | return cur.fixed(); 260 | case DW_FORM::sec_offset: 261 | return cur.offset(); 262 | default: 263 | throw value_type_mismatch("cannot read " + to_string(typ) + " as sec_offset"); 264 | } 265 | } 266 | 267 | void 268 | value::resolve_indirect(DW_AT name) 269 | { 270 | if (form != DW_FORM::indirect) 271 | return; 272 | 273 | cursor c(cu->data(), offset); 274 | DW_FORM form; 275 | do { 276 | form = (DW_FORM)c.uleb128(); 277 | } while (form == DW_FORM::indirect); 278 | typ = attribute_spec(name, form).type; 279 | offset = c.get_section_offset(); 280 | } 281 | 282 | string 283 | to_string(const value &v) 284 | { 285 | switch (v.get_type()) { 286 | case value::type::invalid: 287 | return ""; 288 | case value::type::address: 289 | return "0x" + to_hex(v.as_address()); 290 | case value::type::block: { 291 | size_t size; 292 | const char *b = (const char*)v.as_block(&size); 293 | string res = ::to_string(size) + " byte block:"; 294 | for (size_t pos = 0; pos < size; ++pos) { 295 | res += ' '; 296 | res += to_hex(b[pos]); 297 | } 298 | return res; 299 | } 300 | case value::type::constant: 301 | return "0x" + to_hex(v.as_uconstant()); 302 | case value::type::uconstant: 303 | return ::to_string(v.as_uconstant()); 304 | case value::type::sconstant: 305 | return ::to_string(v.as_sconstant()); 306 | case value::type::exprloc: 307 | // XXX 308 | return ""; 309 | case value::type::flag: 310 | return v.as_flag() ? 
"true" : "false"; 311 | case value::type::line: 312 | return ""; 313 | case value::type::loclist: 314 | return ""; 315 | case value::type::mac: 316 | return ""; 317 | case value::type::rangelist: 318 | return ""; 319 | case value::type::reference: { 320 | die d = v.as_reference(); 321 | auto tu = dynamic_cast(&d.get_unit()); 322 | if (tu) 323 | return "<.debug_types+0x" + to_hex(d.get_section_offset()) + ">"; 324 | return "<0x" + to_hex(d.get_section_offset()) + ">"; 325 | } 326 | case value::type::string: 327 | return v.as_string(); 328 | } 329 | return ""; 330 | } 331 | 332 | DWARFPP_END_NAMESPACE 333 | -------------------------------------------------------------------------------- /src/binutils/dwarf/dwarf.cc: -------------------------------------------------------------------------------- 1 | #include "internal.hh" 2 | 3 | using namespace std; 4 | 5 | DWARFPP_BEGIN_NAMESPACE 6 | 7 | ////////////////////////////////////////////////////////////////// 8 | // class dwarf 9 | // 10 | 11 | struct dwarf::impl 12 | { 13 | impl(const std::shared_ptr &l) 14 | : l(l), have_type_units(false) { } 15 | 16 | std::shared_ptr l; 17 | 18 | std::shared_ptr

sec_info; 19 | std::shared_ptr

sec_abbrev; 20 | 21 | std::vector compilation_units; 22 | 23 | std::unordered_map type_units; 24 | bool have_type_units; 25 | 26 | std::map > sections; 27 | }; 28 | 29 | dwarf::dwarf(const std::shared_ptr &l) 30 | : m(make_shared(l)) 31 | { 32 | const void *data; 33 | size_t size; 34 | 35 | // Get required sections 36 | data = l->load(section_type::info, &size); 37 | if (!data) 38 | throw format_error("required .debug_info section missing"); 39 | m->sec_info = make_shared

(section_type::abbrev, data, size); 45 | 46 | // Get compilation units. Everything derives from these, so 47 | // there's no point in doing it lazily. 48 | cursor infocur(m->sec_info); 49 | while (!infocur.end()) { 50 | // XXX Circular reference. Given that we now require 51 | // the dwarf object to stick around for DIEs, maybe we 52 | // might as well require that for units, too. 53 | m->compilation_units.emplace_back( 54 | *this, infocur.get_section_offset()); 55 | infocur.subsection(); 56 | } 57 | } 58 | 59 | dwarf::~dwarf() 60 | { 61 | } 62 | 63 | const std::vector & 64 | dwarf::compilation_units() const 65 | { 66 | static std::vector empty; 67 | if (!m) 68 | return empty; 69 | return m->compilation_units; 70 | } 71 | 72 | const type_unit & 73 | dwarf::get_type_unit(uint64_t type_signature) const 74 | { 75 | if (!m->have_type_units) { 76 | cursor tucur(get_section(section_type::types)); 77 | while (!tucur.end()) { 78 | // XXX Circular reference 79 | type_unit tu(*this, tucur.get_section_offset()); 80 | m->type_units[tu.get_type_signature()] = tu; 81 | tucur.subsection(); 82 | } 83 | m->have_type_units = true; 84 | } 85 | if (!m->type_units.count(type_signature)) 86 | throw out_of_range("type signature 0x" + to_hex(type_signature)); 87 | return m->type_units[type_signature]; 88 | } 89 | 90 | std::shared_ptr

91 | dwarf::get_section(section_type type) const 92 | { 93 | if (type == section_type::info) 94 | return m->sec_info; 95 | if (type == section_type::abbrev) 96 | return m->sec_abbrev; 97 | 98 | auto it = m->sections.find(type); 99 | if (it != m->sections.end()) 100 | return it->second; 101 | 102 | size_t size; 103 | const void *data = m->l->load(type, &size); 104 | if (!data) 105 | throw format_error(std::string(elf::section_type_to_name(type)) 106 | + " section missing"); 107 | m->sections[type] = std::make_shared

(section_type::str, data, size); 108 | return m->sections[type]; 109 | } 110 | 111 | ////////////////////////////////////////////////////////////////// 112 | // class unit 113 | // 114 | 115 | /** 116 | * Implementation of a unit. 117 | */ 118 | struct unit::impl 119 | { 120 | const dwarf file; 121 | const section_offset offset; 122 | const std::shared_ptr

&subsec, 145 | section_offset debug_abbrev_offset, section_offset root_offset, 146 | uint64_t type_signature = 0, section_offset type_offset = 0) 147 | : file(file), offset(offset), subsec(subsec), 148 | debug_abbrev_offset(debug_abbrev_offset), 149 | root_offset(root_offset), type_signature(type_signature), 150 | type_offset(type_offset), have_abbrevs(false) { } 151 | 152 | void force_abbrevs(); 153 | }; 154 | 155 | unit::~unit() 156 | { 157 | } 158 | 159 | const dwarf & 160 | unit::get_dwarf() const 161 | { 162 | return m->file; 163 | } 164 | 165 | section_offset 166 | unit::get_section_offset() const 167 | { 168 | return m->offset; 169 | } 170 | 171 | const die& 172 | unit::root() const 173 | { 174 | if (!m->root.valid()) { 175 | m->force_abbrevs(); 176 | m->root = die(this); 177 | m->root.read(m->root_offset); 178 | } 179 | return m->root; 180 | } 181 | 182 | const std::shared_ptr

& 183 | unit::data() const 184 | { 185 | return m->subsec; 186 | } 187 | 188 | const abbrev_entry & 189 | unit::get_abbrev(abbrev_code acode) const 190 | { 191 | if (!m->have_abbrevs) 192 | m->force_abbrevs(); 193 | 194 | if (!m->abbrevs_vec.empty()) { 195 | if (acode >= m->abbrevs_vec.size()) 196 | goto unknown; 197 | const abbrev_entry &entry = m->abbrevs_vec[acode]; 198 | if (entry.code == 0) 199 | goto unknown; 200 | return entry; 201 | } else { 202 | auto it = m->abbrevs_map.find(acode); 203 | if (it == m->abbrevs_map.end()) 204 | goto unknown; 205 | return it->second; 206 | } 207 | 208 | unknown: 209 | throw format_error("unknown abbrev code 0x" + to_hex(acode)); 210 | } 211 | 212 | void 213 | unit::impl::force_abbrevs() 214 | { 215 | // XXX Compilation units can share abbrevs. Parse each table 216 | // at most once. 217 | if (have_abbrevs) 218 | return; 219 | 220 | // Section 7.5.3 221 | cursor c(file.get_section(section_type::abbrev), 222 | debug_abbrev_offset); 223 | abbrev_entry entry; 224 | abbrev_code highest = 0; 225 | while (entry.read(&c)) { 226 | abbrevs_map[entry.code] = entry; 227 | if (entry.code > highest) 228 | highest = entry.code; 229 | } 230 | 231 | // Typically, abbrev codes are assigned linearly, so it's more 232 | // space efficient and time efficient to store the table in a 233 | // vector. Convert to a vector if it's dense enough, by some 234 | // rough estimate of "enough". 
235 | if (highest * 10 < abbrevs_map.size() * 15) { 236 | // Move the map into the vector 237 | abbrevs_vec.resize(highest + 1); 238 | for (auto &entry : abbrevs_map) 239 | abbrevs_vec[entry.first] = move(entry.second); 240 | abbrevs_map.clear(); 241 | } 242 | 243 | have_abbrevs = true; 244 | } 245 | 246 | ////////////////////////////////////////////////////////////////// 247 | // class compilation_unit 248 | // 249 | 250 | compilation_unit::compilation_unit(const dwarf &file, section_offset offset) 251 | { 252 | // Read the CU header (DWARF4 section 7.5.1.1) 253 | cursor cur(file.get_section(section_type::info), offset); 254 | std::shared_ptr

subsec = cur.subsection(); 255 | cursor sub(subsec); 256 | sub.skip_initial_length(); 257 | uhalf version = sub.fixed(); 258 | if (version < 2 || version > 4) 259 | throw format_error("unknown compilation unit version " + std::to_string(version)); 260 | // .debug_abbrev-relative offset of this unit's abbrevs 261 | section_offset debug_abbrev_offset = sub.offset(); 262 | ubyte address_size = sub.fixed(); 263 | subsec->addr_size = address_size; 264 | 265 | m = make_shared(file, offset, subsec, debug_abbrev_offset, 266 | sub.get_section_offset()); 267 | } 268 | 269 | const line_table & 270 | compilation_unit::get_line_table() const 271 | { 272 | if (!m->lt.valid()) { 273 | const die &d = root(); 274 | if (!d.has(DW_AT::stmt_list) || !d.has(DW_AT::name) || 275 | !d.has(DW_AT::comp_dir)) 276 | goto done; 277 | 278 | shared_ptr

sec; 279 | try { 280 | sec = m->file.get_section(section_type::line); 281 | } catch (format_error &e) { 282 | goto done; 283 | } 284 | 285 | m->lt = line_table(sec, d[DW_AT::stmt_list].as_sec_offset(), 286 | m->subsec->addr_size, at_comp_dir(d), 287 | at_name(d)); 288 | } 289 | done: 290 | return m->lt; 291 | } 292 | 293 | ////////////////////////////////////////////////////////////////// 294 | // class type_unit 295 | // 296 | 297 | type_unit::type_unit(const dwarf &file, section_offset offset) 298 | { 299 | // Read the type unit header (DWARF4 section 7.5.1.2) 300 | cursor cur(file.get_section(section_type::types), offset); 301 | std::shared_ptr

subsec = cur.subsection(); 302 | cursor sub(subsec); 303 | sub.skip_initial_length(); 304 | uhalf version = sub.fixed(); 305 | if (version != 4) 306 | throw format_error("unknown type unit version " + std::to_string(version)); 307 | // .debug_abbrev-relative offset of this unit's abbrevs 308 | section_offset debug_abbrev_offset = sub.offset(); 309 | ubyte address_size = sub.fixed(); 310 | subsec->addr_size = address_size; 311 | uint64_t type_signature = sub.fixed(); 312 | section_offset type_offset = sub.offset(); 313 | 314 | m = make_shared(file, offset, subsec, debug_abbrev_offset, 315 | sub.get_section_offset(), type_signature, 316 | type_offset); 317 | } 318 | 319 | uint64_t 320 | type_unit::get_type_signature() const 321 | { 322 | return m->type_signature; 323 | } 324 | 325 | const die & 326 | type_unit::type() const 327 | { 328 | if (!m->type.valid()) { 329 | m->force_abbrevs(); 330 | m->type = die(this); 331 | m->type.read(m->type_offset); 332 | } 333 | return m->type; 334 | } 335 | 336 | DWARFPP_END_NAMESPACE 337 | --------------------------------------------------------------------------------