├── setup.cfg ├── tstbs.py ├── LICENSE ├── idaunpack.py ├── test_idblib.py ├── README.md ├── tree-walking.py ├── idbtool.py └── idblib.py /setup.cfg: -------------------------------------------------------------------------------- 1 | [flake8] 2 | ignore = E402,E501,E731 3 | 4 | -------------------------------------------------------------------------------- /tstbs.py: -------------------------------------------------------------------------------- 1 | def binary_search(a, k): 2 | # c++: a.upperbound(k)-- 3 | first, last = 0, len(a) 4 | while first>1 6 | if k < a[mid]: 7 | last = mid 8 | else: 9 | first = mid+1 10 | return first-1 11 | for x in range(8): 12 | print(x, binary_search([2,3,5,6], x)) 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Willem Hengeveld 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /idaunpack.py: -------------------------------------------------------------------------------- 1 | """ 2 | `idaunpack` is a tool to aid in decoding packed data structures from an 3 | IDA idb or i64 database. 4 | """ 5 | from __future__ import print_function, division 6 | import struct 7 | import re 8 | import sys 9 | from binascii import a2b_hex, b2a_hex 10 | from idblib import IdaUnpacker 11 | 12 | def dump_packed(data, wordsize, pattern): 13 | p = IdaUnpacker(wordsize, data) 14 | if pattern: 15 | for c in pattern: 16 | if p.eof(): 17 | print("EOF") 18 | break 19 | if c == 'H': 20 | val = p.next16() 21 | fmt = "%04x" 22 | elif c == 'L': 23 | val = p.next32() 24 | fmt = "%08x" 25 | elif c == 'Q': 26 | val = p.next64() 27 | fmt = "%016x" 28 | elif c == 'W': 29 | val = p.nextword() 30 | if wordsize==4: 31 | fmt = "[%08x]" 32 | else: 33 | fmt = "[%016x]" 34 | else: 35 | raise Exception("unknown pattern: %s" % c) 36 | print(fmt % val, end=" ") 37 | 38 | while not p.eof(): 39 | val = p.next32() 40 | print("%08x" % val, end=" ") 41 | 42 | print() 43 | 44 | def unhex(hextxt): 45 | return a2b_hex(re.sub(r'\W+', '', hextxt, flags=re.DOTALL)) 46 | 47 | def main(): 48 | import argparse 49 | parser = argparse.ArgumentParser(description='idaunpack') 50 | parser.add_argument('--verbose', '-v', action='store_true') 51 | parser.add_argument('--debug', action='store_true', help='abort on exceptions.') 52 | parser.add_argument('--pattern', '-p', type=str, help='unpack pattern: sequence of H, L, Q, W') 53 | parser.add_argument('-4', '-3', '-32', const=4, dest='wordsize', action='store_const', help='use 32 bit words') 54 | parser.add_argument('-8', '-6', '-64', const=8, dest='wordsize', action='store_const', help='use 64 bit words') 55 | parser.add_argument('--wordsize', '-w', type=int, help='specify wordsize') 56 | parser.add_argument('hexconsts', nargs='*', type=str) 57 | 58 | args = parser.parse_args() 59 | if args.wordsize is None: 60 | args.wordsize = 4 61 | 62 | for x in args.hexconsts: 63 | dump_packed(unhex(x), args.wordsize, args.pattern) 64 | 65 | if __name__ == '__main__': 66 | main() 67 | -------------------------------------------------------------------------------- /test_idblib.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | from idblib import FileSection, binary_search, makeStringIO 3 | 4 | 5 | class TestFileSection(unittest.TestCase): 6 | """ unittest for FileSection object """ 7 | def test_file(self): 8 | s = makeStringIO(b"0123456789abcdef") 9 | fh = FileSection(s, 3, 11) 10 | self.assertEqual(fh.read(3), b"345") 11 | self.assertEqual(fh.read(8), b"6789a") 12 | self.assertEqual(fh.read(8), b"") 13 | 14 | fh.seek(-1, 2) 15 | self.assertEqual(fh.read(8), b"a") 16 | fh.seek(3) 17 | self.assertEqual(fh.read(2), b"67") 18 | fh.seek(-2, 1) 19 | self.assertEqual(fh.read(2), b"67") 20 | fh.seek(2, 1) 21 | self.assertEqual(fh.read(2), b"a") 22 | 23 | fh.seek(8) 24 | self.assertEqual(fh.read(1), b"") 25 | with self.assertRaises(Exception): 26 | fh.seek(9) 27 | 28 | 29 | class TestBinarySearch(unittest.TestCase): 30 | """ unittests for binary_search """ 31 | class Object: 32 | def __init__(self, num): 33 | self.key = num 34 | 35 | def __repr__(self): 36 | return "o(%d)" % self.num 37 | 38 | def test_bs(self): 39 | obj = self.Object 40 | lst = [obj(_) for _ in (2, 3, 5, 6)] 41 | self.assertEqual(binary_search(lst, 1), -1) 42 | self.assertEqual(binary_search(lst, 2), 0) 43 | self.assertEqual(binary_search(lst, 3), 1) 44 | self.assertEqual(binary_search(lst, 4), 1) 45 | self.assertEqual(binary_search(lst, 5), 2) 46 | self.assertEqual(binary_search(lst, 6), 3) 47 | self.assertEqual(binary_search(lst, 7), 3) 48 | 49 | def test_emptylist(self): 50 | obj = self.Object 51 | lst = [] 52 | self.assertEqual(binary_search(lst, 1), -1) 53 | 54 | def test_oneelem(self): 55 | obj = self.Object 56 | lst = [obj(1)] 57 | self.assertEqual(binary_search(lst, 0), -1) 58 | self.assertEqual(binary_search(lst, 1), 0) 59 | self.assertEqual(binary_search(lst, 2), 0) 60 | 61 | def test_twoelem(self): 62 | obj = self.Object 63 | lst = [obj(1), obj(3)] 64 | self.assertEqual(binary_search(lst, 0), -1) 65 | self.assertEqual(binary_search(lst, 1), 0) 66 | self.assertEqual(binary_search(lst, 2), 0) 67 | self.assertEqual(binary_search(lst, 3), 1) 68 | self.assertEqual(binary_search(lst, 4), 1) 69 | 70 | def test_listsize(self): 71 | obj = self.Object 72 | for l in range(3, 32): 73 | lst = [obj(_ + 1) for _ in range(l)] 74 | lst = lst[:1] + lst[2:] 75 | self.assertEqual(binary_search(lst, 0), -1) 76 | self.assertEqual(binary_search(lst, 1), 0) 77 | self.assertEqual(binary_search(lst, 2), 0) 78 | self.assertEqual(binary_search(lst, 3), 1) 79 | self.assertEqual(binary_search(lst, l - 1), l - 3) 80 | self.assertEqual(binary_search(lst, l), l - 2) 81 | self.assertEqual(binary_search(lst, l + 1), l - 2) 82 | self.assertEqual(binary_search(lst, l + 2), l - 2) 83 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | IDBTOOL 2 | ======= 3 | 4 | A tool for extracting information from IDA databases. 5 | `idbtool` knows how to handle databases from all IDA versions since v2.0, both `i64` and `idb` files. 6 | You can also use `idbtool` to recover information from unclosed databases. 7 | 8 | `idbtool` works without change with IDA v7.0. 9 | 10 | 11 | Much faster than loading a file in IDA 12 | -------------------------------------- 13 | 14 | With idbtool you can search thousands of .idb files in seconds. 15 | 16 | More precisely: on my laptop it takes: 17 | 18 | * 1.5 seconds to extract 143 idc scripts from 119 idb and i64 files. 19 | * 3.8 seconds to print idb info for 441 files. 20 | * 5.6 seconds to extract 281 enums containing 4726 members from 35 files. 21 | * 67.8 seconds to extract 5942 structs containing 33672 members from 265 files. 22 | 23 | Loading a approximately 5 Gbyte idb file in IDA, takes about 45 minutes. 24 | While idb3.h takes basically no time at all, no more than a few milliseconds. 25 | 26 | 27 | 28 | Download 29 | ======== 30 | 31 | Two versions of this tool exist: 32 | 33 | One written in python 34 | * https://github.com/nlitsme/pyidbutil 35 | 36 | One written in C++ 37 | * https://github.com/nlitsme/idbutil 38 | 39 | Both repositories contain a library which can be used for reading `.idb` or `.i64` files. 40 | 41 | 42 | Usage 43 | ===== 44 | 45 | Usage: 46 | 47 | idbtool [options] [database file(s)] 48 | 49 | * `-n` or `--names` will list all named values in the database. 50 | * `-s` or `--scripts` will list all scripts stored in the database. 51 | * `-u` or `--structs` will list all structs stored in the database. 52 | * `-e` or `--enums` will list all enums stored in the database. 53 | * `--imports` will list all imported symbols from the database. 54 | * `--funcdirs` will list function folders stored in the database. 55 | * `-i` or `--info` will print some general info about the database. 56 | * `-d` or `--pagedump` dump btree page tree contents. 57 | * `--inc`, `--dec` list all records in ascending / descending order. 58 | * `-q` or `--query` search specific records in the database. 59 | * `-m` or `--limit` limit the number of results returned by `-q`. 60 | * `-id0`, `-id1` dump only one specific section. 61 | * `--i64`, `--i32` tell idbtool that the specified file is from a 64 or 32 bit database. 62 | * `--recover` group files from an unpacked database. 63 | * `--classify` summarizes node usage in the database 64 | * `--dump` hexdump the original binary data 65 | 66 | query 67 | ----- 68 | 69 | Queries need to be specified last on the commandline. 70 | 71 | example: 72 | 73 | idbtool [database file(s)] --query "Root Node;V" 74 | 75 | Will list the source binary for all the databases specified on the commandline. 76 | 77 | A query is a string with the following format: 78 | 79 | * [==,<=,>=,<,>] - optional relation, default: == 80 | * a base node key: 81 | * a DOT followed by the numeric value of the nodeid. 82 | * a HASH followed by the numeric value of the system-nodeid. 83 | * a QUESTION followed by the name of the node. -> a 'N'ame node 84 | * the name of the node. -> the name is resolved, results in a '.'Dot node 85 | * an optional tag ( A for Alt, S for Supval, etc ) 86 | * an optional index value 87 | 88 | example queries: 89 | * `Root Node;V` -> prints record containing the source binary name 90 | * `?Root Node` -> prints the Name record pointing to the root 91 | * `>Root Node` -> prints the first 10 records starting with the root node id. 92 | * ` prints the 10 records startng with the recordsbefore the rootnode. 93 | * `.0xff000001;N` -> prints the rootnode name entry. 94 | * `#1;N` -> prints the rootnode name entry. 95 | 96 | List the highest node and following record in the database in two different ways, 97 | the first: starting at the first record below `ffc00000`, and listing the next. 98 | The second: starting at the first record after `ffc00000`, and listing the previous: 99 | * `--query "<#0xc00000" --limit 2 --inc -v` 100 | * `--query ">#0xc00000" --limit 2 --dec -v` 101 | 102 | Note that this should be the nodeid in the `$ MAX NODE` record. 103 | 104 | List the last two records: 105 | * `--limit 2 --dec -v` 106 | 107 | List the first two records, the `$ MAX LINK` and `$ MAX NODE` records: 108 | * `--limit 2 --inc -v` 109 | 110 | 111 | A full database dump 112 | -------------------- 113 | 114 | Several methods exist for printing all records in the database. This may be useful if 115 | you want to investigate more of IDA''s internals. But can also be useful in recovering 116 | data from corrupted databases. 117 | 118 | * `--inc`, `--dec` can be used to enumerate all b-tree records in either forward, or backward direction. 119 | * add `-v` to get a prettier key/value output 120 | * `--id0` walks the page tree, instead of the record tree, printing the contents of each page 121 | * `--pagedump` linearly skip through the file, this will also reveal information in deleted pages. 122 | 123 | naked files 124 | =========== 125 | 126 | When IDA or your computer crashed while working on a disassembly, and you did not yet save the database, 127 | you are left with a couple of files with extensions like `.id0`, `.id1`, `.nam`, etc. 128 | 129 | These files are the unpacked database, i call them `naked` files. 130 | 131 | Using the `--filetype` and `--i64` or `--i32` options you can inspect these `naked` files individually. 132 | or use the `--recover` option to view them as a complete database together. 133 | `idbtool` will figure out automatically which files would belong together. 134 | 135 | `idbtool` can figure out the bitsize of the database from an `.id0` file, but not(yet) from the others. 136 | 137 | 138 | LIBRARY 139 | ======= 140 | 141 | The file `idblib.py` contains a library. 142 | 143 | 144 | TODO 145 | ==== 146 | 147 | * add option to list all comments stored in the database 148 | * add option to list flags for a list of addresses. 149 | 150 | Author 151 | ====== 152 | 153 | Willem Hengeveld 154 | 155 | -------------------------------------------------------------------------------- /tree-walking.py: -------------------------------------------------------------------------------- 1 | """ 2 | Copyright (c) 2016 Willem Hengeveld 3 | 4 | Experiment in btree walking 5 | 6 | 7 | *-------->[00] 8 | *------>[02]---+ [01] 9 | root ->[08]---+ [05]-+ | 10 | [17]-+ | | +--->[03] 11 | | | | [04] 12 | | | | 13 | | | +----->[06] 14 | | | [07] 15 | | | 16 | | | *-------->[09] 17 | | +->[11]---+ [10] 18 | | [14]-+ | 19 | | | +--->[12] 20 | | | [13] 21 | | | 22 | | +----->[15] 23 | | [16] 24 | | 25 | | *-------->[18] 26 | +--->[20]---+ [19] 27 | [23]-+ | 28 | | +--->[21] 29 | | [22] 30 | | 31 | +----->[24] 32 | [25] 33 | 34 | 35 | decrement from 08 : ix-- -> getpage, ix=len-1 -> getpage -> ix=len-1 36 | decrement from 17 : ix-- -> getpage, ix=len-1 -> getpage -> ix=len-1 37 | decrement from 02 : ix-- -> getpage, ix=len-1 38 | decrement from 05 : ix-- -> getpage, ix=len-1 39 | 40 | decrement from 01 : ix-- -> ix>=0 -> use key at ix 41 | decrement from 03 : ix-- -> <0 -> pop -> ix>=0 -> use key at ix 42 | decrement from 09 : ix-- -> <0 -> pop -> ix<0 -> pop -> ix>=0 -> use key at ix 43 | 44 | increment from 09 : ix++ 45 | increment from 10 : ix++ -> ix==len(index) -> pop: ix==-1 -> ix++ -> ix==0 -> use 46 | increment from 11 : recurse, ix=0 -> use 47 | increment from 08 : recurse, ix=-1 -> recurse, ix=0 -> use 48 | increment from 07 : ix++ -> ix==len(index) -> pop, ix++ -> ix==len -> pop -> ix++ -> ix==0 -> use 49 | """ 50 | from __future__ import division, print_function, absolute_import, unicode_literals 51 | 52 | # shape of the tree 53 | # a <2,2> tree is basically like the tree pictured in the ascii art above. 54 | TREEDEPTH = 2 55 | NODEWIDTH = 2 56 | 57 | 58 | def binary_search(a, k): 59 | # c++: a.upperbound(k)-- 60 | first, last = 0, len(a) 61 | while first < last: 62 | mid = (first + last) >> 1 63 | if k < a[mid].key: 64 | last = mid 65 | else: 66 | first = mid + 1 67 | return first - 1 68 | 69 | 70 | class Entry(object): 71 | """ 72 | a key/value entry from a b-tree page 73 | """ 74 | def __init__(self, key, val): 75 | self.key = key 76 | self.val = val 77 | 78 | def __repr__(self): 79 | return "%s=%d" % (self.key, self.val) 80 | 81 | 82 | class BasePage(object): 83 | """ 84 | BasePage has methods common to both leaf and index pages 85 | """ 86 | def __init__(self, kv): 87 | self.index = [] 88 | for k, v in kv: 89 | self.index.append(Entry(k, v)) 90 | 91 | def find(self, key): 92 | i = binary_search(self.index, key) 93 | if i < 0: 94 | if self.isindex(): 95 | return ('recurse', -1) 96 | return ('gt', 0) 97 | if self.index[i].key == key: 98 | return ('eq', i) 99 | if self.isindex(): 100 | return ('recurse', i) 101 | return ('lt', i) 102 | 103 | def getkey(self, ix): 104 | return self.index[ix].key 105 | 106 | def getval(self, ix): 107 | return self.index[ix].val 108 | 109 | def isleaf(self): 110 | return self.preceeding is None 111 | 112 | def isindex(self): 113 | return self.preceeding is not None 114 | 115 | def __repr__(self): 116 | return ("leaf" if self.isleaf() else ("index<%d>" % self.preceeding)) + repr(self.index) 117 | 118 | 119 | class LeafPage(BasePage): 120 | """ a leaf page in the b-tree """ 121 | def __init__(self, kv): 122 | super(self.__class__, self).__init__(kv) 123 | self.preceeding = None 124 | 125 | 126 | class IndexPage(BasePage): 127 | """ 128 | An index page in the b-tree. 129 | This page has a preceeding page plus several key+subpage pairs. 130 | For each key+subpage: all keys in the subpage are greater than the key 131 | """ 132 | def __init__(self, preceeding, kv): 133 | super(self.__class__, self).__init__(kv) 134 | self.preceeding = preceeding 135 | 136 | def getpage(self, ix): 137 | return self.preceeding if ix < 0 else self.index[ix].val 138 | 139 | 140 | class Cursor: 141 | """ 142 | A Cursor object represents a position in the b-tree. 143 | 144 | It has methods for moving to the next or previous item. 145 | And methods for retrieving the key and value of the current position 146 | """ 147 | def __init__(self, db, stack): 148 | self.db = db 149 | self.stack = stack 150 | 151 | def next(self): 152 | page, ix = self.stack.pop() 153 | if page.isleaf(): 154 | # from leaf move towards root 155 | ix += 1 156 | while self.stack and ix == len(page.index): 157 | page, ix = self.stack.pop() 158 | ix += 1 159 | if ix < len(page.index): 160 | self.stack.append((page, ix)) 161 | else: 162 | # from node move towards leaf 163 | self.stack.append((page, ix)) 164 | page = self.db.readpage(page.getpage(ix)) 165 | while page.isindex(): 166 | ix = -1 167 | self.stack.append((page, ix)) 168 | page = self.db.readpage(page.getpage(ix)) 169 | ix = 0 170 | self.stack.append((page, ix)) 171 | 172 | self.verify() 173 | 174 | def prev(self): 175 | page, ix = self.stack.pop() 176 | ix -= 1 177 | if page.isleaf(): 178 | # move towards root, until non 'prec' item found 179 | while self.stack and ix < 0: 180 | page, ix = self.stack.pop() 181 | if ix >= 0: 182 | self.stack.append((page, ix)) 183 | else: 184 | # move towards leaf 185 | self.stack.append((page, ix)) 186 | while page.isindex(): 187 | page = self.db.readpage(page.getpage(ix)) 188 | ix = len(page.index) - 1 189 | self.stack.append((page, ix)) 190 | 191 | self.verify() 192 | 193 | def verify(self): 194 | """ verify cursor state consistency """ 195 | if len(self.stack) == 3: 196 | if not self.stack[-1][0].isleaf(): 197 | print("WARN no leaf") 198 | elif len(self.stack) > 3: 199 | print("WARN: stack too large") 200 | 201 | if len(self.stack) >= 2: 202 | if self.stack[0][0] == self.stack[1][0]: 203 | print("WARN: identical index pages on stack") 204 | if not self.stack[0][0].isindex(): 205 | print("WARN: expected root=index") 206 | if not self.stack[1][0].isindex(): 207 | print("WARN: expected 2nd=index") 208 | 209 | def eof(self): 210 | return len(self.stack) == 0 211 | 212 | def getkey(self): 213 | page, ix = self.stack[-1] 214 | return page.getkey(ix) 215 | 216 | def getval(self): 217 | page, ix = self.stack[-1] 218 | return page.getval(ix) 219 | 220 | def __repr__(self): 221 | return "cursor:" + repr(self.stack) 222 | 223 | 224 | class Btree: 225 | """ 226 | A B-tree implementation 227 | """ 228 | def __init__(self): 229 | self.pages = [] 230 | self.generate(TREEDEPTH, NODEWIDTH) 231 | 232 | def manual(self): 233 | """ manually construct the ascii art tree """ 234 | for i in range(9): 235 | self.pages.append(LeafPage((("%02d" % (3 * i), 0), ("%02d" % (3 * i + 1), 0)))) 236 | for i in range(3): 237 | self.pages.append(IndexPage(3 * i, (("%02d" % (9 * i + 2), 3 * i + 1), ("%02d" % (9 * i + 5), 3 * i + 2)))) 238 | self.pages.append(IndexPage(9, (("08", 10), ("17", 11)))) 239 | self.rootindex = len(self.pages) - 1 240 | 241 | def generate(self, depth, nodesize): 242 | """ automatically generate the try in the ascii art above """ 243 | 244 | def namegen(): 245 | i = 0 246 | while True: 247 | yield "%03d" % i 248 | i += 1 249 | 250 | self.rootindex = self.construct(namegen(), depth, nodesize) 251 | print("%d pages" % (len(self.pages))) 252 | 253 | def construct(self, namegen, depth, nodesize): 254 | if depth: 255 | return self.createindex(namegen, depth, nodesize) 256 | else: 257 | return self.createleaf(namegen, nodesize) 258 | 259 | def createindex(self, namegen, depth, nodesize): 260 | page = IndexPage(self.construct(namegen, depth - 1, nodesize), 261 | [(next(namegen), self.construct(namegen, depth - 1, nodesize)) for _ in range(nodesize)]) 262 | self.pages.append(page) 263 | return len(self.pages) - 1 264 | 265 | def createleaf(self, namegen, nodesize): 266 | page = LeafPage([(next(namegen), 0) for _ in range(nodesize)]) 267 | self.pages.append(page) 268 | return len(self.pages) - 1 269 | 270 | def readpage(self, pn): 271 | return self.pages[pn] 272 | 273 | def find(self, key): 274 | """ 275 | Find a node in the tree, returns the cursor plus the reletion to the wanted key: 276 | 'eq' for equal, 'lt' when the found key is less than the wanted key, 277 | or 'gt' when the found key is greater than the wanted key. 278 | """ 279 | page = self.readpage(self.rootindex) 280 | stack = [] 281 | while True: 282 | act, ix = page.find(key) 283 | stack.append((page, ix)) 284 | if act != 'recurse': 285 | break 286 | page = self.readpage(page.getpage(ix)) 287 | return act, Cursor(self, stack) 288 | 289 | def dumptree(self, pn, indent=0): 290 | """ dump all nodes of the current b-tree """ 291 | page = self.readpage(pn) 292 | print(" " * indent, page) 293 | if page.isindex(): 294 | print(" " * indent, end="") 295 | self.dumptree(page.preceeding, indent + 1) 296 | for p in range(len(page.index)): 297 | print(" " * indent, end="") 298 | self.dumptree(page.getpage(p), indent + 1) 299 | 300 | 301 | db = Btree() 302 | print("<<") 303 | db.dumptree(db.rootindex) 304 | print(">>") 305 | 306 | 307 | for i in range(NODEWIDTH * len(db.pages)): 308 | print("--------- %03d" % i) 309 | act, cursor = db.find("%03d" % i) 310 | print("found", act, cursor.getkey(), cursor) 311 | cursor.prev() 312 | if not cursor.eof(): 313 | print("prev:", "..", cursor.getkey(), cursor) 314 | else: 315 | print("prev: EOF", cursor) 316 | 317 | for i in range(NODEWIDTH * len(db.pages)): 318 | print("--------- %03d" % i) 319 | act, cursor = db.find("%03d" % i) 320 | print("found", act, cursor.getkey(), cursor) 321 | cursor.next() 322 | if not cursor.eof(): 323 | print("next:", "..", cursor.getkey(), cursor) 324 | else: 325 | print("next: EOF", cursor) 326 | 327 | for k in ('', '0', '1', '2', '3', '000', '010', '020', '100'): 328 | print("--------- %s" % k) 329 | act, cursor = db.find(k) 330 | print(cursor) 331 | print(act, cursor.getkey(), end=" next=") 332 | cursor.next() 333 | if cursor.eof(): 334 | print("EOF") 335 | else: 336 | print(cursor.getkey()) 337 | 338 | act, cursor = db.find("000") 339 | print("get000", end=" ") 340 | for i in range(NODEWIDTH * len(db.pages)): 341 | cursor.next() 342 | if cursor.eof(): 343 | print("EOF") 344 | else: 345 | print("-> %s" % cursor.getkey(), end=" ") 346 | print() 347 | 348 | act, cursor = db.find("025") 349 | print("get025", end=" ") 350 | for i in range(NODEWIDTH * len(db.pages)): 351 | cursor.prev() 352 | if cursor.eof(): 353 | print("EOF") 354 | else: 355 | print("-> %s" % cursor.getkey(), end=" ") 356 | print() 357 | -------------------------------------------------------------------------------- /idbtool.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python3 2 | """ 3 | Tool for querying information from Hexrays .idb and .i64 files 4 | without launching IDA. 5 | 6 | Copyright (c) 2016 Willem Hengeveld 7 | """ 8 | 9 | # todo: 10 | # '$ segs' 11 | # S = packed(startea, size, ....) 12 | # '$ srareas' 13 | # a = packed(startea, size, flag, flag) -- includes functions 14 | # b = packed(startea, size, flag, flag) -- segment 15 | # c = packed(startea, size, flag, flag) -- same as 'b' 16 | # 17 | from __future__ import division, print_function, absolute_import, unicode_literals 18 | import sys 19 | import os 20 | if sys.version_info[0] == 2: 21 | import scandir 22 | os.scandir = scandir.scandir 23 | if sys.version_info[0] == 2: 24 | reload(sys) 25 | sys.setdefaultencoding('utf-8') 26 | 27 | if sys.version_info[0] == 2: 28 | stdout = sys.stdout 29 | else: 30 | stdout = sys.stdout.buffer 31 | 32 | import struct 33 | import binascii 34 | import argparse 35 | import itertools 36 | from collections import defaultdict 37 | 38 | import re 39 | 40 | from datetime import datetime 41 | 42 | import idblib 43 | from idblib import hexdump 44 | 45 | 46 | def timestring(t): 47 | if t == 0: 48 | return "....-..-.. ..:..:.." 49 | return datetime.strftime(datetime.fromtimestamp(t), "%Y-%m-%d %H:%M:%S") 50 | 51 | 52 | def strz(b, o): 53 | return b[o:b.find(b'\x00', o)].decode('utf-8', 'ignore') 54 | 55 | def nonefmt(fmt, num): 56 | if num is None: 57 | return "-" 58 | return fmt % num 59 | 60 | ######### license encoding ################ 61 | 62 | 63 | def decryptuser(data): 64 | """ 65 | The '$ original user' node is encrypted with hexray's private key. 66 | Hence we can easily decrypt it, but not change it to something else. 67 | We can however copy the entry from another database, or just replace it with garbage. 68 | 69 | The node contains 128 bytes encrypted license, followed by 32 bytes zero. 70 | 71 | Note: i found several ida55 databases online where this does not work. 72 | possible these were created using a cracked version of IDA. 73 | """ 74 | data = int(binascii.b2a_hex(data[127::-1]), 16) 75 | user = pow(data, 0x13, 0x93AF7A8E3A6EB93D1B4D1FB7EC29299D2BC8F3CE5F84BFE88E47DDBDD5550C3CE3D2B16A2E2FBD0FBD919E8038BB05752EC92DD1498CB283AA087A93184F1DD9DD5D5DF7857322DFCD70890F814B58448071BBABB0FC8A7868B62EB29CC2664C8FE61DFBC5DB0EE8BF6ECF0B65250514576C4384582211896E5478F95C42FDED) 76 | user = binascii.a2b_hex("%0256x" % user) 77 | return user[1:] 78 | 79 | 80 | def licensestring(lic): 81 | """ decode a license blob """ 82 | if not lic: 83 | return 84 | if len(lic) < 127: 85 | print("too short license format: %s" % binascii.b2a_hex(lic)) 86 | return 87 | elif len(lic) > 127 and sum(lic[127:]) != 0: 88 | print("too long license format: %s" % binascii.b2a_hex(lic)) 89 | return 90 | 91 | if struct.unpack_from("= 128: 162 | user0 = decryptuser(user0) 163 | else: 164 | user0 = user0[:127] 165 | # user0 has 128 bytes rsa encrypted license, followed by 32 bytes zero 166 | print("orig: %s" % licensestring(user0)) 167 | # ida9 has S10+S11 == license json 168 | user10 = id0.blob(orignode, 'S', 16) 169 | if user10: 170 | import json 171 | user10 = json.loads(user10) 172 | print("orig: %s" % user10) 173 | curnode = id0.nodeByName('$ user1') 174 | if curnode: 175 | user1 = id0.bytes(curnode, 'S', 0) 176 | print("user: %s" % licensestring(user1)) 177 | 178 | 179 | ######### idb summary ######### 180 | 181 | 182 | filetypelist = [ 183 | "MS DOS EXE File", 184 | "MS DOS COM File", 185 | "Binary File", 186 | "MS DOS Driver", 187 | "New Executable (NE)", 188 | "Intel Hex Object File", 189 | "MOS Technology Hex Object File", 190 | "Linear Executable (LX)", 191 | "Linear Executable (LE)", 192 | "Netware Loadable Module (NLM)", 193 | "Common Object File Format (COFF)", 194 | "Portable Executable (PE)", 195 | "Object Module Format", 196 | "R-records", 197 | "ZIP file (this file is never loaded to IDA database)", 198 | "Library of OMF Modules", 199 | "ar library", 200 | "file is loaded using LOADER DLL", 201 | "Executable and Linkable Format (ELF)", 202 | "Watcom DOS32 Extender (W32RUN)", 203 | "Linux a.out (AOUT)", 204 | "PalmPilot program file", 205 | "MS DOS EXE File", 206 | "MS DOS COM File", 207 | "AIX ar library", 208 | "Mac OS X Mach-O file", 209 | ] 210 | 211 | 212 | def dumpinfo(id0): 213 | """ print various infos on the idb file """ 214 | def ftstring(ft): 215 | if 0 < ft < len(filetypelist): 216 | return "%02x:%s" % (ft, filetypelist[ft]) 217 | return "%02x:unknown" % ft 218 | 219 | def decodebitmask(fl, bitnames): 220 | l = [] 221 | knownbits = 0 222 | for bit, name in enumerate(bitnames): 223 | if fl & (1 << bit) and name is not None: 224 | l.append(name) 225 | knownbits |= 1 << bit 226 | if fl & ~knownbits: 227 | l.append("unknown_%x" % (fl & ~knownbits)) 228 | return ",".join(l) 229 | 230 | def osstring(fl): 231 | return decodebitmask(fl, ['msdos', 'win', 'os2', 'netw', 'unix', 'other']) 232 | 233 | def appstring(fl): 234 | return decodebitmask(fl, ['console', 'graphics', 'exe', 'dll', 'driver', '1thread', 'mthread', '16bit', '32bit', '64bit']) 235 | 236 | ldr = id0.nodeByName("$ loader name") 237 | if ldr: 238 | print("loader: %s %s" % (id0.string(ldr, 'S', 0), id0.string(ldr, 'S', 1))) 239 | 240 | if not id0.root: 241 | print("database has no RootNode") 242 | return 243 | 244 | if id0.idbparams: 245 | params = idblib.IDBParams(id0, id0.idbparams) 246 | print("cpu: %s, version=%d, filetype=%s, ostype=%s, apptype=%s, core:%x, size:%x" % (params.cpu, params.version, ftstring(params.filetype), osstring(params.ostype), appstring(params.apptype), params.corestart, params.coresize)) 247 | 248 | print("idaver=%s: %s" % (nonefmt("%04d", id0.idaver), id0.idaverstr)) 249 | 250 | srcmd5 = id0.originmd5 251 | print("nopens=%s, ctime=%s, crc=%s, md5=%s" % (nonefmt("%d", id0.nropens), nonefmt("%08x", id0.creationtime), nonefmt("%08x", id0.somecrc), hexdump(srcmd5) if srcmd5 else "-")) 252 | 253 | dumpuser(id0) 254 | 255 | 256 | def dumpnames(args, id0, nam): 257 | for ea in nam.allnames(): 258 | print("%08x: %s" % (ea, id0.name(ea))) 259 | 260 | 261 | def dumpscript(id0, node): 262 | """ dump all stored scripts """ 263 | s = idblib.Script(id0, node) 264 | 265 | print("======= %s %s =======" % (s.language, s.name)) 266 | print(s.body) 267 | 268 | 269 | def dumpstructmember(m): 270 | """ 271 | Dump info for a struct member. 272 | """ 273 | print(" %02x %02x %08x %02x: %-40s" % (m.skip, m.size, m.flags, m.props, m.name), end="") 274 | if m.enumid: 275 | print(" enum %08x" % m.enumid, end="") 276 | if m.structid: 277 | print(" struct %08x" % m.structid, end="") 278 | if m.ptrinfo: 279 | # packed 280 | # note: 64bit nrs are stored low32, high32 281 | # flags1, target, base, delta, flags2 282 | 283 | # flags1: 284 | # 0=off8 1=off16 2=off32 3=low8 4=low16 5=high8 6=high16 9=off64 285 | # 0x10 = targetaddr, 0x20 = baseaddr, 0x40 = delta, 0x80 = base is plainnum 286 | # flags2: 287 | # 1=image is off, 0x10 = subtract, 0x20 = signed operand 288 | print(" ptr %s" % m.ptrinfo, end="") 289 | if m.typeinfo: 290 | print(" type %s" % m.typeinfo, end="") 291 | print() 292 | 293 | 294 | def dumpstruct(id0, node): 295 | """ 296 | dump all info for the struct defined by `node` 297 | """ 298 | s = idblib.Struct(id0, node) 299 | 300 | 301 | print("struct %s, 0x%x" % (s.name, s.flags)) 302 | for m in s: 303 | dumpstructmember(m) 304 | 305 | def dumpbitmember(m): 306 | print(" %08x %s" % (m.value or 0, m.name)) 307 | def dumpmask(m): 308 | print(" mask %08x %s" % (m.mask, m.name)) 309 | for m in m: 310 | dumpbitmember(m) 311 | def dumpbitfield(id0, node): 312 | b = idblib.Bitfield(id0, node) 313 | print("bitfield %s, %s, %s, %s" % (b.name, nonefmt("0x%x", b.count), nonefmt("0x%x", b.representation), nonefmt("0x%x", b.flags))) 314 | for m in b: 315 | dumpmask(m) 316 | 317 | def dumpenummember(m): 318 | """ 319 | Print information on a single enum member 320 | """ 321 | print(" %08x %s" % (m.value or 0, m.name)) 322 | 323 | def dumpenum(id0, node): 324 | """ 325 | Dump all info for the enum defined by `node` 326 | """ 327 | e = idblib.Enum(id0, node) 328 | if e.flags and e.flags&1: 329 | dumpbitfield(id0, node) 330 | return 331 | print("enum %s, %s, %s, %s" % (e.name, nonefmt("0x%x", e.count), nonefmt("0x%x", e.representation), nonefmt("0x%x", e.flags))) 332 | 333 | for m in e: 334 | dumpenummember(m) 335 | 336 | 337 | def dumpimport(id0, node): 338 | # Note that '$ imports' is a list where the actual nodes 339 | # are stored in the list, therefore we add '1' to the node here. 340 | 341 | # first the named imports 342 | startkey = id0.makekey(node+1, 'S') 343 | endkey = id0.makekey(node+1, 'T') 344 | cur = id0.btree.find('ge', startkey) 345 | while cur.getkey() < endkey: 346 | txt = id0.string(cur) 347 | key = cur.getkey() 348 | ea = id0.decodekey(key)[3] 349 | print("%08x: %s" % (ea, txt)) 350 | cur.next() 351 | 352 | # then list the imports by ordinal 353 | startkey = id0.makekey(node+1, 'A') 354 | endkey = id0.makekey(node+1, 'B') 355 | cur = id0.btree.find('ge', startkey) 356 | while cur.getkey() < endkey: 357 | ordinal = id0.decodekey(cur.getkey())[3] 358 | ea = id0.int(cur) 359 | print("%08x: (ord%04d) %s" % (ea, ordinal, id0.name(ea))) 360 | cur.next() 361 | 362 | 363 | def enumlist(id0, listname, callback): 364 | """ 365 | Lists are all stored in a similar way. 366 | 367 | (listnode, 'N') = listname 368 | (listnode, 'A', -1) = list size <-- not for '$ scriptsnippets' 369 | (listnode, 'A', seqnr) = itemnode+1 370 | 371 | (listnode, 'Y', itemnode) = seqnr <-- only with '$ enums' 372 | 373 | (listnode, 'Y', 0) = list size <-- only '$ scriptsnippets' 374 | (listnode, 'Y', 1) = ? <-- only '$ scriptsnippets' 375 | 376 | (listnode, 'S', seqnr) = dllname <-- only '$ imports' 377 | 378 | """ 379 | listnode = id0.nodeByName(listname) 380 | if not listnode: 381 | return 382 | 383 | startkey = id0.makekey(listnode, 'A') 384 | endkey = id0.makekey(listnode, 'A', 0xFFFFFFFF) 385 | cur = id0.btree.find('ge', startkey) 386 | while cur.getkey() < endkey: 387 | item = id0.int(cur) 388 | callback(id0, item - 1) 389 | cur.next() 390 | 391 | 392 | def listfuncdirs(id0): 393 | listnode = id0.nodeByName('$ dirtree/funcs') 394 | if not listnode: 395 | return 396 | 397 | dir_id = 0 398 | while True: 399 | start = dir_id * 0x10000 400 | end = start + 0xFFFF 401 | data = id0.blob(listnode, 'S', start, end) 402 | if data == b'': 403 | break 404 | dumpfuncdir(id0, dir_id, data) 405 | dir_id += 1 406 | 407 | 408 | def dumpfuncdir(id0, dir_index, data): 409 | terminate = data.find(b'\0', 1) 410 | name = data[1:terminate].decode('utf-8') 411 | 412 | p = idblib.IdaUnpacker(id0.wordsize, data[terminate+1:]) 413 | parent = p.nextword() 414 | unk = p.next32() 415 | 416 | if data[0] == 0: # IDA 7.5 417 | subdir_count = p.next32() 418 | subdirs = [] 419 | while subdir_count: 420 | subdir_id = p.nextwordsigned() 421 | if subdirs: 422 | subdir_id = subdirs[-1] + subdir_id 423 | subdirs.append(subdir_id) 424 | subdir_count -= 1 425 | 426 | func_count = p.next32() 427 | funcs = [] 428 | while func_count: 429 | func_id = p.nextwordsigned() 430 | if funcs: 431 | func_id = funcs[-1] + func_id 432 | funcs.append(func_id) 433 | func_count -= 1 434 | 435 | elif data[0] == 1: # IDA 7.6 436 | children_count = p.next32() 437 | children = [] 438 | for i in range(children_count): 439 | next_child = p.nextwordsigned() 440 | if children: 441 | next_child += children[-1] 442 | children.append(next_child) 443 | 444 | subdir_count = p.next32() 445 | children_count -= subdir_count 446 | childtype_counts = [subdir_count] 447 | while children_count: 448 | childtype_count = p.next32() 449 | children_count -= childtype_count 450 | childtype_counts.append(childtype_count) 451 | 452 | subdirs = [] 453 | funcs = [] 454 | i = 0 455 | parsing_subdirs = True # switch back and forth 456 | for childtype_count in childtype_counts: 457 | for _ in range(childtype_count): 458 | if parsing_subdirs: 459 | subdirs.append(children[i]) 460 | else: 461 | funcs.append(children[i]) 462 | i += 1 463 | parsing_subdirs = not parsing_subdirs 464 | else: 465 | raise NotImplementedError('unsupported funcdir schema') 466 | 467 | if not p.eof(): 468 | raise Exception('not EOF after dir parsed') 469 | 470 | print("dir %d = %s" % (dir_index, name)) 471 | print(" parent = %d" % parent) 472 | print(" subdirs:") 473 | for subdir in subdirs: 474 | print(" %d" % subdir) 475 | print(" functions:") 476 | for func in funcs: 477 | print(" 0x%x" % func) 478 | 479 | 480 | def printent(args, id0, c): 481 | if args.verbose: 482 | print("%s = %s" % (id0.prettykey(c.getkey()), id0.prettyval(c.getval()))) 483 | else: 484 | print("%s = %s" % (hexdump(c.getkey()), hexdump(c.getval()))) 485 | 486 | 487 | def createkey(args, id0, base, tag, ix): 488 | """ 489 | 490 | parse base node specification: 491 | 492 | '?' -> explicit N key 493 | '#' -> relative to nodebase 494 | '.' -> absolute nodeid 495 | 496 | '' -> lookup by name. 497 | 498 | """ 499 | if base[:1] == '?': 500 | return id0.namekey(base[1:]) 501 | 502 | if re.match(r'^#(?:0[xX][0-9a-fA-F]+|\d+)$', base): 503 | nodeid = int(base[1:], 0) + id0.nodebase 504 | elif re.match(r'^\.(?:0[xX][0-9a-fA-F]+|\d+)$', base): 505 | nodeid = int(base[1:], 0) 506 | else: 507 | nodeid = id0.nodeByName(base) 508 | if nodeid and args.verbose > 1: 509 | print("found node %x for %s" % (nodeid, base)) 510 | if nodeid is None: 511 | print("Could not find '%s'" % base) 512 | return 513 | 514 | s = [nodeid] 515 | if tag is not None: 516 | s.append(tag) 517 | if ix is not None: 518 | try: 519 | ix = int(ix, 0) 520 | except: 521 | pass 522 | s.append(ix) 523 | 524 | return id0.makekey(*s) 525 | 526 | 527 | def enumeratecursor(args, c, onerec, callback): 528 | """ 529 | Enumerate cursor in direction specified by `--dec` or `--inc`, 530 | taking into account the optional limit set by `--limit` 531 | 532 | Output according to verbosity level set by `--verbose`. 533 | """ 534 | limit = args.limit 535 | while c and not c.eof() and (limit is None or limit > 0): 536 | callback(c) 537 | if args.dec: 538 | c.prev() 539 | else: 540 | c.next() 541 | if limit is not None: 542 | limit -= 1 543 | elif onerec: 544 | break 545 | 546 | 547 | def id0query(args, id0, query): 548 | """ 549 | queries start with an optional operator: <,<=,>,>=,== 550 | 551 | followed by either a name or address or nodeid 552 | 553 | Addresses are specified as a sequence of hexadecimal charaters. 554 | Nodeid's may be specified either as the full node id, starting with ff00, 555 | or starting with a '_' 556 | Names are anything which can be found under the name tree in the database. 557 | 558 | after the name/addr/node there is optionally a slash, followed by a node tag, 559 | and another slash, followed by a index or hash string. 560 | 561 | """ 562 | 563 | xlatop = {'=': 'eq', '==': 'eq', '>': 'gt', '<': 'lt', '>=': 'ge', '<=': 'le'} 564 | 565 | SEP = r";" 566 | m = re.match(r'^([=<>]=?)?(.+?)(?:' + SEP + r'(\w+)(?:' + SEP + r'(.+))?)?$', query) 567 | op = m.group(1) or "==" 568 | base = m.group(2) 569 | tag = m.group(3) # optional ;tag 570 | ix = m.group(4) # optional ;ix 571 | 572 | op = xlatop[op] 573 | 574 | c = id0.btree.find(op, createkey(args, id0, base, tag, ix)) 575 | 576 | enumeratecursor(args, c, op=='eq', lambda c:printent(args, id0, c)) 577 | 578 | 579 | def getsegs(id0): 580 | """ 581 | Returns a list of all segments. 582 | """ 583 | seglist = [] 584 | node = id0.nodeByName('$ segs') 585 | if not node: 586 | return 587 | startkey = id0.makekey(node, 'S') 588 | endkey = id0.makekey(node, 'T') 589 | cur = id0.btree.find('ge', startkey) 590 | while cur.getkey() < endkey: 591 | s = idblib.Segment(id0, cur.getval()) 592 | seglist.append(s) 593 | cur.next() 594 | 595 | return seglist 596 | 597 | 598 | def listsegments(id0): 599 | """ 600 | Print a summary of all segments found in the IDB. 601 | """ 602 | ssnode = id0.nodeByName('$ segstrings') 603 | if not ssnode: 604 | print("can't find '$ segstrings' node") 605 | return 606 | segstrings = id0.blob(ssnode, 'S') 607 | p = idblib.IdaUnpacker(id0.wordsize, segstrings) 608 | unk = p.next32() 609 | nextid = p.next32() 610 | slist = [] 611 | while not p.eof(): 612 | slen = p.next32() 613 | if slen is None: 614 | break 615 | name = p.bytes(slen) 616 | if name is None: 617 | break 618 | slist.append(name.decode('utf-8', 'ignore')) 619 | 620 | segs = getsegs(id0) 621 | for s in segs: 622 | print("%08x - %08x %s" % (s.startea, s.startea+s.size, slist[s.name_id-1])) 623 | 624 | def classifynodes(args, id0): 625 | """ 626 | Attempt to classify all nodes in the IDA database. 627 | 628 | Note: this does not work for very old dbs 629 | """ 630 | nodetype = {} 631 | tagstats = defaultdict(lambda : defaultdict(int)) 632 | 633 | segs = getsegs(id0) 634 | 635 | print("node: %x .. %x" % (id0.nodebase, id0.maxnode)) 636 | 637 | def addstat(nodetype, k): 638 | if len(k)<3: 639 | print("??? strange, expected longer key - %s" % k) 640 | return 641 | tag = k[2].decode('utf-8') 642 | if len(k)==3: 643 | tagstats[nodetype][(tag, )] += 1 644 | elif len(k)==4: 645 | value = k[3] 646 | if type(value)==int: 647 | if isaddress(value): 648 | tagstats[nodetype][(tag, 'addr')] += 1 649 | elif isnode(value): 650 | tagstats[nodetype][(tag, 'node')] += 1 651 | else: 652 | if value >= id0.maxnode: 653 | value -= pow(0x100, id0.wordsize) 654 | tagstats[nodetype][(tag, value)] += 1 655 | else: 656 | tagstats[nodetype][(tag, 'string')] += 1 657 | else: 658 | print("??? strange, expected shorter key - %s" % k) 659 | return 660 | 661 | def isaddress(addr): 662 | for s in segs: 663 | if s.startea <= addr < s.startea+s.size: 664 | return True 665 | 666 | def isnode(addr): 667 | return id0.nodebase <= addr <= id0.maxnode 668 | 669 | def processbitfieldvalue(v): 670 | nodetype[v._nodeid] = 'bitfieldvalue' 671 | 672 | def processbitfieldmask(m): 673 | nodetype[m._nodeid] = 'bitfieldmask' 674 | 675 | for m in m: 676 | processbitfieldvalue(m) 677 | 678 | def processbitfield(id0, node): 679 | nodetype[node] = 'bitfield' 680 | 681 | b = idblib.Bitfield(id0, node) 682 | for m in b: 683 | processbitfieldmask(m) 684 | 685 | 686 | def processenummember(m): 687 | nodetype[m._nodeid] = 'enummember' 688 | 689 | def processenums(id0, node): 690 | nodetype[node] = 'enum' 691 | 692 | e = idblib.Enum(id0, node) 693 | if e.flags&1: 694 | processbitfield(id0, node) 695 | return 696 | 697 | for m in e: 698 | processenummember(m) 699 | 700 | def processstructmember(m, typename): 701 | nodetype[m._nodeid] = typename 702 | 703 | def processstructs(id0, node, typename): 704 | nodetype[node] = typename 705 | s = idblib.Struct(id0, node) 706 | 707 | for m in s: 708 | processstructmember(m, typename+"member") 709 | 710 | def processscripts(id0, node): 711 | nodetype[node] = 'script' 712 | 713 | def processaddr(id0, cur): 714 | k = id0.decodekey(cur.getkey()) 715 | if len(k)==4 and k[2:4] == (b'A', 2): 716 | nodetype[id0.int(cur)-1] = 'hexrays' 717 | 718 | addstat('addr', k) 719 | 720 | def processfunc(id0, funcspec): 721 | p = idblib.IdaUnpacker(id0.wordsize, funcspec) 722 | 723 | funcstart = p.nextword() 724 | funcsize = p.nextword() 725 | flags = p.next16() 726 | if flags is None: 727 | return 728 | if flags&0x8000: # is tail 729 | return 730 | 731 | node = p.nextword() 732 | 733 | if node<0xFFFFFF and node!=0: 734 | processstructs(id0, node + id0.nodebase, "frame") 735 | 736 | def processimport(id0, node): 737 | print("imp %08x" % node) 738 | startkey = id0.makekey(node+1, 'A') 739 | endkey = id0.makekey(node+1, 'B') 740 | cur = id0.btree.find('ge', startkey) 741 | while cur.getkey() < endkey: 742 | dllnode = id0.int(cur) 743 | nodetype[dllnode] = 'import' 744 | cur.next() 745 | 746 | 747 | # mark enums, structs, scripts. 748 | enumlist(id0, '$ enums', processenums) 749 | enumlist(id0, '$ structs', lambda id0, node : processstructs(id0, node, "struct")) 750 | enumlist(id0, '$ scriptsnippets', processscripts) 751 | enumlist(id0, '$ imports', processimport) 752 | 753 | # enum functions, scan for stackframes 754 | funcsnode = id0.nodeByName('$ funcs') 755 | startkey = id0.makekey(funcsnode, 'S') 756 | endkey = id0.makekey(funcsnode, 'T') 757 | cur = id0.btree.find('ge', startkey) 758 | while cur.getkey() < endkey: 759 | processfunc(id0, cur.getval()) 760 | cur.next() 761 | 762 | clinode = id0.nodeByName('$ cli') 763 | if clinode: 764 | for letter in "ABCDEFGHIJKMcio": 765 | startkey = id0.makekey(clinode, letter) 766 | endkey = id0.makekey(clinode, chr(ord(letter)+1)) 767 | cur = id0.btree.find('ge', startkey) 768 | while cur.getkey() < endkey: 769 | nodetype[id0.int(cur)] = 'cli.'+letter 770 | cur.next() 771 | 772 | 773 | # enum addresses, scan for hex-rays nodes 774 | startkey = b'.' 775 | endkey = id0.makekey(id0.nodebase) 776 | cur = id0.btree.find('ge', startkey) 777 | while cur.getkey() < endkey: 778 | processaddr(id0, cur) 779 | cur.next() 780 | 781 | # addresses above node list 782 | startkey = id0.makekey(id0.maxnode+1) 783 | endkey = b'/' 784 | cur = id0.btree.find('ge', startkey) 785 | while cur.getkey() < endkey: 786 | processaddr(id0, cur) 787 | cur.next() 788 | 789 | # scan for unmarked nodes 790 | # $ fr[0-9a-f]+\.\w+ 791 | # $ fr[0-9a-f]+\. [rs] 792 | # $ F[0-9A-F]+\.\w+ 793 | # $ Stack of \w+ 794 | # Stack[0000007C] 795 | # xrefs to \w+ 796 | 797 | startkey = id0.makekey(id0.nodebase) 798 | endkey = id0.makekey(id0.maxnode+1) 799 | cur = id0.btree.find('ge', startkey) 800 | while cur.getkey() < endkey: 801 | k = id0.decodekey(cur.getkey()) 802 | node = k[1] 803 | if node not in nodetype: 804 | nodetype[node] = "unknown" 805 | if nodetype[node] == "unknown" and k[2] == b'N': 806 | name = cur.getval().rstrip(b'\x00') 807 | if re.match(br'\$ fr[0-9a-f]+\.\w+$', name): 808 | name = 'fr-type-functionframe' 809 | elif re.match(br'\$ fr[0-9a-f]+\. [rs]$', name): 810 | name = 'fr-type-functionframe' 811 | elif re.match(br'\$ F[0-9A-F]+\.\w+$', name): 812 | name = 'F-type-functionframe' 813 | elif name.startswith(b'Stack of '): 814 | name = 'stack-type-functionframe' 815 | elif name.startswith(b'Stack['): 816 | name = 'old-stack-type-functionframe' 817 | elif name.startswith(b'xrefs to '): 818 | name = 'old-xrefs' 819 | else: 820 | name = name.decode('utf-8', 'ignore') 821 | nodetype[node] = name 822 | 823 | cur.next() 824 | 825 | # output node classification 826 | if args.verbose: 827 | for k, v in sorted(nodetype.items(), key=lambda kv:kv[0]): 828 | print("%08x: %s" % (k, v)) 829 | 830 | # summarize tags per nodetype 831 | startkey = id0.makekey(id0.nodebase) 832 | endkey = id0.makekey(id0.maxnode+1) 833 | cur = id0.btree.find('ge', startkey) 834 | while cur.getkey() < endkey: 835 | k = id0.decodekey(cur.getkey()) 836 | node = k[1] 837 | nt = nodetype[node] 838 | 839 | addstat(nt, k) 840 | 841 | cur.next() 842 | 843 | # output tag statistics 844 | for nt, ntstats in sorted(tagstats.items(), key=lambda kv:kv[0]): 845 | print("====== %s =====" % nt) 846 | for k, v in ntstats.items(): 847 | if len(k)==1: 848 | print("%5d - %s" % (v, k[0])) 849 | elif len(k)==2 and type(k[1])==type(1): 850 | print("%5d - %s %8x" % (v, k[0], k[1])) 851 | elif type(k[1])==type(1): 852 | print("%5d - %s %8x %s" % (v, k[0], k[1], k[2:])) 853 | else: 854 | print("%5d - %s %s %s" % (v, k[0], k[1], k[2:])) 855 | 856 | 857 | def processid0(args, id0): 858 | if args.info: 859 | dumpinfo(id0) 860 | 861 | if args.pagedump: 862 | id0.btree.pagedump() 863 | 864 | if args.query: 865 | for query in args.query: 866 | id0query(args, id0, query) 867 | elif args.id0: 868 | id0.btree.dump() 869 | elif args.inc: 870 | c = id0.btree.find('ge', b'') 871 | enumeratecursor(args, c, False, lambda c:printent(args, id0, c)) 872 | elif args.dec: 873 | c = id0.btree.find('le', b'\x80') 874 | enumeratecursor(args, c, False, lambda c:printent(args, id0, c)) 875 | 876 | 877 | def hexascdumprange(id1, a, b): 878 | line = asc = "" 879 | for ea in range(a, b): 880 | if len(line)==0: 881 | line = "%08x:" % ea 882 | byte = id1.getFlags(ea)&0xFF 883 | line += " %02x" % byte 884 | asc += chr(byte) if 32 1: 945 | print("magic=%s, filever=%d" % (idb.magic, idb.fileversion)) 946 | for i in range(6): 947 | comp, ofs, size, checksum = idb.getsectioninfo(i) 948 | if ofs: 949 | part = idb.getpart(i) 950 | print("%2d: %02x, %08x %8x [%08x]: %s" % (i, comp, ofs, size, checksum, hexdump(part.read(256)))) 951 | 952 | nam = idb.getsection(idblib.NAMFile) 953 | id0 = idb.getsection(idblib.ID0File) 954 | id1 = idb.getsection(idblib.ID1File) 955 | processid0(args, id0) 956 | processid1(args, id1) 957 | processid2(args, idb.getsection(idblib.ID2File)) 958 | processnam(args, nam) 959 | processtil(args, idb.getsection(idblib.TILFile)) 960 | processseg(args, idb.getsection(idblib.SEGFile)) 961 | 962 | if args.names: 963 | dumpnames(args, id0, nam) 964 | if args.classify: 965 | classifynodes(args, id0) 966 | 967 | if args.scripts: 968 | enumlist(id0, '$ scriptsnippets', dumpscript) 969 | if args.structs: 970 | enumlist(id0, '$ structs', dumpstruct) 971 | if args.enums: 972 | enumlist(id0, '$ enums', dumpenum) 973 | if args.funcdirs: 974 | listfuncdirs(id0) 975 | if args.imports: 976 | enumlist(id0, '$ imports', dumpimport) 977 | if args.segs: 978 | listsegments(id0) 979 | 980 | 981 | def processfile(args, filetypehint, fh): 982 | class DummyIDB: 983 | def __init__(idb, args): 984 | if args.i64: 985 | idb.magic = 'IDA2' 986 | elif args.i32: 987 | idb.magic = 'IDA1' 988 | else: 989 | idb.magic = None 990 | 991 | try: 992 | magic = fh.read(64) 993 | fh.seek(-64, 1) 994 | if magic.startswith(b"Va") or magic.startswith(b"VA"): 995 | idb = DummyIDB(args) 996 | if filetypehint == 'id1': 997 | processid1(args, idblib.ID1File(idb, fh)) 998 | elif filetypehint == 'nam': 999 | processnam(args, idblib.NAMFile(idb, fh)) 1000 | elif filetypehint == 'seg': 1001 | processseg(args, idblib.SEGFile(idb, fh)) 1002 | else: 1003 | print("unknown VA type file: %s" % hexdump(magic)) 1004 | elif magic.startswith(b"IDAS"): 1005 | processid2(args, idblib.ID2File(DummyIDB(args), fh)) 1006 | elif magic.startswith(b"IDATIL"): 1007 | processtil(args, idblib.ID2File(DummyIDB(args), fh)) 1008 | elif magic.startswith(b"IDA"): 1009 | processidb(args, idblib.IDBFile(fh)) 1010 | elif magic.find(b'B-tree v') > 0: 1011 | processid0(args, idblib.ID0File(DummyIDB(args), fh)) 1012 | 1013 | except Exception as e: 1014 | print("ERROR %s" % e) 1015 | if args.debug: 1016 | raise 1017 | 1018 | 1019 | def recover_database(args, basepath, dbfiles): 1020 | processidb(args, idblib.RecoverIDBFile(args, basepath, dbfiles)) 1021 | 1022 | 1023 | def DirEnumerator(args, path): 1024 | """ 1025 | Enumerate all files / links in a directory, 1026 | optionally recursing into subdirectories, 1027 | or ignoring links. 1028 | """ 1029 | for d in os.scandir(path): 1030 | try: 1031 | if d.name == '.' or d.name == '..': 1032 | pass 1033 | elif d.is_symlink() and args.skiplinks: 1034 | pass 1035 | elif d.is_file(): 1036 | yield d.path 1037 | elif d.is_dir() and args.recurse: 1038 | for f in DirEnumerator(args, d.path): 1039 | yield f 1040 | except Exception as e: 1041 | print("EXCEPTION %s accessing %s/%s" % (e, path, d.name)) 1042 | 1043 | 1044 | def EnumeratePaths(args, paths): 1045 | """ 1046 | Enumerate all paths, files from the commandline 1047 | optionally recursing into subdirectories. 1048 | """ 1049 | for fn in paths: 1050 | try: 1051 | # 3 - for ftp://, 4 for http://, 5 for https:// 1052 | if fn.find("://") in (3, 4, 5): 1053 | yield fn 1054 | if os.path.islink(fn) and args.skiplinks: 1055 | pass 1056 | elif os.path.isdir(fn) and args.recurse: 1057 | for f in DirEnumerator(args, fn): 1058 | yield f 1059 | elif os.path.isfile(fn): 1060 | yield fn 1061 | except Exception as e: 1062 | print("EXCEPTION %s accessing %s" % (e, fn)) 1063 | 1064 | 1065 | def filetype_from_name(fn): 1066 | i = max(fn.rfind('.'), fn.rfind('/')) 1067 | return fn[i + 1:].lower() 1068 | 1069 | 1070 | def isv2name(name): 1071 | return name.lower() in ('$segregs.ida', '$segs.ida', '0.ida', '1.ida', 'ida.idl', 'names.ida') 1072 | 1073 | 1074 | def isv3ext(ext): 1075 | return ext.lower() in ('.id0', '.id1', '.id2', '.nam', '.til') 1076 | 1077 | 1078 | def xlatv2name(name): 1079 | oldnames = { 1080 | '$segregs.ida': 'reg', 1081 | '$segs.ida': 'seg', 1082 | '0.ida': 'id0', 1083 | '1.ida': 'id1', 1084 | 'ida.idl': 'idl', 1085 | 'names.ida': 'nam', 1086 | } 1087 | 1088 | return oldnames.get(name.lower()) 1089 | 1090 | 1091 | def main(): 1092 | parser = argparse.ArgumentParser(description='idbtool - print info from hex-rays IDA .idb and .i64 files', 1093 | formatter_class=argparse.RawDescriptionHelpFormatter, 1094 | epilog=""" 1095 | idbtool can process complete .idb and .i64 files, but also naked .id0, .id1, .nam, .til files. 1096 | All versions since IDA v2.0 are supported. 1097 | 1098 | Queries start with an optional operator: <,<=,>,>=,==. 1099 | Followed by either a name or address or nodeid. 1100 | Addresses are specified as a sequence of hexadecimal charaters. 1101 | Nodeid's may be specified either as the full node id, starting with ff00, 1102 | or starting with a '_'. 1103 | Names are anything which can be found under the name tree in the database. 1104 | 1105 | After the name/addr/node there is optionally a slash, followed by a node tag, 1106 | and another slash, followed by a index or hash string. 1107 | 1108 | Multiple queries can be specified, terminated by another option, or `--`. 1109 | Add `-v` for pretty printed keys and values. 1110 | 1111 | Examples: 1112 | 1113 | idbtool -v --query "$ user1;S;0" -- x.idb 1114 | idbtool -v --limit 4 --query ">#0xa" -- x.idb 1115 | idbtool -v --limit 5 --query ">Root Node;S;0" -- x.idb 1116 | idbtool -v --limit 10 --query ">Root Node;S" -- x.idb 1117 | idbtool -v --query ".0xff000001;N" -- x.idb 1118 | """) 1119 | parser.add_argument('--verbose', '-v', action='count', default=0) 1120 | parser.add_argument('--recurse', '-r', action='store_true', help='recurse into directories') 1121 | parser.add_argument('--skiplinks', '-L', action='store_true', help='skip symbolic links') 1122 | parser.add_argument('--filetype', '-t', type=str, help='specify filetype when loading `naked` id1,nam or seg files') 1123 | parser.add_argument('--i64', '-i64', action='store_true', help='specify that `naked` file is from a 64 bit database') 1124 | parser.add_argument('--i32', '-i32', action='store_true', help='specify that `naked` file is from a 32 bit database') 1125 | 1126 | parser.add_argument('--names', '-n', action='store_true', help='print names') 1127 | parser.add_argument('--scripts', '-s', action='store_true', help='print scripts') 1128 | parser.add_argument('--structs', '-u', action='store_true', help='print structs') 1129 | # parser.add_argument('--comments', '-c', action='store_true', help='print comments') 1130 | parser.add_argument('--enums', '-e', action='store_true', help='print enums and bitfields') 1131 | parser.add_argument('--imports', action='store_true', help='print imports') 1132 | parser.add_argument('--segs', action='store_true', help='print segments') 1133 | parser.add_argument('--funcdirs', action='store_true', help='print function dirs (folders)') 1134 | parser.add_argument('--info', '-i', action='store_true', help='database info') 1135 | parser.add_argument('--inc', action='store_true', help='dump id0 records by cursor increment') 1136 | parser.add_argument('--dec', action='store_true', help='dump id0 records by cursor decrement') 1137 | parser.add_argument('--id0', "-id0", action='store_true', help='dump id0 records, by walking the page tree') 1138 | parser.add_argument('--id1', "-id1", action='store_true', help='dump id1 records') 1139 | parser.add_argument('--dump', type=str, help='hexdump id1 bytes', metavar='FROM-UNTIL') 1140 | parser.add_argument('--dumpraw', type=str, help='output id1 bytes', metavar='FROM-UNTIL') 1141 | parser.add_argument('--pagedump', "-d", action='store_true', help='dump all btree pages, including any that might have become inaccessible due to datacorruption.') 1142 | parser.add_argument('--classify', action='store_true', help='Classify nodes found in the database.') 1143 | 1144 | parser.add_argument('--query', "-q", type=str, nargs='*', help='search the id0 file for a specific record.') 1145 | parser.add_argument('--limit', '-m', type=int, help='Max nr of records to return for a query.') 1146 | 1147 | parser.add_argument('--recover', action='store_true', help='recover idb from unpacked files, of v2 database') 1148 | parser.add_argument('--debug', action='store_true') 1149 | 1150 | parser.add_argument('FILES', type=str, nargs='*', help='Files') 1151 | 1152 | args = parser.parse_args() 1153 | 1154 | if args.FILES: 1155 | dbs = dict() 1156 | 1157 | for fn in EnumeratePaths(args, args.FILES): 1158 | basepath, filename = os.path.split(fn) 1159 | if isv2name(filename): 1160 | d = dbs.setdefault(basepath, dict()) 1161 | d[xlatv2name(filename)] = fn 1162 | print("%s -> %s : %s" % (xlatv2name(filename), basepath, filename)) 1163 | else: 1164 | basepath, ext = os.path.splitext(fn) 1165 | if isv3ext(ext): 1166 | d = dbs.setdefault(basepath, dict()) 1167 | d[ext.lower()] = fn 1168 | 1169 | if not args.dumpraw: 1170 | print("\n==> " + fn + " <==\n") 1171 | 1172 | try: 1173 | filetype = args.filetype or filetype_from_name(fn) 1174 | with open(fn, "rb") as fh: 1175 | processfile(args, filetype, fh) 1176 | except Exception as e: 1177 | print("ERROR: %s" % e) 1178 | if args.debug: 1179 | raise 1180 | 1181 | if args.recover: 1182 | for basepath, dbfiles in dbs.items(): 1183 | if len(dbfiles) > 1: 1184 | try: 1185 | print("\n==> " + basepath + " <==\n") 1186 | recover_database(args, basepath, dbfiles) 1187 | except Exception as e: 1188 | print("ERROR: %s" % e) 1189 | else: 1190 | print("==> STDIN <==") 1191 | processfile(args, args.filetype, sys.stdin.buffer) 1192 | 1193 | 1194 | if __name__ == '__main__': 1195 | main() 1196 | -------------------------------------------------------------------------------- /idblib.py: -------------------------------------------------------------------------------- 1 | """ 2 | idblib - a module for reading hex-rays Interactive DisAssembler databases 3 | 4 | Supports database versions starting with IDA v2.0 5 | 6 | IDA v1.x is not supported, that was an entirely different file format. 7 | IDA v2.x databases are organised as several files, in a directory 8 | IDA v3.x databases are bundled into .idb files 9 | IDA v4 .. v6 various improvements, like databases larger than 4Gig, and 64 bit support. 10 | 11 | Copyright (c) 2016 Willem Hengeveld 12 | 13 | 14 | An IDB file can contain up to 6 sections: 15 | id0 the main database 16 | id1 contains flags for each byte - what is returned by idc.GetFlags(ea) 17 | nam contains a list of addresses of named items 18 | seg .. only in older databases 19 | til type info 20 | id2 ? 21 | 22 | The id0 database is a simple key/value database, much like leveldb 23 | 24 | types of records: 25 | 26 | Some bookkeeping: 27 | 28 | "$ MAX NODE" -> the highest numbered node value in use. 29 | 30 | A list of names: 31 | 32 | "N" + name -> the node id for that name. 33 | 34 | names are both user/disassembler symbols assigned to addresses 35 | in the disassembled code, and IDA internals, like lists of items, 36 | For example: '$ structs', or 'Root Node'. 37 | 38 | The main part: 39 | 40 | "." + nodeid + tag + index 41 | 42 | This maps directly onto the idasdk netnode interface. 43 | The size of the nodeid and index is 32bits for .idb files and 64 bits for .i64 files. 44 | The nodeid and index are encoded as bigendian numbers in the key, and as little endian 45 | numbers in (most of) the values. 46 | 47 | 48 | """ 49 | from __future__ import division, print_function, absolute_import, unicode_literals 50 | import struct 51 | import binascii 52 | import re 53 | import os 54 | 55 | ############################################################################# 56 | # some code to make this library run with both python2 and python3 57 | ############################################################################# 58 | 59 | import sys 60 | if sys.version_info[0] == 3: 61 | long = int 62 | else: 63 | bytes = bytearray 64 | 65 | try: 66 | cmp(1, 2) 67 | except: 68 | # python3 does not have cmp 69 | def cmp(a, b): return (a > b) - (a < b) 70 | 71 | 72 | class cachedproperty(object): 73 | ## .. only works with python3 somehow. -- todo: figure out why not with python2 74 | def __init__(self, method): 75 | self.method = method 76 | self.name = '_' + method.__name__ 77 | def __get__(self, obj, cls): 78 | if not hasattr(obj, self.name): 79 | value = self.method(obj) 80 | setattr(obj, self.name, value) 81 | else: 82 | value = getattr(obj, self.name) 83 | return value 84 | 85 | 86 | def strz(b, o): 87 | return b[o:b.find(b'\x00', o)].decode('utf-8', 'ignore') 88 | 89 | def makeStringIO(data): 90 | if sys.version_info[0] == 2: 91 | from StringIO import StringIO 92 | return StringIO(data) 93 | else: 94 | from io import BytesIO 95 | return BytesIO(data) 96 | 97 | 98 | ############################################################################# 99 | # some utility functions 100 | ############################################################################# 101 | 102 | 103 | def nonefmt(fmt, item): 104 | # helper for outputting None without raising an error 105 | if item is None: 106 | return "-" 107 | return fmt % item 108 | 109 | 110 | def hexdump(data): 111 | if data is None: 112 | return 113 | return binascii.b2a_hex(data).decode('utf-8') 114 | 115 | 116 | ############################################################################# 117 | 118 | 119 | class FileSection(object): 120 | """ 121 | Presents a file like object which is a section of a larger file. 122 | 123 | `fh` is expected to have a seek and read method. 124 | 125 | 126 | This class is used to access a section (e.g. the .id0 file) of a larger file (e.g. the .idb file) 127 | and make read/seek behave as if it were a separate file. 128 | """ 129 | def __init__(self, fh, start, end): 130 | self.fh = fh 131 | self.start = start 132 | self.end = end 133 | 134 | self.curpos = 0 135 | self.fh.seek(self.start) 136 | 137 | def read(self, size=None): 138 | want = self.end - self.start - self.curpos 139 | if size is not None and want > size: 140 | want = size 141 | 142 | if want <= 0: 143 | return b"" 144 | 145 | # make sure filepointer is at correct position since we are sharing the fh object with others. 146 | self.fh.seek(self.curpos + self.start) 147 | data = self.fh.read(want) 148 | self.curpos += len(data) 149 | return data 150 | 151 | def seek(self, offset, *args): 152 | def isvalidpos(offset): 153 | return 0 <= offset <= self.end - self.start 154 | 155 | if len(args) == 0: 156 | whence = 0 157 | else: 158 | whence = args[0] 159 | if whence == 0: 160 | if not isvalidpos(offset): 161 | print("invalid seek: from %x to SET:%x" % (self.curpos, offset)) 162 | raise Exception("illegal offset") 163 | self.curpos = offset 164 | elif whence == 1: 165 | if not isvalidpos(self.curpos + offset): 166 | raise Exception("illegal offset") 167 | self.curpos += offset 168 | elif whence == 2: 169 | if not isvalidpos(self.end - self.start + offset): 170 | raise Exception("illegal offset") 171 | self.curpos = self.end - self.start + offset 172 | self.fh.seek(self.curpos + self.start) 173 | 174 | def tell(self): 175 | return self.curpos 176 | 177 | 178 | class IdaUnpacker: 179 | """ 180 | Decodes packed ida structures. 181 | This is used o.a. in struct definitions, and .id2 files 182 | 183 | Related sdk functions: pack_dd, unpack_dd, etc. 184 | """ 185 | def __init__(self, wordsize, data): 186 | self.wordsize = wordsize 187 | self.data = data 188 | self.o = 0 189 | 190 | def eof(self): 191 | return self.o >= len(self.data) 192 | def have(self, n): 193 | return self.o+n <= len(self.data) 194 | 195 | def nextword(self): 196 | """ 197 | Return an unsigned word-sized integer from the buffer 198 | """ 199 | if self.wordsize == 4: 200 | return self.next32() 201 | elif self.wordsize == 8: 202 | return self.next64() 203 | else: 204 | raise Exception("unsupported wordsize") 205 | 206 | def nextwordsigned(self): 207 | """ 208 | Return a signed word-sized integer from the buffer 209 | """ 210 | if self.wordsize == 4: 211 | val = self.next32() 212 | if val < 0x80000000: 213 | return val 214 | return val - 0x100000000 215 | elif self.wordsize == 8: 216 | val = self.next64() 217 | if val < 0x8000000000000000: 218 | return val 219 | return val - 0x10000000000000000 220 | else: 221 | raise Exception("unsupported wordsize") 222 | 223 | 224 | def next64(self): 225 | if self.eof(): 226 | return None 227 | lo = self.next32() 228 | hi = self.next32() 229 | return (hi<<32) | lo 230 | 231 | def next16(self): 232 | """ 233 | Return a packed 16 bit integer from the buffer 234 | """ 235 | if self.eof(): 236 | return None 237 | byte = self.data[self.o:self.o+1] 238 | if byte == b'\xff': 239 | # a 16 bit value: 240 | # 1111 1111 xxxx xxxx xxxx xxxx 241 | if self.o+3 > len(self.data): 242 | return None 243 | val, = struct.unpack_from(">H", self.data, self.o+1) 244 | self.o += 3 245 | return val 246 | elif byte < b'\x80': 247 | # a 7 bit value: 248 | # 0xxx xxxx 249 | self.o += 1 250 | val, = struct.unpack("B", byte) 251 | return val 252 | elif byte < b'\xc0': 253 | # a 14 bit value: 254 | # 10xx xxxx xxxx xxxx 255 | if self.o+2 > len(self.data): 256 | return None 257 | val, = struct.unpack_from(">H", self.data, self.o) 258 | self.o += 2 259 | return val&0x3FFF 260 | else: 261 | return None 262 | 263 | def next8(self): 264 | if self.eof(): 265 | return None 266 | byte = self.data[self.o:self.o+1] 267 | self.o += 1 268 | val, = struct.unpack("B", byte) 269 | 270 | return val 271 | 272 | def next32(self): 273 | """ 274 | Return a packed integer from the buffer 275 | """ 276 | if self.eof(): 277 | return None 278 | byte = self.data[self.o:self.o+1] 279 | if byte == b'\xff': 280 | # a 32 bit value: 281 | # 1111 1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx 282 | if self.o+5 > len(self.data): 283 | return None 284 | val, = struct.unpack_from(">L", self.data, self.o+1) 285 | self.o += 5 286 | return val 287 | elif byte < b'\x80': 288 | # a 7 bit value: 289 | # 0xxx xxxx 290 | self.o += 1 291 | val, = struct.unpack("B", byte) 292 | return val 293 | elif byte < b'\xc0': 294 | # a 14 bit value: 295 | # 10xx xxxx xxxx xxxx 296 | if self.o+2 > len(self.data): 297 | return None 298 | val, = struct.unpack_from(">H", self.data, self.o) 299 | self.o += 2 300 | return val&0x3FFF 301 | elif byte < b'\xe0': 302 | # a 29 bit value: 303 | # 110x xxxx xxxx xxxx xxxx xxxx xxxx xxxx 304 | if self.o+4 > len(self.data): 305 | return None 306 | val, = struct.unpack_from(">L", self.data, self.o) 307 | self.o += 4 308 | return val&0x1FFFFFFF 309 | else: 310 | return None 311 | 312 | def bytes(self, n): 313 | """ 314 | Return fixed length string from buffer 315 | """ 316 | if not self.have(n): 317 | return None 318 | data = self.data[self.o : self.o+n] 319 | self.o += n 320 | return data 321 | 322 | 323 | class IDBFile(object): 324 | """ 325 | Provide access to the various sections in an .idb file. 326 | 327 | Usage: 328 | 329 | idb = IDBFile(fhandle) 330 | id0 = idb.getsection(ID0File) 331 | 332 | ID0File is expected to have a class property 'INDEX' 333 | 334 | # v1..v5 id1 and nam files start with 'Va0' .. 'Va4' 335 | # v6 id1 and nam files start with 'VA*' 336 | # til files start with 'IDATIL' 337 | # id2 files start with 'IDAS\x1d\xa5\x55\x55' 338 | 339 | """ 340 | def __init__(self, fh): 341 | """ constructor takes a filehandle """ 342 | self.fh = fh 343 | self.fh.seek(0) 344 | hdrdata = self.fh.read(0x100) 345 | 346 | self.magic = hdrdata[0:4].decode('utf-8', 'ignore') 347 | if self.magic not in ('IDA0', 'IDA1', 'IDA2'): 348 | raise Exception("invalid file magic") 349 | 350 | values = struct.unpack_from("<6LH6L", hdrdata, 6) 351 | if values[5] != 0xaabbccdd: 352 | fileversion = 0 353 | offsets = list(values[0:5]) 354 | offsets.append(0) 355 | checksums = [0 for _ in range(6)] 356 | else: 357 | fileversion = values[6] 358 | 359 | if fileversion < 5: 360 | offsets = list(values[0:5]) 361 | checksums = list(values[8:13]) 362 | idsofs, idscheck = struct.unpack_from("> 1 517 | if k < a[mid].key: 518 | last = mid 519 | else: 520 | first = mid + 1 521 | return first - 1 522 | 523 | 524 | """ 525 | ################################################################################ 526 | 527 | I would have liked to make these classes a nested class of BTree, but 528 | the problem is than there is no way for a nested-nested class 529 | of BTree to refer back to a toplevel nested class of BTree. 530 | So moving these outside of BTree so i can use them as baseclasses 531 | in the various page implementations 532 | 533 | class BTree: 534 | class BaseEntry(object): pass 535 | class BasePage(object): pass 536 | class Page15(BasePage): 537 | class Entry(BTree.BaseEntry): 538 | pass 539 | 540 | >>> NameError: name 'BTree' is not defined 541 | 542 | """ 543 | 544 | 545 | class BaseIndexEntry(object): 546 | """ 547 | Baseclass for Index Entries. 548 | 549 | Index entries have a key + value, and a page containing keys larger than that key 550 | in this index entry. 551 | 552 | """ 553 | def __init__(self, data): 554 | ofs = self.recofs 555 | if self.recofs < 6: 556 | # reading an invalid page... 557 | self.val = self.key = None 558 | return 559 | 560 | keylen, = struct.unpack_from(", <, ==, >=, <= ) 593 | """ 594 | class BasePage(object): 595 | """ 596 | Baseclass for Pages. for the various btree versions ( 1.5, 1.6 and 2.0 ) 597 | there are subclasses which specify the exact layout of the page header, 598 | and index / leaf entries. 599 | 600 | Leaf pages don't have a 'preceeding' page pointer. 601 | 602 | """ 603 | def __init__(self, data, entsize, entfmt): 604 | self.preceeding, self.count = struct.unpack_from(entfmt, data) 605 | if self.preceeding: 606 | entrytype = self.IndexEntry 607 | else: 608 | entrytype = self.LeafEntry 609 | 610 | self.index = [] 611 | key = b"" 612 | for i in range(self.count): 613 | ent = entrytype(key, data, entsize * (1 + i)) 614 | self.index.append(ent) 615 | key = ent.key 616 | self.unknown, self.freeptr = struct.unpack_from(entfmt, data, entsize * (1 + self.count)) 617 | 618 | def find(self, key): 619 | """ 620 | Searches pages for key, returns relation to key: 621 | 622 | recurse -> found a next level index page to search for key. 623 | also returns the next level page nr 624 | gt -> found a value with a key greater than the one searched for. 625 | lt -> found a value with a key less than the one searched for. 626 | eq -> found a value with a key equal to the one searched for. 627 | gt, lt and eq return the index for the key found. 628 | 629 | # for an index entry: the key is 'less' than anything in the page pointed to. 630 | """ 631 | i = binary_search(self.index, key) 632 | if i < 0: 633 | if self.isindex(): 634 | return ('recurse', -1) 635 | return ('gt', 0) 636 | if self.index[i].key == key: 637 | return ('eq', i) 638 | if self.isindex(): 639 | return ('recurse', i) 640 | return ('lt', i) 641 | 642 | def getpage(self, ix): 643 | """ For Indexpages, returns the page ptr for the specified entry """ 644 | return self.preceeding if ix < 0 else self.index[ix].page 645 | 646 | def getkey(self, ix): 647 | """ For all page types, returns the key for the specified entry """ 648 | return self.index[ix].key 649 | 650 | def getval(self, ix): 651 | """ For all page types, returns the value for the specified entry """ 652 | return self.index[ix].val 653 | 654 | def isleaf(self): 655 | """ True when this is a Leaf Page """ 656 | return self.preceeding == 0 657 | 658 | def isindex(self): 659 | """ True when this is an Index Page """ 660 | return self.preceeding != 0 661 | 662 | def __repr__(self): 663 | return ("leaf" if self.isleaf() else ("index<%d>" % self.preceeding)) + repr(self.index) 664 | 665 | ###################################################### 666 | # Page objects for the various versions of the database 667 | ###################################################### 668 | class Page15(BasePage): 669 | """ v1.5 b-tree page """ 670 | class IndexEntry(BaseIndexEntry): 671 | def __init__(self, key, data, ofs): 672 | self.page, self.recofs = struct.unpack_from("= 0: 764 | self.stack.append((page, ix)) 765 | else: 766 | # move towards leaf 767 | self.stack.append((page, ix)) 768 | while page.isindex(): 769 | page = self.db.readpage(page.getpage(ix)) 770 | ix = len(page.index) - 1 771 | self.stack.append((page, ix)) 772 | 773 | def eof(self): 774 | return len(self.stack) == 0 775 | 776 | def getkey(self): 777 | """ return the key value pointed to by the cursor """ 778 | page, ix = self.stack[-1] 779 | return page.getkey(ix) 780 | 781 | def getval(self): 782 | """ return the data value pointed to by the cursor """ 783 | page, ix = self.stack[-1] 784 | return page.getval(ix) 785 | 786 | def __repr__(self): 787 | return "cursor:" + repr(self.stack) 788 | 789 | def __init__(self, fh): 790 | """ BTree constructor - takes a filehandle """ 791 | self.fh = fh 792 | 793 | self.fh.seek(0) 794 | data = self.fh.read(64) 795 | 796 | if data[13:].startswith(b"B-tree v 1.5 (C) Pol 1990"): 797 | self.parseheader15(data) 798 | self.page = self.Page15 799 | self.version = 15 800 | elif data[19:].startswith(b"B-tree v 1.6 (C) Pol 1990"): 801 | self.parseheader16(data) 802 | self.page = self.Page16 803 | self.version = 16 804 | elif data[19:].startswith(b"B-tree v2"): 805 | self.parseheader16(data) 806 | self.page = self.Page20 807 | self.version = 20 808 | else: 809 | print("unknown btree: %s" % hexdump(data)) 810 | raise Exception("unknown b-tree") 811 | 812 | def parseheader15(self, data): 813 | self.firstfree, self.pagesize, self.firstindex, self.reccount, self.pagecount = struct.unpack_from(" record equal to the key, None when not found 832 | 'le' -> last record with key <= to key 833 | 'ge' -> first record with key >= to key 834 | 'lt' -> last record with key < to key 835 | 'gt' -> first record with key > to key 836 | """ 837 | 838 | # descend tree to leaf nearest to the `key` 839 | page = self.readpage(self.firstindex) 840 | stack = [] 841 | while len(stack) < 256: 842 | act, ix = page.find(key) 843 | stack.append((page, ix)) 844 | if act != 'recurse': 845 | break 846 | page = self.readpage(page.getpage(ix)) 847 | 848 | if len(stack) == 256: 849 | raise Exception("b-tree corrupted") 850 | cursor = BTree.Cursor(self, stack) 851 | 852 | # now correct for what was actually asked. 853 | if act == rel: 854 | pass 855 | elif rel == 'eq' and act != 'eq': 856 | return None 857 | elif rel in ('ge', 'le') and act == 'eq': 858 | pass 859 | elif rel in ('gt', 'ge') and act == 'lt': 860 | cursor.next() 861 | elif rel == 'gt' and act == 'eq': 862 | cursor.next() 863 | elif rel in ('lt', 'le') and act == 'gt': 864 | cursor.prev() 865 | elif rel == 'lt' and act == 'eq': 866 | cursor.prev() 867 | 868 | return cursor 869 | 870 | def dump(self): 871 | """ raw dump of all records in the b-tree """ 872 | print("pagesize=%08x, reccount=%08x, pagecount=%08x" % (self.pagesize, self.reccount, self.pagecount)) 873 | self.dumpfree() 874 | self.dumptree(self.firstindex) 875 | 876 | def dumpfree(self): 877 | """ list all free pages """ 878 | fmt = "L" if self.version > 15 else "H" 879 | hdrsize = 8 if self.version > 15 else 4 880 | pn = self.firstfree 881 | if pn == 0: 882 | print("no free pages") 883 | return 884 | while pn: 885 | self.fh.seek(pn * self.pagesize) 886 | data = self.fh.read(self.pagesize) 887 | if len(data) == 0: 888 | print("could not read FREE data at page %06x" % pn) 889 | break 890 | count, nextfree = struct.unpack_from("<" + (fmt * 2), data) 891 | freepages = list(struct.unpack_from("<" + (fmt * count), data, hdrsize)) 892 | freepages.insert(0, pn) 893 | for pn in freepages: 894 | self.fh.seek(pn * self.pagesize) 895 | data = self.fh.read(self.pagesize) 896 | print("%06x: free: %s" % (pn, hexdump(data[:64]))) 897 | pn = nextfree 898 | 899 | def dumpindented(self, pn, indent=0): 900 | """ 901 | Dump all nodes of the current page with keys indented, showing how the `indent` 902 | feature works 903 | """ 904 | page = self.readpage(pn) 905 | print(" " * indent, page) 906 | if page.isindex(): 907 | print(" " * indent, end="") 908 | self.dumpindented(page.preceeding, indent + 1) 909 | for p in range(len(page.index)): 910 | print(" " * indent, end="") 911 | self.dumpindented(page.getpage(p), indent + 1) 912 | 913 | def dumptree(self, pn): 914 | """ 915 | Walks entire tree, dumping all records on each page 916 | in sequential order 917 | """ 918 | page = self.readpage(pn) 919 | print("%06x: preceeding = %06x, reccount = %04x" % (pn, page.preceeding, page.count)) 920 | for ent in page.index: 921 | print(" %s" % ent) 922 | if page.preceeding: 923 | self.dumptree(page.preceeding) 924 | for ent in page.index: 925 | self.dumptree(ent.page) 926 | 927 | def pagedump(self): 928 | """ 929 | dump the contents of all pages, ignoring links between pages, 930 | this will enable you to view contents of pages which have become 931 | lost due to datacorruption. 932 | """ 933 | self.fh.seek(self.pagesize) 934 | pn = 1 935 | while True: 936 | try: 937 | pagedata = self.fh.read(self.pagesize) 938 | if len(pagedata) == 0: 939 | break 940 | elif len(pagedata) != self.pagesize: 941 | print("%06x: incomplete - %d bytes ( pagesize = %d )" % (pn, len(pagedata), self.pagesize)) 942 | break 943 | elif pagedata == b'\x00' * self.pagesize: 944 | print("%06x: empty" % (pn)) 945 | else: 946 | page = self.page(pagedata) 947 | 948 | print("%06x: preceeding = %06x, reccount = %04x" % (pn, page.preceeding, page.count)) 949 | for ent in page.index: 950 | print(" %s" % ent) 951 | except Exception as e: 952 | print("%06x: ERROR decoding as B-tree page: %s" % (pn, e)) 953 | pn += 1 954 | 955 | 956 | class ID0File(object): 957 | """ 958 | Reads .id0 or 0.ida files, containing a v1.5, v1.6 or v2.0 b-tree database. 959 | 960 | This is basically the low level netnode interface from the idasdk. 961 | 962 | There are two major groups of nodes in the database: 963 | 964 | key = "N"+name -> value = littleendian(nodeid) 965 | key = "."+bigendian(nodeid)+char(tag)+bigendian(value) 966 | key = "."+bigendian(nodeid)+char(tag)+string 967 | 968 | key = "."+bigendian(nodeid)+char(tag) 969 | 970 | and some special nodes for bookkeeping: 971 | "$ MAX LINK" 972 | "$ MAX NODE" 973 | "$ NET DESC" 974 | 975 | Very old databases also have name entries with a lowercase 'n', 976 | and corresponding '-'+value nodes. 977 | I am not sure what those are for. 978 | 979 | several items have specially named nodes, like "$ structs", "$ enums", "Root Node" 980 | 981 | nodeByName(name) returns the nodeid for a name 982 | bytes(nodeid, tag, val) returns the value for a specific node. 983 | 984 | """ 985 | INDEX = 0 986 | 987 | def __init__(self, idb, fh): 988 | self.btree = BTree(fh) 989 | 990 | self.wordsize = None 991 | self.maxnode = None 992 | 993 | if idb.magic == 'IDA2': 994 | # .i64 files use 64 bit values for some things. 995 | self.wordsize = 8 996 | elif idb.magic in ('IDA0', 'IDA1'): 997 | self.wordsize = 4 998 | else: 999 | # determine wordsize from value of '$ MAX NODE' 1000 | c = self.btree.find('eq', b'$ MAX NODE') 1001 | if c and not c.eof(): 1002 | self.maxnode = c.getval() 1003 | self.wordsize = len(c.getval()) 1004 | 1005 | if self.wordsize not in (4, 8): 1006 | print("Can not determine wordsize for database - assuming 32 bit") 1007 | self.wordsize = 4 1008 | 1009 | if self.wordsize == 4: 1010 | self.nodebase = 0xFF000000 1011 | if not self.maxnode: 1012 | self.maxnode = self.nodebase + 0x0FFFFF 1013 | self.fmt = "L" 1014 | else: 1015 | self.nodebase = 0xFF00000000000000 1016 | if not self.maxnode: 1017 | self.maxnode = self.nodebase + 0x0FFFFFFF 1018 | 1019 | self.fmt = "Q" 1020 | 1021 | # set the keyformat for this database 1022 | self.keyfmt = ">s" + self.fmt + "s" + self.fmt 1023 | 1024 | @cachedproperty 1025 | def root(self): return self.nodeByName("Root Node") 1026 | 1027 | # note: versions before 4.7 used a short instead of a long 1028 | # and stored the versions with one minor digit ( 43 ) , instead of two ( 480 ) 1029 | @cachedproperty 1030 | def idaver(self): return self.int(self.root, 'A', -1) 1031 | 1032 | @cachedproperty 1033 | def idbparams(self): return self.bytes(self.root, 'S', 0x41b994) 1034 | @cachedproperty 1035 | def idaverstr(self): return self.string(self.root, 'S', 1303) 1036 | @cachedproperty 1037 | def nropens(self): return self.int(self.root, 'A', -4) 1038 | @cachedproperty 1039 | def creationtime(self): return self.int(self.root, 'A', -2) 1040 | @cachedproperty 1041 | def originmd5(self): return self.bytes(self.root, 'S', 1302) 1042 | @cachedproperty 1043 | def somecrc(self): return self.int(self.root, 'A', -5) 1044 | 1045 | def prettykey(self, key): 1046 | """ 1047 | returns the key in a readable format. 1048 | """ 1049 | f = list(self.decodekey(key)) 1050 | f[0] = f[0].decode('utf-8') 1051 | if len(f) > 2 and type(f[2]) == bytes: 1052 | f[2] = f[2].decode('utf-8') 1053 | 1054 | if f[0] == '.': 1055 | if len(f) == 2: 1056 | return "%s%16x" % tuple(f) 1057 | elif len(f) == 3: 1058 | return "%s%16x %s" % tuple(f) 1059 | elif len(f) == 4: 1060 | if f[2] == 'H' and type(f[3]) in (str, bytes): 1061 | f[3] = f[3].decode('utf-8') 1062 | return "%s%16x %s '%s'" % tuple(f) 1063 | elif type(f[3]) in (int, long): 1064 | return "%s%16x %s %x" % tuple(f) 1065 | else: 1066 | f[3] = hexdump(f[3]) 1067 | return "%s%16x %s %s" % tuple(f) 1068 | elif f[0] in ('N', 'n', '$'): 1069 | if type(f[1]) in (int, long): 1070 | return "%s %x %16x" % tuple(f) 1071 | else: 1072 | return "%s'%s'" % tuple(f) 1073 | elif f[0] == '-': 1074 | return "%s %x" % tuple(f) 1075 | 1076 | return hexdump(key) 1077 | 1078 | def prettyval(self, val): 1079 | """ 1080 | returns the value in a readable format. 1081 | """ 1082 | if len(val) == self.wordsize and val[-1:] in (b'\x00', b'\xff'): 1083 | return "%x" % struct.unpack("<" + self.fmt, val) 1084 | if len(val) == self.wordsize and re.search(b'[\x00-\x08\x0b\x0c\x0e-\x1f]', val, re.DOTALL): 1085 | return "%x" % struct.unpack("<" + self.fmt, val) 1086 | if len(val) < 2 or not re.match(b'^[\x09\x0a\x0d\x20-\xff]+.$', val, re.DOTALL): 1087 | return hexdump(val) 1088 | val = val.replace(b"\n", b"\\n") 1089 | return "'%s'" % val.decode('utf-8', 'ignore') 1090 | 1091 | def nodeByName(self, name): 1092 | """ Return a nodeid by name """ 1093 | # note: really long names are encoded differently: 1094 | # 'N'+'\x00'+pack('Q', nameid) => ofs 1095 | # and (ofs, 'N') -> nameid 1096 | 1097 | # at nodebase ( 0xFF000000, 'S', 0x100*nameid ) there is a series of blobs for max 0x80000 sized names. 1098 | cur = self.btree.find('eq', self.namekey(name)) 1099 | if cur: 1100 | return struct.unpack('<' + self.fmt, cur.getval())[0] 1101 | 1102 | def namekey(self, name): 1103 | if type(name) in (int, long): 1104 | return struct.pack(" 1: 1117 | # utf-8 encode the tag 1118 | args = args[:1] + (args[1].encode('utf-8'),) + args[2:] 1119 | 1120 | if len(args) == 3 and type(args[-1]) == str: 1121 | # node.tag.string type keys 1122 | return struct.pack(self.keyfmt[:1 + len(args)], b'.', *args[:-1]) + args[-1].encode('utf-8') 1123 | elif len(args) == 3 and type(args[-1]) == type(-1) and args[-1] < 0: 1124 | # negative values -> need lowercase fmt char 1125 | return struct.pack(self.keyfmt[:1 + len(args)] + self.fmt.lower(), b'.', *args) 1126 | else: 1127 | # node.tag.value type keys 1128 | return struct.pack(self.keyfmt[:2 + len(args)], b'.', *args) 1129 | 1130 | def decodekey(self, key): 1131 | """ 1132 | splits a key in a tuple, one of: 1133 | ( [ 'N', 'n', '$' ], 0, bignameid ) 1134 | ( [ 'N', 'n', '$' ], name ) 1135 | ( '-', id ) 1136 | ( '.', id ) 1137 | ( '.', id, tag ) 1138 | ( '.', id, tag, value ) 1139 | ( '.', id, 'H', name ) 1140 | """ 1141 | if key[:1] in (b'n', b'N', b'$'): 1142 | if key[1:2] == b"\x00" and len(key) == 2 + self.wordsize: 1143 | return struct.unpack(">sB" + self.fmt, key) 1144 | else: 1145 | return key[:1], key[1:].decode('utf-8', 'ignore') 1146 | if key[:1] == b'-': 1147 | return struct.unpack(">s" + self.fmt, key) 1148 | if len(key) == 1 + self.wordsize: 1149 | return struct.unpack(self.keyfmt[:3], key) 1150 | if len(key) == 1 + self.wordsize + 1: 1151 | return struct.unpack(self.keyfmt[:4], key) 1152 | if len(key) == 1 + 2 * self.wordsize + 1: 1153 | return struct.unpack(self.keyfmt[:5], key) 1154 | if len(key) > 1 + self.wordsize + 1: 1155 | f = struct.unpack_from(self.keyfmt[:4], key) 1156 | return f + (key[2 + self.wordsize:], ) 1157 | raise Exception("unknown key format") 1158 | 1159 | def bytes(self, *args): 1160 | """ return a raw value for the given arguments """ 1161 | if len(args) == 1 and isinstance(args[0], BTree.Cursor): 1162 | cur = args[0] 1163 | else: 1164 | cur = self.btree.find('eq', self.makekey(*args)) 1165 | 1166 | if cur: 1167 | return cur.getval() 1168 | 1169 | def int(self, *args): 1170 | """ 1171 | Return the integer stored in the specified node. 1172 | 1173 | Any type of integer will be decoded: byte, short, long, long long 1174 | 1175 | """ 1176 | data = self.bytes(*args) 1177 | if data is not None: 1178 | if len(data) == 1: 1179 | return struct.unpack("" + self.fmt, data, 1) 1204 | nameblob = self.blob(self.nodebase, 'S', nameid * 256, nameid * 256 + 32) 1205 | return nameblob.rstrip(b"\x00").decode('utf-8') 1206 | return data.rstrip(b"\x00").decode('utf-8') 1207 | 1208 | def blob(self, nodeid, tag, start=0, end=0xFFFFFFFF): 1209 | """ 1210 | Blobs are stored in sequential nodes 1211 | with increasing index values. 1212 | 1213 | most blobs, like scripts start at index 1214 | 0, long names start at a specified 1215 | offset. 1216 | 1217 | """ 1218 | startkey = self.makekey(nodeid, tag, start) 1219 | endkey = self.makekey(nodeid, tag, end) 1220 | cur = self.btree.find('ge', startkey) 1221 | data = b'' 1222 | while cur.getkey() <= endkey: 1223 | data += cur.getval() 1224 | cur.next() 1225 | return data 1226 | 1227 | 1228 | class ID1File(object): 1229 | """ 1230 | Reads .id1 or 1.IDA files, containing byte flags 1231 | 1232 | This is basically the information for the .idc GetFlags(ea), 1233 | FirstSeg(), NextSeg(ea), SegStart(ea), SegEnd(ea) functions 1234 | """ 1235 | INDEX = 1 1236 | 1237 | class SegInfo: 1238 | def __init__(self, startea, endea, offset): 1239 | self.startea = startea 1240 | self.endea = endea 1241 | self.offset = offset 1242 | 1243 | def __init__(self, idb, fh): 1244 | if idb.magic == 'IDA2': 1245 | wordsize, fmt = 8, "Q" 1246 | else: 1247 | wordsize, fmt = 4, "L" 1248 | # todo: verify wordsize using the following heuristic: 1249 | # L -> starting at: seglistofs + nsegs*seginfosize are all zero 1250 | # L -> starting at seglistofs .. nsegs*seginfosize every even word must be unique 1251 | 1252 | self.fh = fh 1253 | fh.seek(0) 1254 | hdrdata = fh.read(32) 1255 | magic = hdrdata[:4] 1256 | if magic in (b'Va4\x00', b'Va3\x00', b'Va2\x00', b'Va1\x00', b'Va0\x00'): 1257 | nsegments, npages = struct.unpack_from(" starting at: seglistofs + nsegs*seginfosize are all zero 1293 | # L -> starting at seglistofs .. nsegs*seginfosize every even word must be unique 1294 | 1295 | def dump(self): 1296 | """ print first and last bits for each segment """ 1297 | for seg in self.seglist: 1298 | print("==== %08x-%08x" % (seg.startea, seg.endea)) 1299 | if seg.endea - seg.startea < 30: 1300 | for ea in range(seg.startea, seg.endea): 1301 | print(" %08x: %08x" % (ea, self.getFlags(ea))) 1302 | else: 1303 | for ea in range(seg.startea, seg.startea + 10): 1304 | print(" %08x: %08x" % (ea, self.getFlags(ea))) 1305 | print("...") 1306 | for ea in range(seg.endea - 10, seg.endea): 1307 | print(" %08x: %08x" % (ea, self.getFlags(ea))) 1308 | 1309 | def find_segment(self, ea): 1310 | """ do a linear search for the given address in the segment list """ 1311 | for seg in self.seglist: 1312 | if seg.startea <= ea < seg.endea: 1313 | return seg 1314 | 1315 | def getFlags(self, ea): 1316 | seg = self.find_segment(ea) 1317 | if not seg: 1318 | return 0 1319 | self.fh.seek(seg.offset + 4 * (ea - seg.startea)) 1320 | return struct.unpack(">= 1 1377 | self.wordsize = wordsize 1378 | self.wordfmt = fmt 1379 | self.nnames = nnames 1380 | self.pagesize = pagesize 1381 | 1382 | def dump(self): 1383 | print("nam: nnames=%d, npages=%d, pagesize=%08x" % (self.nnames, self.npages, self.pagesize)) 1384 | 1385 | def allnames(self): 1386 | self.fh.seek(self.pagesize) 1387 | n = 0 1388 | while n < self.nnames: 1389 | data = self.fh.read(self.pagesize) 1390 | want = min(self.nnames - n, int(self.pagesize / self.wordsize)) 1391 | ofslist = struct.unpack_from("<%d%s" % (want, self.wordfmt), data, 0) 1392 | for ea in ofslist: 1393 | yield ea 1394 | n += want 1395 | 1396 | 1397 | class SEGFile(object): 1398 | """ reads .seg or $SEGS.IDA files. """ 1399 | INDEX = 3 1400 | 1401 | def __init__(self, idb, fh): 1402 | pass 1403 | 1404 | 1405 | class TILFile(object): 1406 | """ reads .til files """ 1407 | INDEX = 4 1408 | 1409 | def __init__(self, idb, fh): 1410 | pass 1411 | # note: v3 databases had a .reg instead of .til 1412 | 1413 | 1414 | class ID2File(object): 1415 | """ 1416 | Reads .id2 files 1417 | 1418 | ID2 sections contain packed data, resulting in tripples 1419 | of unknown use. 1420 | """ 1421 | INDEX = 5 1422 | 1423 | def __init__(self, idb, fh): 1424 | pass 1425 | 1426 | 1427 | class Struct: 1428 | """ 1429 | Decodes info for structures 1430 | 1431 | (structnode, N) = structname 1432 | (structnode, D, address) = xref-type 1433 | (structnode, M, 0) = packed struct info 1434 | (structnode, S, 27) = packed value(addr, byte) 1435 | """ 1436 | class Member: 1437 | """ 1438 | (membernode, N) = struct.member-name 1439 | (membernode, A, 3) = structid+1 1440 | (membernode, A, 8) = 1441 | (membernode, A, 11) = enumid+1 1442 | (membernode, A, 16) = flag? -- 4:variable length flag? 1443 | (membernode, S, 0x3000) = type (set with 'Y') 1444 | (membernode, S, 0x3001) = names used in 'type' 1445 | (membernode, S, 5) = array type? 1446 | (membernode, S, 9) = offset-type 1447 | (membernode, D, address) = xref-type 1448 | (membernode, d, structid) = xref-type -- for sub-structs 1449 | """ 1450 | def __init__(self, id0, spec): 1451 | self._id0 = id0 1452 | self._nodeid = spec.nextword() + self._id0.nodebase 1453 | self.skip = spec.nextword() 1454 | self.size = spec.nextword() 1455 | self.flags = spec.next32() 1456 | self.props = spec.next32() 1457 | self.ofs = None 1458 | @cachedproperty 1459 | def name(self): return self._id0.name(self._nodeid) 1460 | @cachedproperty 1461 | def enumid(self): return self._id0.int(self._nodeid, 'A', 11) 1462 | @cachedproperty 1463 | def stringtype(self): return self._id0.int(self._nodeid, 'A', 16) 1464 | @cachedproperty 1465 | def structid(self): return self._id0.int(self._nodeid, 'A', 3) 1466 | @cachedproperty 1467 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1468 | @cachedproperty 1469 | def ptrinfo(self): return self._id0.bytes(self._nodeid, 'S', 9) 1470 | @cachedproperty 1471 | def typeinfo(self): return self._id0.bytes(self._nodeid, 'S', 0x3000) 1472 | 1473 | def __init__(self, id0, nodeid): 1474 | self._id0 = id0 1475 | self._nodeid = nodeid 1476 | 1477 | spec = self._id0.blob(self._nodeid, 'M') 1478 | p = IdaUnpacker(self._id0.wordsize, spec) 1479 | if self._id0.idaver >= 40: 1480 | # 1 = SF_VAR, 2 = SF_UNION, 4 = SF_HASHUNI, 8 = SF_NOLIST, 0x10 = SF_TYPLIB, 0x20 = SF_HIDDEN, 0x40 = SF_FRAME, 0xF80 = SF_ALIGN, 0x1000 = SF_GHOST 1481 | self.flags = p.next32() 1482 | else: 1483 | self.flags = 0 1484 | 1485 | nmembers = p.next32() 1486 | 1487 | self.members = [] 1488 | o = 0 1489 | for i in range(nmembers): 1490 | m = Struct.Member(self._id0, p) 1491 | m.ofs = o 1492 | o += m.size 1493 | 1494 | self.members.append(m) 1495 | 1496 | self.extra = [] 1497 | while not p.eof(): 1498 | self.extra.append(p.next32()) 1499 | 1500 | @cachedproperty 1501 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1502 | @cachedproperty 1503 | def name(self): return self._id0.name(self._nodeid) 1504 | 1505 | def __iter__(self): 1506 | for m in self.members: 1507 | yield m 1508 | 1509 | 1510 | class Enum: 1511 | """ 1512 | (enumnode, N) = enum-name 1513 | (enumnode, A, -1) = nr of values 1514 | (enumnode, A, -3) = representation 1515 | (enumnode, A, -5) = flags: bitfield, hidden, ... 1516 | (enumnode, A, -8) = 1517 | (enumnode, E, value) = valuenode + 1 1518 | 1519 | """ 1520 | class Member: 1521 | """ 1522 | (membernode, N) = membername 1523 | (membernode, A, -2) = enumnode + 1 1524 | (membernode, A, -3) = member value 1525 | """ 1526 | def __init__(self, id0, nodeid): 1527 | self._id0 = id0 1528 | self._nodeid = nodeid 1529 | 1530 | @cachedproperty 1531 | def value(self): return self._id0.int(self._nodeid, 'A', -3) 1532 | @cachedproperty 1533 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1534 | @cachedproperty 1535 | def name(self): return self._id0.name(self._nodeid) 1536 | 1537 | def __init__(self, id0, nodeid): 1538 | self._id0 = id0 1539 | self._nodeid = nodeid 1540 | 1541 | @cachedproperty 1542 | def count(self): return self._id0.int(self._nodeid, 'A', -1) 1543 | @cachedproperty 1544 | def representation(self): return self._id0.int(self._nodeid, 'A', -3) 1545 | 1546 | # flags>>3 -> width 1547 | # flags&1 -> bitfield 1548 | @cachedproperty 1549 | def flags(self): return self._id0.int(self._nodeid, 'A', -5) 1550 | 1551 | @cachedproperty 1552 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1553 | @cachedproperty 1554 | def name(self): return self._id0.name(self._nodeid) 1555 | 1556 | def __iter__(self): 1557 | startkey = self._id0.makekey(self._nodeid, 'E') 1558 | endkey = self._id0.makekey(self._nodeid, 'F') 1559 | cur = self._id0.btree.find('ge', startkey) 1560 | while cur.getkey() < endkey: 1561 | yield Enum.Member(self._id0, self._id0.int(cur) - 1) 1562 | cur.next() 1563 | 1564 | 1565 | class Bitfield: 1566 | class Member: 1567 | def __init__(self, id0, nodeid): 1568 | self._id0 = id0 1569 | self._nodeid = nodeid 1570 | 1571 | @cachedproperty 1572 | def value(self): return self._id0.int(self._nodeid, 'A', -3) 1573 | @cachedproperty 1574 | def mask(self): return self._id0.int(self._nodeid, 'A', -6) - 1 1575 | @cachedproperty 1576 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1577 | @cachedproperty 1578 | def name(self): return self._id0.name(self._nodeid) 1579 | 1580 | class Mask: 1581 | def __init__(self, id0, nodeid, mask): 1582 | self._id0 = id0 1583 | self._nodeid = nodeid 1584 | self.mask = mask 1585 | 1586 | @cachedproperty 1587 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1588 | @cachedproperty 1589 | def name(self): return self._id0.name(self._nodeid) 1590 | 1591 | def __iter__(self): 1592 | """ 1593 | Enumerates all Masks 1594 | """ 1595 | startkey = self._id0.makekey(self._nodeid, 'E') 1596 | endkey = self._id0.makekey(self._nodeid, 'F') 1597 | cur = self._id0.btree.find('ge', startkey) 1598 | while cur.getkey() < endkey: 1599 | yield Bitfield.Member(self._id0, self._id0.int(cur) - 1) 1600 | cur.next() 1601 | 1602 | 1603 | def __init__(self, id0, nodeid): 1604 | self._id0 = id0 1605 | self._nodeid = nodeid 1606 | 1607 | @cachedproperty 1608 | def count(self): return self._id0.int(self._nodeid, 'A', -1) 1609 | @cachedproperty 1610 | def representation(self): return self._id0.int(self._nodeid, 'A', -3) 1611 | @cachedproperty 1612 | def flags(self): return self._id0.int(self._nodeid, 'A', -5) 1613 | 1614 | @cachedproperty 1615 | def comment(self, repeatable): return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0) 1616 | @cachedproperty 1617 | def name(self): return self._id0.name(self._nodeid) 1618 | 1619 | def __iter__(self): 1620 | """ 1621 | Enumerates all Masks 1622 | """ 1623 | startkey = self._id0.makekey(self._nodeid, 'm') 1624 | endkey = self._id0.makekey(self._nodeid, 'n') 1625 | cur = self._id0.btree.find('ge', startkey) 1626 | while cur.getkey() < endkey: 1627 | key = self._id0.decodekey(cur.getkey()) 1628 | yield Bitfield.Mask(self._id0, self._id0.int(cur) - 1, key[-1]) 1629 | cur.next() 1630 | 1631 | class IDBParams: 1632 | def __init__(self, id0, data): 1633 | self._id0 = id0 1634 | magic, self.version, = struct.unpack_from("<3sH", data, 0) 1635 | if self.version<700: 1636 | cpu, self.idpflags, self.demnames, self.filetype, self.coresize, self.corestart, self.ostype, self.apptype = struct.unpack_from("<8sBBH" + (id0.fmt * 2) + "HH", data, 5) 1637 | self.cpu = strz(cpu, 0) 1638 | else: 1639 | p = IdaUnpacker(id0.wordsize, data[5:]) 1640 | cpulen = p.next32() 1641 | self.cpu = p.bytes(cpulen) 1642 | genflags = p.next32() 1643 | self.idpflags = p.next32() 1644 | self.demnames = 0 1645 | changecount = p.next32() 1646 | self.filetype = p.next32() 1647 | self.ostype = p.next32() 1648 | self.apptype = p.next32() 1649 | asmtype = p.next32() 1650 | specsegs = p.next32() 1651 | specsegs = p.next32() 1652 | aflags = p.next32() 1653 | aflags2 = p.next32() 1654 | base = p.nextword() 1655 | startss = p.nextword() 1656 | startcs = p.nextword() 1657 | startip = p.nextword() 1658 | startea = p.nextword() 1659 | startsp = p.nextword() 1660 | main = p.nextword() 1661 | minea = p.nextword() 1662 | maxea = p.nextword() 1663 | 1664 | self.coresize = 0 1665 | self.corestart = 0 1666 | 1667 | class Script: 1668 | def __init__(self, id0, nodeid): 1669 | self._id0 = id0 1670 | self._nodeid = nodeid 1671 | 1672 | @cachedproperty 1673 | def name(self): return self._id0.string(self._nodeid, 'S', 0) 1674 | @cachedproperty 1675 | def language(self): return self._id0.string(self._nodeid, 'S', 1) 1676 | @cachedproperty 1677 | def body(self): return strz(self._id0.blob(self._nodeid, 'X'), 0) 1678 | 1679 | class Segment: 1680 | """ 1681 | Decodes a value from "$ segs", see segment_t in segment.hpp for details. 1682 | """ 1683 | def __init__(self, id0, spec): 1684 | self._id0 = id0 1685 | p = IdaUnpacker(id0.wordsize, spec) 1686 | self.startea = p.nextword() 1687 | self.size = p.nextword() 1688 | self.name_id = p.nextword() 1689 | self.class_id = p.nextword() 1690 | self.orgbase = p.nextword() 1691 | self.unknown = p.next16() 1692 | self.align = p.next8() 1693 | self.comb = p.next8() 1694 | self.perm = p.next8() 1695 | self.bitness = p.next8() 1696 | self.flags = p.next8() 1697 | self.selector = p.nextword() 1698 | self.defsr = [p.nextword() for _ in range(16)] 1699 | self.color = p.next32() 1700 | 1701 | --------------------------------------------------------------------------------