├── setup.cfg
├── tstbs.py
├── LICENSE
├── idaunpack.py
├── test_idblib.py
├── README.md
├── tree-walking.py
├── idbtool.py
└── idblib.py


/setup.cfg:
--------------------------------------------------------------------------------
1 | [flake8]
2 | ignore = E402,E501,E731
3 | 
4 | 


--------------------------------------------------------------------------------
/tstbs.py:
--------------------------------------------------------------------------------
 1 | def binary_search(a, k):
 2 |     # c++: a.upperbound(k)--
 3 |     first, last = 0, len(a)
 4 |     while first<last:
 5 |         mid = (first+last)>>1
 6 |         if k < a[mid]:
 7 |             last = mid
 8 |         else:
 9 |             first = mid+1
10 |     return first-1
# print the search result for keys below, inside and above the sample list
sample = [2, 3, 5, 6]
for x in range(8):
    print(x, binary_search(sample, x))
13 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Willem Hengeveld <itsme@xs4all.nl>
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/idaunpack.py:
--------------------------------------------------------------------------------
 1 | """
 2 | `idaunpack` is a tool to aid in decoding packed data structures from an
 3 | IDA idb or i64 database.
 4 | """
 5 | from __future__ import print_function, division
 6 | import struct
 7 | import re
 8 | import sys
 9 | from binascii import a2b_hex, b2a_hex
10 | from idblib import IdaUnpacker
11 | 
def dump_packed(data, wordsize, pattern):
    """
    Print the values packed in `data` as hex numbers on a single line.

    `wordsize` and `data` are handed to IdaUnpacker.
    Each character in `pattern` selects how the next value is read:
      H -> next16, L -> next32, Q -> next64, W -> nextword ( printed in brackets )
    When the data runs out before the pattern does, "EOF" is printed.
    Whatever data remains after the pattern is printed as 32 bit values.

    Raises an Exception for an unknown pattern character.
    """
    unpacker = IdaUnpacker(wordsize, data)

    # pattern character -> (name of the unpacker method, output format)
    decoders = {
        'H': ('next16', "%04x"),
        'L': ('next32', "%08x"),
        'Q': ('next64', "%016x"),
        'W': ('nextword', "[%08x]" if wordsize == 4 else "[%016x]"),
    }

    for code in pattern or '':
        if unpacker.eof():
            print("EOF")
            break
        if code not in decoders:
            raise Exception("unknown pattern: %s" % code)
        method, fmt = decoders[code]
        print(fmt % getattr(unpacker, method)(), end=" ")

    # the rest of the data is dumped as plain 32 bit numbers
    while not unpacker.eof():
        print("%08x" % unpacker.next32(), end=" ")

    print()
43 | 
def unhex(hextxt):
    """
    Convert a hex string to bytes, after dropping all non-word characters
    ( spaces, punctuation ) from it.
    """
    digits = re.sub(r'\W+', '', hextxt, flags=re.DOTALL)
    return a2b_hex(digits)
46 | 
def main():
    """
    Commandline entry point: parse the options, then dump each hex string
    argument using the requested wordsize and pattern.
    """
    import argparse

    parser = argparse.ArgumentParser(description='idaunpack')
    parser.add_argument('--verbose', '-v', action='store_true')
    parser.add_argument('--debug', action='store_true', help='abort on exceptions.')
    parser.add_argument('--pattern', '-p', type=str, help='unpack pattern: sequence of H, L, Q, W')
    parser.add_argument('-4', '-3', '-32', const=4, dest='wordsize', action='store_const', help='use 32 bit words')
    parser.add_argument('-8', '-6', '-64', const=8, dest='wordsize', action='store_const', help='use 64 bit words')
    parser.add_argument('--wordsize', '-w', type=int, help='specify wordsize')
    parser.add_argument('hexconsts', nargs='*', type=str)
    args = parser.parse_args()

    # without an explicit wordsize option, 32 bit words are used
    if args.wordsize is None:
        args.wordsize = 4

    for hextxt in args.hexconsts:
        packed = unhex(hextxt)
        dump_packed(packed, args.wordsize, args.pattern)
64 | 
# only run the commandline tool when executed as a script, not when imported as a module
if __name__ == '__main__':
    main()
67 | 


--------------------------------------------------------------------------------
/test_idblib.py:
--------------------------------------------------------------------------------
 1 | import unittest
 2 | from idblib import FileSection, binary_search, makeStringIO
 3 | 
 4 | 
class TestFileSection(unittest.TestCase):
    """ unittest for FileSection object """
    def test_file(self):
        # The section is created with (3, 11) on a 16 byte stream,
        # the assertions below expect it to expose the 8 bytes b"3456789a".
        s = makeStringIO(b"0123456789abcdef")
        fh = FileSection(s, 3, 11)

        # sequential reads: the second read is expected to be truncated at the
        # end of the section, and the third to return nothing.
        self.assertEqual(fh.read(3), b"345")
        self.assertEqual(fh.read(8), b"6789a")
        self.assertEqual(fh.read(8), b"")

        # whence=2: seek relative to the end of the section
        fh.seek(-1, 2)
        self.assertEqual(fh.read(8), b"a")
        # default whence: seek relative to the start of the section
        fh.seek(3)
        self.assertEqual(fh.read(2), b"67")
        # whence=1: seek relative to the current position, backward and forward
        fh.seek(-2, 1)
        self.assertEqual(fh.read(2), b"67")
        fh.seek(2, 1)
        self.assertEqual(fh.read(2), b"a")

        # seeking to exactly the end of the section is expected to work,
        # seeking past the end is expected to raise.
        fh.seek(8)
        self.assertEqual(fh.read(1), b"")
        with self.assertRaises(Exception):
            fh.seek(9)
27 | 
28 | 
class TestBinarySearch(unittest.TestCase):
    """
    unittests for binary_search

    The assertions expect binary_search to return the index of the last
    object with `.key` <= the searched value, or -1 when there is none.
    """
    class Object:
        """ minimal object with a `key` attribute, as searched by binary_search """
        def __init__(self, num):
            self.key = num

        def __repr__(self):
            # the number is stored in `key`: there is no `num` attribute
            return "o(%d)" % self.key

    def test_bs(self):
        """ keys before, between, equal to and after the list items """
        obj = self.Object
        lst = [obj(n) for n in (2, 3, 5, 6)]
        self.assertEqual(binary_search(lst, 1), -1)
        self.assertEqual(binary_search(lst, 2), 0)
        self.assertEqual(binary_search(lst, 3), 1)
        self.assertEqual(binary_search(lst, 4), 1)
        self.assertEqual(binary_search(lst, 5), 2)
        self.assertEqual(binary_search(lst, 6), 3)
        self.assertEqual(binary_search(lst, 7), 3)

    def test_emptylist(self):
        """ nothing can be found in an empty list """
        lst = []
        self.assertEqual(binary_search(lst, 1), -1)

    def test_oneelem(self):
        """ list with a single item """
        obj = self.Object
        lst = [obj(1)]
        self.assertEqual(binary_search(lst, 0), -1)
        self.assertEqual(binary_search(lst, 1), 0)
        self.assertEqual(binary_search(lst, 2), 0)

    def test_twoelem(self):
        """ list with two items """
        obj = self.Object
        lst = [obj(1), obj(3)]
        self.assertEqual(binary_search(lst, 0), -1)
        self.assertEqual(binary_search(lst, 1), 0)
        self.assertEqual(binary_search(lst, 2), 0)
        self.assertEqual(binary_search(lst, 3), 1)
        self.assertEqual(binary_search(lst, 4), 1)

    def test_listsize(self):
        """ check the boundaries for a range of list sizes """
        obj = self.Object
        for size in range(3, 32):
            # keys 1..size, with the key '2' removed
            lst = [obj(n + 1) for n in range(size)]
            lst = lst[:1] + lst[2:]
            self.assertEqual(binary_search(lst, 0), -1)
            self.assertEqual(binary_search(lst, 1), 0)
            self.assertEqual(binary_search(lst, 2), 0)
            self.assertEqual(binary_search(lst, 3), 1)
            self.assertEqual(binary_search(lst, size - 1), size - 3)
            self.assertEqual(binary_search(lst, size), size - 2)
            self.assertEqual(binary_search(lst, size + 1), size - 2)
            self.assertEqual(binary_search(lst, size + 2), size - 2)
83 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
  1 | IDBTOOL
  2 | =======
  3 | 
  4 | A tool for extracting information from IDA databases.
  5 | `idbtool` knows how to handle databases from all IDA versions since v2.0, both `i64` and `idb` files.
  6 | You can also use `idbtool` to recover information from unclosed databases.
  7 | 
  8 | `idbtool` works without change with IDA v7.0.
  9 | 
 10 | 
 11 | Much faster than loading a file in IDA
 12 | --------------------------------------
 13 | 
 14 | With idbtool you can search thousands of .idb files in seconds.
 15 | 
 16 | More precisely: on my laptop it takes:
 17 | 
 18 |  *  1.5 seconds to extract 143 idc scripts from 119 idb and i64 files.
 19 |  *  3.8 seconds to print idb info for 441 files.
 20 |  *  5.6 seconds to extract 281 enums containing 4726 members from 35 files.
 21 |  * 67.8 seconds to extract 5942 structs containing 33672 members from 265 files.
 22 | 
  23 | Loading an approximately 5 Gbyte idb file in IDA takes about 45 minutes,
  24 | while idb3.h takes basically no time at all, no more than a few milliseconds.
 25 | 
 26 | 
 27 | 
 28 | Download
 29 | ========
 30 | 
 31 | Two versions of this tool exist:
 32 | 
 33 | One written in python
 34 |  * https://github.com/nlitsme/pyidbutil
 35 | 
 36 | One written in C++
 37 |  * https://github.com/nlitsme/idbutil
 38 | 
 39 | Both repositories contain a library which can be used for reading `.idb` or `.i64` files.
 40 | 
 41 | 
 42 | Usage
 43 | =====
 44 | 
 45 | Usage: 
 46 | 
 47 |     idbtool [options] [database file(s)]
 48 | 
 49 |  * `-n` or `--names`  will list all named values in the database.
 50 |  * `-s` or `--scripts` will list all scripts stored in the database.
 51 |  * `-u` or `--structs` will list all structs stored in the database.
 52 |  * `-e` or `--enums` will list all enums stored in the database.
 53 |  * `--imports` will list all imported symbols from the database.
 54 |  * `--funcdirs` will list function folders stored in the database.
 55 |  * `-i` or `--info` will print some general info about the database. 
 56 |  * `-d` or `--pagedump`  dump btree page tree contents.
 57 |  * `--inc`, `--dec` list all records in ascending / descending order.
 58 |  * `-q` or `--query` search specific records in the database.
 59 |  * `-m` or `--limit` limit the number of results returned by `-q`.
 60 |  * `-id0`, `-id1` dump only one specific section.
 61 |  * `--i64`, `--i32` tell idbtool that the specified file is from a 64 or 32 bit database.
 62 |  * `--recover` group files from an unpacked database.
 63 |  * `--classify` summarizes node usage in the database
 64 |  * `--dump`  hexdump the original binary data
 65 | 
 66 | query
 67 | -----
 68 | 
 69 | Queries need to be specified last on the commandline.
 70 | 
 71 | example:
 72 | 
 73 |     idbtool [database file(s)]  --query  "Root Node;V"
 74 | 
 75 | Will list the source binary for all the databases specified on the commandline.
 76 | 
 77 | A query is a string with the following format:
 78 | 
 79 |  * [==,<=,>=,<,>]  - optional relation, default: ==
 80 |  * a base node key:
 81 |     * a DOT followed by the numeric value of the nodeid.
 82 |     * a HASH followed by the numeric value of the system-nodeid.
 83 |     * a QUESTION followed by the name of the node. -> a 'N'ame node
 84 |     * the name of the node.  -> the name is resolved, results in a '.'Dot node
 85 |  * an optional tag ( A for Alt, S for Supval, etc )
 86 |  * an optional index value
 87 | 
 88 | example queries:
 89 |  * `Root Node;V` -> prints record containing the source binary name
 90 |  * `?Root Node` -> prints the Name record pointing to the root
 91 |  * `>Root Node` -> prints the first 10 records starting with the root node id.
  92 |  * `<Root Node` -> prints the 10 records starting with the record before the rootnode.
 93 |  * `.0xff000001;N` -> prints the rootnode name entry.
 94 |  * `#1;N` -> prints the rootnode name entry.
 95 | 
 96 | List the highest node and following record in the database in two different ways,
 97 | the first: starting at the first record below `ffc00000`, and listing the next.
 98 | The second: starting at the first record after `ffc00000`, and listing the previous:
 99 |  * `--query "<#0xc00000"  --limit 2 --inc -v`
100 |  * `--query ">#0xc00000"  --limit 2 --dec -v`
101 | 
102 | Note that this should be the nodeid in the `$ MAX NODE` record.
103 | 
104 | List the last two records:
105 |  * `--limit 2 --dec  -v`
106 | 
107 | List the first two records, the `$ MAX LINK` and `$ MAX NODE` records:
108 |  * `--limit 2 --inc -v`
109 | 
110 | 
111 | A full database dump
112 | --------------------
113 | 
 114 | Several methods exist for printing all records in the database. This may be useful if
 115 | you want to investigate more of IDA's internals. It can also be useful in recovering
 116 | data from corrupted databases.
117 | 
118 |  * `--inc`, `--dec` can be used to enumerate all b-tree records in either forward, or backward direction.
119 |     * add `-v` to get a prettier key/value output
120 |  * `--id0`  walks the page tree, instead of the record tree, printing the contents of each page
121 |  * `--pagedump` linearly skip through the file, this will also reveal information in deleted pages.
122 | 
123 | naked files
124 | ===========
125 | 
126 | When IDA or your computer crashed while working on a disassembly, and you did not yet save the database,
127 | you are left with a couple of files with extensions like `.id0`, `.id1`, `.nam`, etc.
128 | 
 129 | These files are the unpacked database, I call them `naked` files.
130 | 
 131 | Using the `--filetype` and `--i64` or `--i32` options you can inspect these `naked` files individually,
 132 | or use the `--recover` option to view them as a complete database together.
133 | `idbtool` will figure out automatically which files would belong together.
134 | 
 135 | `idbtool` can figure out the bitsize of the database from an `.id0` file, but not (yet) from the others.
136 | 
137 | 
138 | LIBRARY
139 | =======
140 | 
141 | The file `idblib.py` contains a library.
142 | 
143 | 
144 | TODO
145 | ====
146 | 
147 |  * add option to list all comments stored in the database
148 |  * add option to list flags for a list of addresses.
149 | 
150 | Author
151 | ======
152 | 
153 | Willem Hengeveld <itsme@xs4all.nl>
154 | 
155 | 


--------------------------------------------------------------------------------
/tree-walking.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Copyright (c) 2016 Willem Hengeveld <itsme@xs4all.nl>
  3 | 
  4 | Experiment in btree walking
  5 | 
  6 | 
  7 |                    *-------->[00]
  8 |          *------>[02]---+    [01]
  9 | root ->[08]---+  [05]-+ |
 10 |        [17]-+ |       | +--->[03]
 11 |             | |       |      [04]
 12 |             | |       |
 13 |             | |       +----->[06]
 14 |             | |              [07]
 15 |             | |
 16 |             | |    *-------->[09]
 17 |             | +->[11]---+    [10]
 18 |             |    [14]-+ |
 19 |             |         | +--->[12]
 20 |             |         |      [13]
 21 |             |         |
 22 |             |         +----->[15]
 23 |             |                [16]
 24 |             |
 25 |             |      *-------->[18]
 26 |             +--->[20]---+    [19]
 27 |                  [23]-+ |
 28 |                       | +--->[21]
 29 |                       |      [22]
 30 |                       |
 31 |                       +----->[24]
 32 |                              [25]
 33 | 
 34 | 
 35 | decrement from 08 : ix-- -> getpage, ix=len-1 -> getpage -> ix=len-1
 36 | decrement from 17 : ix-- -> getpage, ix=len-1 -> getpage -> ix=len-1
 37 | decrement from 02 : ix-- -> getpage, ix=len-1
 38 | decrement from 05 : ix-- -> getpage, ix=len-1
 39 | 
 40 | decrement from 01  : ix-- -> ix>=0 -> use key at ix
 41 | decrement from 03  : ix-- -> <0 -> pop -> ix>=0 -> use key at ix
 42 | decrement from 09  : ix-- -> <0 -> pop -> ix<0 -> pop -> ix>=0 -> use key at ix
 43 | 
 44 | increment from 09  : ix++
 45 | increment from 10  : ix++  -> ix==len(index)  -> pop: ix==-1  -> ix++ -> ix==0  -> use
 46 | increment from 11  : recurse, ix=0  -> use
 47 | increment from 08  : recurse, ix=-1 -> recurse, ix=0 -> use
 48 | increment from 07  : ix++ -> ix==len(index) -> pop,    ix++ -> ix==len -> pop -> ix++ -> ix==0 -> use
 49 | """
 50 | from __future__ import division, print_function, absolute_import, unicode_literals
 51 | 
 52 | # shape of the tree
 53 | # a <2,2>  tree is basically like the tree pictured in the ascii art above.
 54 | TREEDEPTH = 2
 55 | NODEWIDTH = 2
 56 | 
 57 | 
 58 | def binary_search(a, k):
 59 |     # c++: a.upperbound(k)--
 60 |     first, last = 0, len(a)
 61 |     while first < last:
 62 |         mid = (first + last) >> 1
 63 |         if k < a[mid].key:
 64 |             last = mid
 65 |         else:
 66 |             first = mid + 1
 67 |     return first - 1
 68 | 
 69 | 
 70 | class Entry(object):
 71 |     """
 72 |     a key/value entry from a b-tree page
 73 |     """
 74 |     def __init__(self, key, val):
 75 |         self.key = key
 76 |         self.val = val
 77 | 
 78 |     def __repr__(self):
 79 |         return "%s=%d" % (self.key, self.val)
 80 | 
 81 | 
 82 | class BasePage(object):
 83 |     """
 84 |     BasePage has methods common to both leaf and index pages
 85 |     """
 86 |     def __init__(self, kv):
 87 |         self.index = []
 88 |         for k, v in kv:
 89 |             self.index.append(Entry(k, v))
 90 | 
 91 |     def find(self, key):
 92 |         i = binary_search(self.index, key)
 93 |         if i < 0:
 94 |             if self.isindex():
 95 |                 return ('recurse', -1)
 96 |             return ('gt', 0)
 97 |         if self.index[i].key == key:
 98 |             return ('eq', i)
 99 |         if self.isindex():
100 |             return ('recurse', i)
101 |         return ('lt', i)
102 | 
103 |     def getkey(self, ix):
104 |         return self.index[ix].key
105 | 
106 |     def getval(self, ix):
107 |         return self.index[ix].val
108 | 
109 |     def isleaf(self):
110 |         return self.preceeding is None
111 | 
112 |     def isindex(self):
113 |         return self.preceeding is not None
114 | 
115 |     def __repr__(self):
116 |         return ("leaf" if self.isleaf() else ("index<%d>" % self.preceeding)) + repr(self.index)
117 | 
118 | 
class LeafPage(BasePage):
    """
    A leaf page in the b-tree.

    A leaf has no subpages, this is marked by `preceeding` being None.
    """
    def __init__(self, kv):
        # Name the class explicitly: super(self.__class__, self) would recurse
        # forever as soon as this class gets subclassed.
        super(LeafPage, self).__init__(kv)
        self.preceeding = None
124 | 
125 | 
class IndexPage(BasePage):
    """
    An index page in the b-tree.
    This page has a preceeding page plus several key+subpage pairs.
    For each key+subpage: all keys in the subpage are greater than the key
    """
    def __init__(self, preceeding, kv):
        # Name the class explicitly: super(self.__class__, self) would recurse
        # forever as soon as this class gets subclassed.
        super(IndexPage, self).__init__(kv)
        self.preceeding = preceeding

    def getpage(self, ix):
        """ Return the page number of subpage `ix`, a negative `ix` selects the preceeding page. """
        return self.preceeding if ix < 0 else self.index[ix].val
138 | 
139 | 
class Cursor:
    """
    A position in the b-tree.

    The position is kept as a stack of (page, index) tuples, leading from the
    root page to the page holding the current record. An index of -1 on an
    index page refers to its preceeding page. An empty stack means the cursor
    moved past either end of the tree.

    The cursor can be moved to the next or previous record, and can return
    the key and value of the current record.
    """
    def __init__(self, db, stack):
        self.db = db
        self.stack = stack

    def next(self):
        """ move to the record following the current one """
        page, pos = self.stack.pop()
        if not page.isleaf():
            # on an index page: the successor is the first record
            # of the leftmost leaf below the subpage of the current key
            self.stack.append((page, pos))
            child = self.db.readpage(page.getpage(pos))
            while child.isindex():
                self.stack.append((child, -1))
                child = self.db.readpage(child.getpage(-1))
            self.stack.append((child, 0))
        else:
            # on a leaf: step right, moving towards the root
            # for as long as we are past the end of a page
            pos += 1
            while self.stack and pos == len(page.index):
                page, pos = self.stack.pop()
                pos += 1
            if pos < len(page.index):
                self.stack.append((page, pos))

        self.verify()

    def prev(self):
        """ move to the record preceeding the current one """
        page, pos = self.stack.pop()
        pos -= 1
        if not page.isleaf():
            # on an index page: the predecessor is the last record
            # of the rightmost leaf below the subpage left of the current key
            self.stack.append((page, pos))
            while page.isindex():
                page = self.db.readpage(page.getpage(pos))
                pos = len(page.index) - 1
                self.stack.append((page, pos))
        else:
            # on a leaf: move towards the root, until a non 'prec' item is found
            while self.stack and pos < 0:
                page, pos = self.stack.pop()
            if pos >= 0:
                self.stack.append((page, pos))

        self.verify()

    def verify(self):
        """ verify cursor state consistency, problems are only printed """
        depth = len(self.stack)
        if depth > 3:
            print("WARN: stack too large")
        elif depth == 3:
            bottom = self.stack[-1][0]
            if not bottom.isleaf():
                print("WARN no leaf")

        if depth >= 2:
            root, second = self.stack[0][0], self.stack[1][0]
            if root == second:
                print("WARN: identical index pages on stack")
            if not root.isindex():
                print("WARN: expected root=index")
            if not second.isindex():
                print("WARN: expected 2nd=index")

    def eof(self):
        """ True when the cursor no longer points to a record """
        return not self.stack

    def getkey(self):
        page, pos = self.stack[-1]
        return page.getkey(pos)

    def getval(self):
        page, pos = self.stack[-1]
        return page.getval(pos)

    def __repr__(self):
        return "cursor:" + repr(self.stack)
222 | 
223 | 
class Btree:
    """
    A B-tree implementation

    The pages are kept in the list `self.pages`, a page number is an index
    into this list. `self.rootindex` is the page number of the root page.
    On construction a tree of TREEDEPTH levels of index pages, with
    NODEWIDTH keys per page is generated.
    """
    def __init__(self):
        self.pages = []
        self.generate(TREEDEPTH, NODEWIDTH)

    def manual(self):
        """
        Manually construct the ascii art tree.

        This replaces whatever tree was built before.
        """
        # The page numbers used below start counting at zero, so the tree
        # generated by the constructor has to be dropped first.
        self.pages = []
        for i in range(9):
            self.pages.append(LeafPage((("%02d" % (3 * i), 0), ("%02d" % (3 * i + 1), 0))))
        for i in range(3):
            self.pages.append(IndexPage(3 * i, (("%02d" % (9 * i + 2), 3 * i + 1), ("%02d" % (9 * i + 5), 3 * i + 2))))
        self.pages.append(IndexPage(9, (("08", 10), ("17", 11))))
        self.rootindex = len(self.pages) - 1

    def generate(self, depth, nodesize):
        """
        Automatically generate a tree like the one in the ascii art above,
        with `depth` levels of index pages and `nodesize` keys in each page.
        The keys are 3 digit decimal numbers, handed out in ascending order.
        """

        def namegen():
            i = 0
            while True:
                yield "%03d" % i
                i += 1

        self.rootindex = self.construct(namegen(), depth, nodesize)
        print("%d pages" % (len(self.pages)))

    def construct(self, namegen, depth, nodesize):
        """ Build a subtree with `depth` index levels, returns the page number of its top page. """
        if depth:
            return self.createindex(namegen, depth, nodesize)
        else:
            return self.createleaf(namegen, nodesize)

    def createindex(self, namegen, depth, nodesize):
        """ Create an index page including all pages below it, returns its page number. """
        # Note: the preceeding subtree is built first, then for each entry
        # the key is taken before its subtree is built. This keeps the keys in order.
        page = IndexPage(self.construct(namegen, depth - 1, nodesize),
                         [(next(namegen), self.construct(namegen, depth - 1, nodesize)) for _ in range(nodesize)])
        self.pages.append(page)
        return len(self.pages) - 1

    def createleaf(self, namegen, nodesize):
        """ Create a leaf page with `nodesize` keys, returns its page number. """
        page = LeafPage([(next(namegen), 0) for _ in range(nodesize)])
        self.pages.append(page)
        return len(self.pages) - 1

    def readpage(self, pn):
        """ Return the page with page number `pn`. """
        return self.pages[pn]

    def find(self, key):
        """
        Find a node in the tree, returns the relation to the wanted key plus the cursor:
        'eq' for equal, 'lt' when the found key is less than the wanted key,
        or 'gt' when the found key is greater than the wanted key.
        """
        page = self.readpage(self.rootindex)
        stack = []
        while True:
            act, ix = page.find(key)
            stack.append((page, ix))
            if act != 'recurse':
                break
            page = self.readpage(page.getpage(ix))
        return act, Cursor(self, stack)

    def dumptree(self, pn, indent=0):
        """ dump all nodes of the current b-tree, starting at page number `pn` """
        page = self.readpage(pn)
        print("  " * indent, page)
        if page.isindex():
            print("  " * indent, end="")
            self.dumptree(page.preceeding, indent + 1)
            for p in range(len(page.index)):
                print("  " * indent, end="")
                self.dumptree(page.getpage(p), indent + 1)
299 | 
300 | 
# build the tree, and show its structure
db = Btree()
print("<<")
db.dumptree(db.rootindex)
print(">>")

# the number of keys to try
nkeys = NODEWIDTH * len(db.pages)

# for each key: find it, and take one step back
for i in range(nkeys):
    name = "%03d" % i
    print("--------- %s" % name)
    act, cursor = db.find(name)
    print("found", act, cursor.getkey(), cursor)
    cursor.prev()
    if cursor.eof():
        print("prev:  EOF", cursor)
    else:
        print("prev:", "..", cursor.getkey(), cursor)

# for each key: find it, and take one step forward
for i in range(nkeys):
    name = "%03d" % i
    print("--------- %s" % name)
    act, cursor = db.find(name)
    print("found", act, cursor.getkey(), cursor)
    cursor.next()
    if cursor.eof():
        print("next:  EOF", cursor)
    else:
        print("next:", "..", cursor.getkey(), cursor)

# search for keys which are not necessarily in the tree
for k in ('', '0', '1', '2', '3', '000', '010', '020', '100'):
    print("--------- %s" % k)
    act, cursor = db.find(k)
    print(cursor)
    print(act, cursor.getkey(), end=" next=")
    cursor.next()
    if not cursor.eof():
        print(cursor.getkey())
    else:
        print("EOF")

# walk forward through the tree, starting at the first key
act, cursor = db.find("000")
print("get000", end=" ")
for i in range(nkeys):
    cursor.next()
    if not cursor.eof():
        print("-> %s" % cursor.getkey(), end=" ")
    else:
        print("EOF")
print()

# walk backward through the tree, starting at key 025
act, cursor = db.find("025")
print("get025", end=" ")
for i in range(nkeys):
    cursor.prev()
    if not cursor.eof():
        print("-> %s" % cursor.getkey(), end=" ")
    else:
        print("EOF")
print()
357 | 


--------------------------------------------------------------------------------
/idbtool.py:
--------------------------------------------------------------------------------
   1 | #!/usr/bin/python3
   2 | """
   3 | Tool for querying information from Hexrays .idb and .i64 files
   4 | without launching IDA.
   5 | 
   6 | Copyright (c) 2016 Willem Hengeveld <itsme@xs4all.nl>
   7 | """
   8 | 
   9 | # todo:
  10 | #  '$ segs'
  11 | #      S <segaddr> = packed(startea, size, ....)
  12 | #  '$ srareas'
  13 | #      a <addr>    = packed(startea, size, flag, flag)  -- includes functions
  14 | #      b <addr>    = packed(startea, size, flag, flag)  -- segment
  15 | #      c <addr>    = packed(startea, size, flag, flag)  -- same as 'b'
  16 | #       
  17 | from __future__ import division, print_function, absolute_import, unicode_literals
  18 | import sys
  19 | import os
# python2 only: use the `scandir` module to provide os.scandir
if sys.version_info[0] == 2:
    import scandir
    os.scandir = scandir.scandir
# python2 only: make utf-8 the default string encoding
if sys.version_info[0] == 2:
    reload(sys)
    sys.setdefaultencoding('utf-8')

# `stdout`: sys.stdout itself on python2, its underlying binary buffer otherwise.
# NOTE(review): presumably used for writing raw bytes - confirm against its users.
if sys.version_info[0] == 2:
    stdout = sys.stdout
else:
    stdout = sys.stdout.buffer
  31 | 
  32 | import struct
  33 | import binascii
  34 | import argparse
  35 | import itertools
  36 | from collections import defaultdict
  37 | 
  38 | import re
  39 | 
  40 | from datetime import datetime
  41 | 
  42 | import idblib
  43 | from idblib import hexdump
  44 | 
  45 | 
  46 | def timestring(t):
  47 |     if t == 0:
  48 |         return "....-..-.. ..:..:.."
  49 |     return datetime.strftime(datetime.fromtimestamp(t), "%Y-%m-%d %H:%M:%S")
  50 | 
  51 | 
  52 | def strz(b, o):
  53 |     return b[o:b.find(b'\x00', o)].decode('utf-8', 'ignore')
  54 | 
  55 | def nonefmt(fmt, num):
  56 |     if num is None:
  57 |         return "-"
  58 |     return fmt % num
  59 | 
  60 | ######### license encoding ################
  61 | 
  62 | 
  63 | def decryptuser(data):
  64 |     """
  65 |     The '$ original user' node is encrypted with hexray's private key.
  66 |     Hence we can easily decrypt it, but not change it to something else.
  67 |     We can however copy the entry from another database, or just replace it with garbage.
  68 | 
  69 |     The node contains 128 bytes encrypted license, followed by 32 bytes zero.
  70 | 
  71 |     Note: i found several ida55 databases online where this does not work.
  72 |     possible these were created using a cracked version of IDA.
  73 |     """
  74 |     data = int(binascii.b2a_hex(data[127::-1]), 16)
  75 |     user = pow(data, 0x13, 0x93AF7A8E3A6EB93D1B4D1FB7EC29299D2BC8F3CE5F84BFE88E47DDBDD5550C3CE3D2B16A2E2FBD0FBD919E8038BB05752EC92DD1498CB283AA087A93184F1DD9DD5D5DF7857322DFCD70890F814B58448071BBABB0FC8A7868B62EB29CC2664C8FE61DFBC5DB0EE8BF6ECF0B65250514576C4384582211896E5478F95C42FDED)
  76 |     user = binascii.a2b_hex("%0256x" % user)
  77 |     return user[1:]
  78 | 
  79 | 
  80 | def licensestring(lic):
  81 |     """ decode a license blob """
  82 |     if not lic:
  83 |         return
  84 |     if len(lic) < 127:
  85 |         print("too short license format: %s" % binascii.b2a_hex(lic))
  86 |         return
  87 |     elif len(lic) > 127 and sum(lic[127:]) != 0:
  88 |         print("too long license format: %s" % binascii.b2a_hex(lic))
  89 |         return
  90 | 
  91 |     if struct.unpack_from("<L", lic, 106)[0]:
  92 |         print("unknown license format: %s" % binascii.b2a_hex(lic))
  93 |         return
  94 | 
  95 |     # first 2 bytes probably a checksum
  96 | 
  97 |     licver, = struct.unpack_from("<H", lic, 2)
  98 |     time, = struct.unpack_from("<L", lic, 4)
  99 | 
 100 |     # new 'Freeware version'  has licver == 0 as well, but is new format anyway, it is recognizable by time==0x10000
 101 |     if licver == 0 and time != 0x10000:
 102 |         if time:
 103 |             """
 104 |             # up to and including ida v5.2
 105 | 
 106 |             +00:  int16 checksum?
 107 |             +02:  int16 zero
 108 |             +04:  int32 unix timestamp
 109 |             +08:  byte[8]  zero
 110 |             +10:  int32 flags
 111 |             +14:  char[107]  license text
 112 |             """
 113 | 
 114 |             licflags, = struct.unpack_from("<L", lic, 16)
 115 |             licensee = strz(lic, 20)
 116 |             return "%s [%08x]  %s" % (timestring(time), licflags, licensee)
 117 |         else:
 118 |             """
 119 |             +00: byte[0x13]  zero
 120 |             +13: int32 ?
 121 |             +17: int32 timestamp
 122 |             +1b: byte[8]  zero
 123 |             +23: int32 flags
 124 |             +27: char[88]  license text
 125 |             """
 126 |             unk, = struct.unpack_from("<L", lic, 0x13)
 127 |             time, = struct.unpack_from("<L", lic, 0x17)
 128 |             licflags, = struct.unpack_from("<L", lic, 0x23)
 129 |             licensee = strz(lic, 0x27)
 130 | 
 131 |             return "%s [%08x] (%08x)  %s" % (timestring(time), licflags, unk, licensee)
 132 |     else:
 133 |         """
 134 |         # since ida v5.3
 135 | 
 136 |         +00: int16 checksum?
 137 |         +02: int16 idaversion
 138 |         +04: int32 ? small number, 1 or 2.
 139 |         +08: int64 ? -1  or big number,  maybe license flags?
 140 |         +10: int32 timestamp
 141 |         +14: int32  zero
 142 |         +18: int32  sometimes another timestamp
 143 |         +1c: byte[6]  license id
 144 |         +22: char[*] license text   ( v5.3-v5.x : 93 chars,  v6.0: 77 chars, v6.5: 69 chars )
 145 |         +67: int64 ?  since ida v6.50
 146 |         +6f: byte[16] hash   .. since ida v6.00
 147 |         """
 148 |         time1, = struct.unpack_from("<L", lic, 16)
 149 |         time2, = struct.unpack_from("<L", lic, 16 + 8)
 150 |         licid = "%02X-%02X%02X-%02X%02X-%02X" % struct.unpack_from("6B", lic, 28)
 151 |         licensee = strz(lic, 34)
 152 |         return "v%04d %s .. %s  %s  %s" % (licver, timestring(time1), timestring(time2), licid, licensee)
 153 | 
 154 | 
 155 | def dumpuser(id0):
 156 |     """ dump the original, and current database user """
 157 |     orignode = id0.nodeByName('$ original user')
 158 |     if orignode:
 159 |         user0 = id0.bytes(orignode, 'S', 0)
 160 |         if user0:
 161 |             if user0.find(b'\x00\x00\x00\x00') >= 128:
 162 |                 user0 = decryptuser(user0)
 163 |             else:
 164 |                 user0 = user0[:127]
 165 |             # user0 has 128 bytes rsa encrypted license, followed by 32 bytes zero
 166 |             print("orig: %s" % licensestring(user0))
 167 |         # ida9 has S10+S11 == license json
 168 |         user10 = id0.blob(orignode, 'S', 16)
 169 |         if user10:
 170 |             import json
 171 |             user10 = json.loads(user10)
 172 |             print("orig: %s" % user10)
 173 |     curnode = id0.nodeByName('$ user1')
 174 |     if curnode:
 175 |         user1 = id0.bytes(curnode, 'S', 0)
 176 |         print("user: %s" % licensestring(user1))
 177 | 
 178 | 
 179 | ######### idb summary #########
 180 | 
 181 | 
# Human readable descriptions of the input file type, indexed by the numeric
# file type value stored in the database parameters (see `dumpinfo`).
# Entries 0 and 1 carry the same text as entries 22 and 23.
filetypelist = [
    "MS DOS EXE File",
    "MS DOS COM File",
    "Binary File",
    "MS DOS Driver",
    "New Executable (NE)",
    "Intel Hex Object File",
    "MOS Technology Hex Object File",
    "Linear Executable (LX)",
    "Linear Executable (LE)",
    "Netware Loadable Module (NLM)",
    "Common Object File Format (COFF)",
    "Portable Executable (PE)",
    "Object Module Format",
    "R-records",
    "ZIP file (this file is never loaded to IDA database)",
    "Library of OMF Modules",
    "ar library",
    "file is loaded using LOADER DLL",
    "Executable and Linkable Format (ELF)",
    "Watcom DOS32 Extender (W32RUN)",
    "Linux a.out (AOUT)",
    "PalmPilot program file",
    "MS DOS EXE File",
    "MS DOS COM File",
    "AIX ar library",
    "Mac OS X Mach-O file",
]
 210 | 
 211 | 
 212 | def dumpinfo(id0):
 213 |     """ print various infos on the idb file """
 214 |     def ftstring(ft):
 215 |         if 0 < ft < len(filetypelist):
 216 |             return "%02x:%s" % (ft, filetypelist[ft])
 217 |         return "%02x:unknown" % ft
 218 | 
 219 |     def decodebitmask(fl, bitnames):
 220 |         l = []
 221 |         knownbits = 0
 222 |         for bit, name in enumerate(bitnames):
 223 |             if fl & (1 << bit) and name is not None:
 224 |                 l.append(name)
 225 |                 knownbits |= 1 << bit
 226 |         if fl & ~knownbits:
 227 |             l.append("unknown_%x" % (fl & ~knownbits))
 228 |         return ",".join(l)
 229 | 
 230 |     def osstring(fl):
 231 |         return decodebitmask(fl, ['msdos', 'win', 'os2', 'netw', 'unix', 'other'])
 232 | 
 233 |     def appstring(fl):
 234 |         return decodebitmask(fl, ['console', 'graphics', 'exe', 'dll', 'driver', '1thread', 'mthread', '16bit', '32bit', '64bit'])
 235 | 
 236 |     ldr = id0.nodeByName("$ loader name")
 237 |     if ldr:
 238 |         print("loader: %s %s" % (id0.string(ldr, 'S', 0), id0.string(ldr, 'S', 1)))
 239 | 
 240 |     if not id0.root:
 241 |         print("database has no RootNode")
 242 |         return
 243 | 
 244 |     if id0.idbparams:
 245 |         params = idblib.IDBParams(id0, id0.idbparams)
 246 |         print("cpu: %s, version=%d, filetype=%s, ostype=%s, apptype=%s, core:%x, size:%x" % (params.cpu, params.version, ftstring(params.filetype), osstring(params.ostype), appstring(params.apptype), params.corestart, params.coresize))
 247 | 
 248 |     print("idaver=%s: %s" % (nonefmt("%04d", id0.idaver), id0.idaverstr))
 249 | 
 250 |     srcmd5 = id0.originmd5
 251 |     print("nopens=%s, ctime=%s, crc=%s, md5=%s" % (nonefmt("%d", id0.nropens), nonefmt("%08x", id0.creationtime), nonefmt("%08x", id0.somecrc), hexdump(srcmd5) if srcmd5 else "-"))
 252 | 
 253 |     dumpuser(id0)
 254 | 
 255 | 
 256 | def dumpnames(args, id0, nam):
 257 |     for ea in nam.allnames():
 258 |         print("%08x: %s" % (ea, id0.name(ea)))
 259 | 
 260 | 
 261 | def dumpscript(id0, node):
 262 |     """ dump all stored scripts """
 263 |     s = idblib.Script(id0, node)
 264 | 
 265 |     print("======= %s %s =======" % (s.language, s.name))
 266 |     print(s.body)
 267 | 
 268 | 
 269 | def dumpstructmember(m):
 270 |     """
 271 |     Dump info for a struct member.
 272 |     """
 273 |     print("     %02x %02x %08x %02x: %-40s" % (m.skip, m.size, m.flags, m.props, m.name), end="")
 274 |     if m.enumid:
 275 |         print(" enum %08x" % m.enumid, end="")
 276 |     if m.structid:
 277 |         print(" struct %08x" % m.structid, end="")
 278 |     if m.ptrinfo:
 279 |         # packed
 280 |         # note: 64bit nrs are stored low32, high32
 281 |         #  flags1, target, base, delta, flags2
 282 | 
 283 |         # flags1:
 284 |         #   0=off8  1=off16 2=off32 3=low8  4=low16 5=high8 6=high16 9=off64
 285 |         #   0x10 = targetaddr, 0x20 = baseaddr, 0x40 = delta, 0x80 = base is plainnum
 286 |         # flags2:
 287 |         #   1=image is off, 0x10 = subtract, 0x20 = signed operand
 288 |         print(" ptr %s" % m.ptrinfo, end="")
 289 |     if m.typeinfo:
 290 |         print(" type %s" % m.typeinfo, end="")
 291 |     print()
 292 | 
 293 | 
 294 | def dumpstruct(id0, node):
 295 |     """
 296 |     dump all info for the struct defined by `node`
 297 |     """
 298 |     s = idblib.Struct(id0, node)
 299 | 
 300 | 
 301 |     print("struct %s, 0x%x" % (s.name, s.flags))
 302 |     for m in s:
 303 |         dumpstructmember(m)
 304 | 
 305 | def dumpbitmember(m):
 306 |     print("        %08x %s" % (m.value or 0, m.name))
 307 | def dumpmask(m):
 308 |     print("    mask %08x %s" % (m.mask, m.name))
 309 |     for m in m:
 310 |         dumpbitmember(m)
 311 | def dumpbitfield(id0, node):
 312 |     b = idblib.Bitfield(id0, node)
 313 |     print("bitfield %s, %s, %s, %s" % (b.name, nonefmt("0x%x", b.count), nonefmt("0x%x", b.representation), nonefmt("0x%x", b.flags)))
 314 |     for m in b:
 315 |         dumpmask(m)
 316 | 
 317 | def dumpenummember(m):
 318 |     """
 319 |     Print information on a single enum member
 320 |     """
 321 |     print("    %08x %s" % (m.value or 0, m.name))
 322 | 
 323 | def dumpenum(id0, node):
 324 |     """
 325 |     Dump all info for the enum defined by `node`
 326 |     """
 327 |     e = idblib.Enum(id0, node)
 328 |     if e.flags and e.flags&1:
 329 |         dumpbitfield(id0, node)
 330 |         return
 331 |     print("enum %s, %s, %s, %s" % (e.name, nonefmt("0x%x", e.count), nonefmt("0x%x", e.representation), nonefmt("0x%x", e.flags)))
 332 | 
 333 |     for m in e:
 334 |         dumpenummember(m)
 335 | 
 336 | 
 337 | def dumpimport(id0, node):
 338 |     # Note that '$ imports' is a list where the actual nodes
 339 |     # are stored in the list, therefore we add '1' to the node here.
 340 | 
 341 |     # first the named imports
 342 |     startkey = id0.makekey(node+1, 'S')
 343 |     endkey = id0.makekey(node+1, 'T')
 344 |     cur = id0.btree.find('ge', startkey)
 345 |     while cur.getkey() < endkey:
 346 |         txt = id0.string(cur)
 347 |         key = cur.getkey()
 348 |         ea = id0.decodekey(key)[3]
 349 |         print("%08x: %s" % (ea, txt))
 350 |         cur.next()
 351 | 
 352 |     # then list the imports by ordinal
 353 |     startkey = id0.makekey(node+1, 'A')
 354 |     endkey = id0.makekey(node+1, 'B')
 355 |     cur = id0.btree.find('ge', startkey)
 356 |     while cur.getkey() < endkey:
 357 |         ordinal = id0.decodekey(cur.getkey())[3]
 358 |         ea = id0.int(cur)
 359 |         print("%08x: (ord%04d) %s" % (ea, ordinal, id0.name(ea)))
 360 |         cur.next()
 361 | 
 362 | 
 363 | def enumlist(id0, listname, callback):
 364 |     """
 365 |     Lists are all stored in a similar way.
 366 | 
 367 |     (listnode, 'N')           = listname
 368 |     (listnode, 'A', -1)       = list size      <-- not for '$ scriptsnippets'
 369 |     (listnode, 'A', seqnr)    = itemnode+1
 370 | 
 371 |     (listnode, 'Y', itemnode) = seqnr          <-- only with '$ enums'
 372 | 
 373 |     (listnode, 'Y', 0)        = list size      <-- only '$ scriptsnippets'
 374 |     (listnode, 'Y', 1)        = ?              <-- only '$ scriptsnippets'
 375 | 
 376 |     (listnode, 'S', seqnr)    = dllname        <-- only '$ imports'
 377 | 
 378 |     """
 379 |     listnode = id0.nodeByName(listname)
 380 |     if not listnode:
 381 |         return
 382 | 
 383 |     startkey = id0.makekey(listnode, 'A')
 384 |     endkey = id0.makekey(listnode, 'A', 0xFFFFFFFF)
 385 |     cur = id0.btree.find('ge', startkey)
 386 |     while cur.getkey() < endkey:
 387 |         item = id0.int(cur)
 388 |         callback(id0, item - 1)
 389 |         cur.next()
 390 | 
 391 | 
 392 | def listfuncdirs(id0):
 393 |     listnode = id0.nodeByName('$ dirtree/funcs')
 394 |     if not listnode:
 395 |         return
 396 | 
 397 |     dir_id = 0
 398 |     while True:
 399 |         start = dir_id * 0x10000
 400 |         end = start + 0xFFFF
 401 |         data = id0.blob(listnode, 'S', start, end)
 402 |         if data == b'':
 403 |             break
 404 |         dumpfuncdir(id0, dir_id, data)
 405 |         dir_id += 1
 406 | 
 407 | 
 408 | def dumpfuncdir(id0, dir_index, data):
 409 |     terminate = data.find(b'\0', 1)
 410 |     name = data[1:terminate].decode('utf-8')
 411 | 
 412 |     p = idblib.IdaUnpacker(id0.wordsize, data[terminate+1:])
 413 |     parent = p.nextword()
 414 |     unk = p.next32()
 415 |     
 416 |     if data[0] == 0:  # IDA 7.5
 417 |         subdir_count = p.next32()
 418 |         subdirs = []
 419 |         while subdir_count:
 420 |             subdir_id = p.nextwordsigned()
 421 |             if subdirs:
 422 |                 subdir_id = subdirs[-1] + subdir_id
 423 |             subdirs.append(subdir_id)
 424 |             subdir_count -= 1
 425 | 
 426 |         func_count = p.next32()
 427 |         funcs = []
 428 |         while func_count:
 429 |             func_id = p.nextwordsigned()
 430 |             if funcs:
 431 |                 func_id = funcs[-1] + func_id
 432 |             funcs.append(func_id)
 433 |             func_count -= 1
 434 | 
 435 |     elif data[0] == 1:  # IDA 7.6
 436 |         children_count = p.next32()
 437 |         children = []
 438 |         for i in range(children_count):
 439 |             next_child = p.nextwordsigned()
 440 |             if children:
 441 |                 next_child += children[-1]
 442 |             children.append(next_child)
 443 | 
 444 |         subdir_count = p.next32()
 445 |         children_count -= subdir_count
 446 |         childtype_counts = [subdir_count]
 447 |         while children_count:
 448 |             childtype_count = p.next32()
 449 |             children_count -= childtype_count
 450 |             childtype_counts.append(childtype_count)
 451 | 
 452 |         subdirs = []
 453 |         funcs = []
 454 |         i = 0
 455 |         parsing_subdirs = True  # switch back and forth
 456 |         for childtype_count in childtype_counts:
 457 |             for _ in range(childtype_count):
 458 |                 if parsing_subdirs:
 459 |                     subdirs.append(children[i])
 460 |                 else:
 461 |                     funcs.append(children[i])
 462 |                 i += 1
 463 |             parsing_subdirs = not parsing_subdirs
 464 |     else:
 465 |         raise NotImplementedError('unsupported funcdir schema')
 466 | 
 467 |     if not p.eof():
 468 |         raise Exception('not EOF after dir parsed')
 469 | 
 470 |     print("dir %d = %s" % (dir_index, name))
 471 |     print("  parent = %d" % parent)
 472 |     print("  subdirs:")
 473 |     for subdir in subdirs:
 474 |         print("    %d" % subdir)
 475 |     print("  functions:")
 476 |     for func in funcs:
 477 |         print("    0x%x" % func)
 478 | 
 479 | 
 480 | def printent(args, id0, c):
 481 |     if args.verbose:
 482 |         print("%s = %s" % (id0.prettykey(c.getkey()), id0.prettyval(c.getval())))
 483 |     else:
 484 |         print("%s = %s" % (hexdump(c.getkey()), hexdump(c.getval())))
 485 | 
 486 | 
 487 | def createkey(args, id0, base, tag, ix):
 488 |     """
 489 | 
 490 |     parse base node specification:
 491 | 
 492 |     '?<name>' -> explicit N<name> key
 493 |     '#<number>' -> relative to nodebase
 494 |     '.<number>' -> absolute nodeid
 495 | 
 496 |     '<name>'  -> lookup by name.
 497 | 
 498 |     """
 499 |     if base[:1] == '?':
 500 |         return id0.namekey(base[1:])
 501 | 
 502 |     if re.match(r'^#(?:0[xX][0-9a-fA-F]+|\d+)$', base):
 503 |         nodeid = int(base[1:], 0) + id0.nodebase
 504 |     elif re.match(r'^\.(?:0[xX][0-9a-fA-F]+|\d+)$', base):
 505 |         nodeid = int(base[1:], 0)
 506 |     else:
 507 |         nodeid = id0.nodeByName(base)
 508 |         if nodeid and args.verbose > 1:
 509 |             print("found node %x for %s" % (nodeid, base))
 510 |     if nodeid is None:
 511 |         print("Could not find '%s'" % base)
 512 |         return
 513 | 
 514 |     s = [nodeid]
 515 |     if tag is not None:
 516 |         s.append(tag)
 517 |         if ix is not None:
 518 |             try:
 519 |                 ix = int(ix, 0)
 520 |             except:
 521 |                 pass
 522 |             s.append(ix)
 523 | 
 524 |     return id0.makekey(*s)
 525 | 
 526 | 
 527 | def enumeratecursor(args, c, onerec, callback):
 528 |     """
 529 |     Enumerate cursor in direction specified by `--dec` or `--inc`,
 530 |     taking into account the optional limit set by `--limit`
 531 | 
 532 |     Output according to verbosity level set by `--verbose`.
 533 |     """
 534 |     limit = args.limit
 535 |     while c and not c.eof() and (limit is None or limit > 0):
 536 |         callback(c)
 537 |         if args.dec:
 538 |             c.prev()
 539 |         else:
 540 |             c.next()
 541 |         if limit is not None:
 542 |             limit -= 1
 543 |         elif onerec:
 544 |             break
 545 | 
 546 | 
 547 | def id0query(args, id0, query):
 548 |     """
 549 |     queries start with an optional operator: <,<=,>,>=,==
 550 | 
 551 |     followed by either a name or address or nodeid
 552 | 
 553 |     Addresses are specified as a sequence of hexadecimal charaters.
 554 |     Nodeid's may be specified either as the full node id, starting with ff00,
 555 |     or starting with a '_'
 556 |     Names are anything which can be found under the name tree in the database.
 557 | 
 558 |     after the name/addr/node there is optionally a slash, followed by a node tag,
 559 |     and another slash, followed by a index or hash string.
 560 | 
 561 |     """
 562 | 
 563 |     xlatop = {'=': 'eq', '==': 'eq', '>': 'gt', '<': 'lt', '>=': 'ge', '<=': 'le'}
 564 | 
 565 |     SEP = r";"
 566 |     m = re.match(r'^([=<>]=?)?(.+?)(?:' + SEP + r'(\w+)(?:' + SEP + r'(.+))?)?$', query)
 567 |     op = m.group(1) or "=="
 568 |     base = m.group(2)
 569 |     tag = m.group(3)  # optional ;tag
 570 |     ix = m.group(4)   # optional ;ix
 571 | 
 572 |     op = xlatop[op]
 573 | 
 574 |     c = id0.btree.find(op, createkey(args, id0, base, tag, ix))
 575 | 
 576 |     enumeratecursor(args, c, op=='eq', lambda c:printent(args, id0, c))
 577 | 
 578 | 
 579 | def getsegs(id0):
 580 |     """
 581 |     Returns a list of all segments.
 582 |     """
 583 |     seglist = []
 584 |     node = id0.nodeByName('$ segs')
 585 |     if not node:
 586 |         return
 587 |     startkey = id0.makekey(node, 'S')
 588 |     endkey = id0.makekey(node, 'T')
 589 |     cur = id0.btree.find('ge', startkey)
 590 |     while cur.getkey() < endkey:
 591 |         s = idblib.Segment(id0, cur.getval())
 592 |         seglist.append(s)
 593 |         cur.next()
 594 | 
 595 |     return seglist
 596 | 
 597 | 
 598 | def listsegments(id0):
 599 |     """
 600 |     Print a summary of all segments found in the IDB.
 601 |     """
 602 |     ssnode = id0.nodeByName('$ segstrings')
 603 |     if not ssnode:
 604 |         print("can't find '$ segstrings' node")
 605 |         return
 606 |     segstrings = id0.blob(ssnode, 'S')
 607 |     p = idblib.IdaUnpacker(id0.wordsize, segstrings)
 608 |     unk = p.next32()
 609 |     nextid = p.next32()
 610 |     slist = []
 611 |     while not p.eof():
 612 |         slen = p.next32()
 613 |         if slen is None:
 614 |             break
 615 |         name = p.bytes(slen)
 616 |         if name is None:
 617 |             break
 618 |         slist.append(name.decode('utf-8', 'ignore'))
 619 | 
 620 |     segs = getsegs(id0)
 621 |     for s in segs:
 622 |         print("%08x - %08x  %s" % (s.startea, s.startea+s.size, slist[s.name_id-1]))
 623 | 
 624 | def classifynodes(args, id0):
 625 |     """
 626 |     Attempt to classify all nodes in the IDA database.
 627 | 
 628 |     Note: this does not work for very old dbs
 629 |     """
 630 |     nodetype = {}
 631 |     tagstats = defaultdict(lambda : defaultdict(int))
 632 | 
 633 |     segs = getsegs(id0)
 634 | 
 635 |     print("node: %x .. %x" % (id0.nodebase, id0.maxnode))
 636 | 
 637 |     def addstat(nodetype, k):
 638 |         if len(k)<3:
 639 |             print("??? strange, expected longer key - %s" % k)
 640 |             return
 641 |         tag = k[2].decode('utf-8')
 642 |         if len(k)==3:
 643 |             tagstats[nodetype][(tag, )] += 1
 644 |         elif len(k)==4:
 645 |             value = k[3]
 646 |             if type(value)==int:
 647 |                 if isaddress(value):
 648 |                     tagstats[nodetype][(tag, 'addr')] += 1
 649 |                 elif isnode(value):
 650 |                     tagstats[nodetype][(tag, 'node')] += 1
 651 |                 else:
 652 |                     if value >= id0.maxnode:
 653 |                         value -= pow(0x100, id0.wordsize)
 654 |                     tagstats[nodetype][(tag, value)] += 1
 655 |             else:
 656 |                 tagstats[nodetype][(tag, 'string')] += 1
 657 |         else:
 658 |             print("??? strange, expected shorter key - %s" % k)
 659 |             return
 660 | 
 661 |     def isaddress(addr):
 662 |         for s in segs:
 663 |             if s.startea <= addr < s.startea+s.size:
 664 |                 return True
 665 | 
 666 |     def isnode(addr):
 667 |         return id0.nodebase <= addr <= id0.maxnode
 668 | 
 669 |     def processbitfieldvalue(v):
 670 |         nodetype[v._nodeid] = 'bitfieldvalue'
 671 | 
 672 |     def processbitfieldmask(m):
 673 |         nodetype[m._nodeid] = 'bitfieldmask'
 674 | 
 675 |         for m in m:
 676 |             processbitfieldvalue(m)
 677 | 
 678 |     def processbitfield(id0, node):
 679 |         nodetype[node] = 'bitfield'
 680 | 
 681 |         b = idblib.Bitfield(id0, node)
 682 |         for m in b:
 683 |             processbitfieldmask(m)
 684 | 
 685 | 
 686 |     def processenummember(m):
 687 |         nodetype[m._nodeid] = 'enummember'
 688 | 
 689 |     def processenums(id0, node):
 690 |         nodetype[node] = 'enum'
 691 | 
 692 |         e = idblib.Enum(id0, node)
 693 |         if e.flags&1:
 694 |             processbitfield(id0, node)
 695 |             return
 696 | 
 697 |         for m in e:
 698 |             processenummember(m)
 699 | 
 700 |     def processstructmember(m, typename):
 701 |         nodetype[m._nodeid] = typename
 702 | 
 703 |     def processstructs(id0, node, typename):
 704 |         nodetype[node] = typename
 705 |         s = idblib.Struct(id0, node)
 706 | 
 707 |         for m in s:
 708 |             processstructmember(m, typename+"member")
 709 | 
 710 |     def processscripts(id0, node):
 711 |         nodetype[node] = 'script'
 712 | 
 713 |     def processaddr(id0, cur):
 714 |         k = id0.decodekey(cur.getkey())
 715 |         if len(k)==4 and k[2:4] == (b'A', 2):
 716 |             nodetype[id0.int(cur)-1] = 'hexrays'
 717 | 
 718 |         addstat('addr', k)
 719 | 
 720 |     def processfunc(id0, funcspec):
 721 |         p = idblib.IdaUnpacker(id0.wordsize, funcspec)
 722 | 
 723 |         funcstart = p.nextword()
 724 |         funcsize = p.nextword()
 725 |         flags = p.next16()
 726 |         if flags is None:
 727 |             return
 728 |         if flags&0x8000:   # is tail
 729 |             return
 730 | 
 731 |         node = p.nextword()
 732 | 
 733 |         if node<0xFFFFFF and node!=0:
 734 |             processstructs(id0, node + id0.nodebase, "frame")
 735 | 
 736 |     def processimport(id0, node):
 737 |         print("imp %08x" % node)
 738 |         startkey = id0.makekey(node+1, 'A')
 739 |         endkey = id0.makekey(node+1, 'B')
 740 |         cur = id0.btree.find('ge', startkey)
 741 |         while cur.getkey() < endkey:
 742 |             dllnode = id0.int(cur)
 743 |             nodetype[dllnode] = 'import'
 744 |             cur.next()
 745 | 
 746 | 
 747 |     # mark enums, structs, scripts.
 748 |     enumlist(id0, '$ enums', processenums)
 749 |     enumlist(id0, '$ structs', lambda id0, node : processstructs(id0, node, "struct"))
 750 |     enumlist(id0, '$ scriptsnippets', processscripts)
 751 |     enumlist(id0, '$ imports', processimport)
 752 | 
 753 |     # enum functions, scan for stackframes
 754 |     funcsnode = id0.nodeByName('$ funcs')
 755 |     startkey = id0.makekey(funcsnode, 'S')
 756 |     endkey = id0.makekey(funcsnode, 'T')
 757 |     cur = id0.btree.find('ge', startkey)
 758 |     while cur.getkey() < endkey:
 759 |         processfunc(id0, cur.getval())
 760 |         cur.next()
 761 | 
 762 |     clinode = id0.nodeByName('$ cli')
 763 |     if clinode:
 764 |         for letter in "ABCDEFGHIJKMcio":
 765 |             startkey = id0.makekey(clinode, letter)
 766 |             endkey = id0.makekey(clinode, chr(ord(letter)+1))
 767 |             cur = id0.btree.find('ge', startkey)
 768 |             while cur.getkey() < endkey:
 769 |                 nodetype[id0.int(cur)] = 'cli.'+letter
 770 |                 cur.next()
 771 | 
 772 | 
 773 |     # enum addresses, scan for hex-rays nodes
 774 |     startkey = b'.'
 775 |     endkey = id0.makekey(id0.nodebase)
 776 |     cur = id0.btree.find('ge', startkey)
 777 |     while cur.getkey() < endkey:
 778 |         processaddr(id0, cur)
 779 |         cur.next()
 780 | 
 781 |     # addresses above node list
 782 |     startkey = id0.makekey(id0.maxnode+1)
 783 |     endkey = b'/'
 784 |     cur = id0.btree.find('ge', startkey)
 785 |     while cur.getkey() < endkey:
 786 |         processaddr(id0, cur)
 787 |         cur.next()
 788 | 
 789 |     # scan for unmarked nodes
 790 |     #  $ fr[0-9a-f]+\.\w+
 791 |     #  $ fr[0-9a-f]+\. [rs]
 792 |     #  $ F[0-9A-F]+\.\w+
 793 |     #  $ Stack of \w+
 794 |     #  Stack[0000007C]
 795 |     #  xrefs to \w+
 796 | 
 797 |     startkey = id0.makekey(id0.nodebase)
 798 |     endkey = id0.makekey(id0.maxnode+1)
 799 |     cur = id0.btree.find('ge', startkey)
 800 |     while cur.getkey() < endkey:
 801 |         k = id0.decodekey(cur.getkey())
 802 |         node = k[1]
 803 |         if node not in nodetype:
 804 |             nodetype[node] = "unknown"
 805 |         if nodetype[node] == "unknown" and k[2] == b'N':
 806 |             name = cur.getval().rstrip(b'\x00')
 807 |             if re.match(br'\$ fr[0-9a-f]+\.\w+$', name):
 808 |                 name = 'fr-type-functionframe'
 809 |             elif re.match(br'\$ fr[0-9a-f]+\. [rs]$', name):
 810 |                 name = 'fr-type-functionframe'
 811 |             elif re.match(br'\$ F[0-9A-F]+\.\w+$', name):
 812 |                 name = 'F-type-functionframe'
 813 |             elif name.startswith(b'Stack of '):
 814 |                 name = 'stack-type-functionframe'
 815 |             elif name.startswith(b'Stack['):
 816 |                 name = 'old-stack-type-functionframe'
 817 |             elif name.startswith(b'xrefs to '):
 818 |                 name = 'old-xrefs'
 819 |             else:
 820 |                 name = name.decode('utf-8', 'ignore')
 821 |             nodetype[node] = name
 822 | 
 823 |         cur.next()
 824 | 
 825 |     # output node classification
 826 |     if args.verbose:
 827 |         for k, v in sorted(nodetype.items(), key=lambda kv:kv[0]):
 828 |             print("%08x: %s" % (k, v))
 829 | 
 830 |     # summarize tags per nodetype
 831 |     startkey = id0.makekey(id0.nodebase)
 832 |     endkey = id0.makekey(id0.maxnode+1)
 833 |     cur = id0.btree.find('ge', startkey)
 834 |     while cur.getkey() < endkey:
 835 |         k = id0.decodekey(cur.getkey())
 836 |         node = k[1]
 837 |         nt = nodetype[node]
 838 | 
 839 |         addstat(nt, k)
 840 | 
 841 |         cur.next()
 842 | 
 843 |     # output tag statistics
 844 |     for nt, ntstats in sorted(tagstats.items(), key=lambda kv:kv[0]):
 845 |         print("====== %s =====" % nt)
 846 |         for k, v in ntstats.items():
 847 |             if len(k)==1:
 848 |                 print("%5d - %s" % (v, k[0]))
 849 |             elif len(k)==2 and type(k[1])==type(1):
 850 |                 print("%5d - %s %8x" % (v, k[0], k[1]))
 851 |             elif type(k[1])==type(1):
 852 |                 print("%5d - %s %8x %s" % (v, k[0], k[1], k[2:]))
 853 |             else:
 854 |                 print("%5d - %s %s %s" % (v, k[0], k[1], k[2:]))
 855 | 
 856 | 
 857 | def processid0(args, id0):
 858 |     if args.info:
 859 |         dumpinfo(id0)
 860 | 
 861 |     if args.pagedump:
 862 |         id0.btree.pagedump()
 863 | 
 864 |     if args.query:
 865 |         for query in args.query:
 866 |             id0query(args, id0, query)
 867 |     elif args.id0:
 868 |         id0.btree.dump()
 869 |     elif args.inc:
 870 |         c = id0.btree.find('ge', b'')
 871 |         enumeratecursor(args, c, False, lambda c:printent(args, id0, c))
 872 |     elif args.dec:
 873 |         c = id0.btree.find('le', b'\x80')
 874 |         enumeratecursor(args, c, False, lambda c:printent(args, id0, c))
 875 | 
 876 | 
 877 | def hexascdumprange(id1, a, b):
 878 |     line = asc = ""
 879 |     for ea in range(a, b):
 880 |         if len(line)==0:
 881 |             line = "%08x:" % ea
 882 |         byte = id1.getFlags(ea)&0xFF
 883 |         line += " %02x" % byte
 884 |         asc += chr(byte) if 32<byte<127 else '.'
 885 | 
 886 |         if len(line) == 9 + 3*16:
 887 |             line += " " + asc
 888 |             print(line)
 889 |             line = asc = ""
 890 |     if len(line):
 891 |         while len(line) < 9 + 3*16:
 892 |             line += "   "
 893 |         line += " " + asc
 894 |         print(line)
 895 | 
 896 | 
 897 | def saverange(id1, a, b, fh):
 898 |     buf = bytes()
 899 |     for ea in range(a, b):
 900 |         byte = id1.getFlags(ea)&0xFF
 901 |         buf += struct.pack("B", byte)
 902 | 
 903 |         if len(buf) == 65536:
 904 |             fh.write(buf)
 905 |             buf = bytes()
 906 | 
 907 |     if buf:
 908 |         fh.write(buf)
 909 | 
 910 | 
 911 | def processid1(args, id1):
 912 |     if args.id1:
 913 |         id1.dump()
 914 |     elif args.dump or args.dumpraw:
 915 |         m = re.match(r'^(\d\w*)-(\d\w*)?$', args.dump or args.dumpraw)
 916 |         if not m:
 917 |             raise Exception("--dump requires a byte range")
 918 |         a = int(m.group(1), 0)
 919 |         b = int(m.group(2), 0)
 920 | 
 921 |         if args.dumpraw:
 922 |             saverange(id1, a, b, stdout)
 923 |         else:
 924 |             hexascdumprange(id1, a, b)
 925 | 
 926 | 
def processid2(args, id2):
    """ placeholder: no processing is implemented for the id2 section """
    pass
 929 | 
 930 | 
def processnam(args, nam):
    """ placeholder: no processing is implemented for the nam section """
    pass
 933 | 
 934 | 
def processtil(args, til):
    """ placeholder: no processing is implemented for the til section """
    pass
 937 | 
 938 | 
def processseg(args, seg):
    """ placeholder: no processing is implemented for the seg section """
    pass
 941 | 
 942 | 
 943 | def processidb(args, idb):
 944 |     if args.verbose > 1:
 945 |         print("magic=%s, filever=%d" % (idb.magic, idb.fileversion))
 946 |         for i in range(6):
 947 |             comp, ofs, size, checksum = idb.getsectioninfo(i)
 948 |             if ofs:
 949 |                 part = idb.getpart(i)
 950 |                 print("%2d: %02x, %08x %8x [%08x]:  %s" % (i, comp, ofs, size, checksum, hexdump(part.read(256))))
 951 | 
 952 |     nam = idb.getsection(idblib.NAMFile)
 953 |     id0 = idb.getsection(idblib.ID0File)
 954 |     id1 = idb.getsection(idblib.ID1File)
 955 |     processid0(args, id0)
 956 |     processid1(args, id1)
 957 |     processid2(args, idb.getsection(idblib.ID2File))
 958 |     processnam(args, nam)
 959 |     processtil(args, idb.getsection(idblib.TILFile))
 960 |     processseg(args, idb.getsection(idblib.SEGFile))
 961 | 
 962 |     if args.names:
 963 |         dumpnames(args, id0, nam)
 964 |     if args.classify:
 965 |         classifynodes(args, id0)
 966 | 
 967 |     if args.scripts:
 968 |         enumlist(id0, '$ scriptsnippets', dumpscript)
 969 |     if args.structs:
 970 |         enumlist(id0, '$ structs', dumpstruct)
 971 |     if args.enums:
 972 |         enumlist(id0, '$ enums', dumpenum)
 973 |     if args.funcdirs:
 974 |         listfuncdirs(id0)
 975 |     if args.imports:
 976 |         enumlist(id0, '$ imports', dumpimport)
 977 |     if args.segs:
 978 |         listsegments(id0)
 979 | 
 980 | 
 981 | def processfile(args, filetypehint, fh):
 982 |     class DummyIDB:
 983 |         def __init__(idb, args):
 984 |             if args.i64:
 985 |                 idb.magic = 'IDA2'
 986 |             elif args.i32:
 987 |                 idb.magic = 'IDA1'
 988 |             else:
 989 |                 idb.magic = None
 990 | 
 991 |     try:
 992 |         magic = fh.read(64)
 993 |         fh.seek(-64, 1)
 994 |         if magic.startswith(b"Va") or magic.startswith(b"VA"):
 995 |             idb = DummyIDB(args)
 996 |             if filetypehint == 'id1':
 997 |                 processid1(args, idblib.ID1File(idb, fh))
 998 |             elif filetypehint == 'nam':
 999 |                 processnam(args, idblib.NAMFile(idb, fh))
1000 |             elif filetypehint == 'seg':
1001 |                 processseg(args, idblib.SEGFile(idb, fh))
1002 |             else:
1003 |                 print("unknown VA type file: %s" % hexdump(magic))
1004 |         elif magic.startswith(b"IDAS"):
1005 |             processid2(args, idblib.ID2File(DummyIDB(args), fh))
1006 |         elif magic.startswith(b"IDATIL"):
1007 |             processtil(args, idblib.ID2File(DummyIDB(args), fh))
1008 |         elif magic.startswith(b"IDA"):
1009 |             processidb(args, idblib.IDBFile(fh))
1010 |         elif magic.find(b'B-tree v') > 0:
1011 |             processid0(args, idblib.ID0File(DummyIDB(args), fh))
1012 | 
1013 |     except Exception as e:
1014 |         print("ERROR %s" % e)
1015 |         if args.debug:
1016 |             raise
1017 | 
1018 | 
def recover_database(args, basepath, dbfiles):
    """ Process a database which is stored as a set of separate section files. """
    idb = idblib.RecoverIDBFile(args, basepath, dbfiles)
    processidb(args, idb)
1021 | 
1022 | 
def DirEnumerator(args, path):
    """
    Enumerate all files / links in a directory,
    optionally recursing into subdirectories,
    or ignoring links.

    Yields the path of each file found.
    `args.skiplinks` and `args.recurse` select the optional behaviour.
    Problems with individual entries are printed, enumeration then continues
    with the next entry.
    """
    # The `with` makes sure the directory handle is closed, also when the
    # consumer stops iterating before the directory is exhausted.
    # note: scandir never returns the '.' and '..' entries.
    with os.scandir(path) as entries:
        for d in entries:
            try:
                if d.is_symlink() and args.skiplinks:
                    pass
                elif d.is_file():
                    yield d.path
                elif d.is_dir() and args.recurse:
                    yield from DirEnumerator(args, d.path)
            except Exception as e:
                print("EXCEPTION %s accessing %s/%s" % (e, path, d.name))
1042 | 
1043 | 
def EnumeratePaths(args, paths):
    """
    Enumerate all paths, files from the commandline
    optionally recursing into subdirectories.

    Yields each url and each existing file exactly once. Directories are
    only entered when `args.recurse` is set, symbolic links are skipped when
    `args.skiplinks` is set. Errors are printed, after which enumeration
    continues with the next path.
    """
    for fn in paths:
        try:
            # 3 - for ftp://, 4 for http://, 5 for https://
            if fn.find("://") in (3, 4, 5):
                # a url is passed on as-is, and must not be checked
                # as a local path as well, or it could be yielded twice.
                yield fn
            elif os.path.islink(fn) and args.skiplinks:
                pass
            elif os.path.isdir(fn) and args.recurse:
                yield from DirEnumerator(args, fn)
            elif os.path.isfile(fn):
                yield fn
        except Exception as e:
            print("EXCEPTION %s accessing %s" % (e, fn))
1063 | 
1064 | 
def filetype_from_name(fn):
    """
    Return the lowercased part of `fn` following the last '.' or '/',
    whichever comes last. Without either, the entire name is returned.
    """
    lastdot = fn.rfind('.')
    lastslash = fn.rfind('/')
    cut = lastslash if lastslash > lastdot else lastdot
    tail = fn[cut + 1:]
    return tail.lower()
1068 | 
1069 | 
def isv2name(name):
    """ Return True when `name` is, ignoring case, one of the known v2 database filenames. """
    v2names = ('$segregs.ida', '$segs.ida', '0.ida', '1.ida', 'ida.idl', 'names.ida')
    lowered = name.lower()
    return lowered in v2names
1072 | 
1073 | 
def isv3ext(ext):
    """ Return True when `ext` ( including the dot ) is, ignoring case, a known section file extension. """
    v3exts = ('.id0', '.id1', '.id2', '.nam', '.til')
    lowered = ext.lower()
    return lowered in v3exts
1076 | 
1077 | 
def xlatv2name(name):
    """
    Translate a v2 database filename to a short section type name.

    The comparison ignores case. Returns None for unknown names.
    """
    translations = (
        ('$segregs.ida', 'reg'),
        ('$segs.ida', 'seg'),
        ('0.ida', 'id0'),
        ('1.ida', 'id1'),
        ('ida.idl', 'idl'),
        ('names.ida', 'nam'),
    )
    lowered = name.lower()
    for oldname, sectiontype in translations:
        if oldname == lowered:
            return sectiontype
    return None
1089 | 
1090 | 
def main():
    """
    Commandline entry point.

    Parses the commandline, then processes each file found via the FILES
    arguments, or stdin when no files are specified.
    Section files belonging to the same database are collected along the way,
    and with `--recover` processed together as one database afterwards.
    """
    parser = argparse.ArgumentParser(description='idbtool - print info from hex-rays IDA .idb and .i64 files',
                                     formatter_class=argparse.RawDescriptionHelpFormatter,
                                     epilog="""
idbtool can process complete .idb and .i64 files, but also naked .id0, .id1, .nam, .til files.
All versions since IDA v2.0 are supported.

Queries start with an optional operator: <,<=,>,>=,==.
Followed by either a name or address or nodeid.
Addresses are specified as a sequence of hexadecimal charaters.
Nodeid's may be specified either as the full node id, starting with ff00,
or starting with a '_'.
Names are anything which can be found under the name tree in the database.

After the name/addr/node there is optionally a slash, followed by a node tag,
and another slash, followed by a index or hash string.

Multiple queries can be specified, terminated by another option, or `--`.
Add `-v` for pretty printed keys and values.

Examples:

  idbtool -v --query "$ user1;S;0" -- x.idb
  idbtool -v --limit 4 --query ">#0xa" -- x.idb
  idbtool -v --limit 5 --query ">Root Node;S;0" -- x.idb
  idbtool -v --limit 10 --query ">Root Node;S" -- x.idb
  idbtool -v --query ".0xff000001;N" -- x.idb
""")
    parser.add_argument('--verbose', '-v', action='count', default=0)
    parser.add_argument('--recurse', '-r', action='store_true', help='recurse into directories')
    parser.add_argument('--skiplinks', '-L', action='store_true', help='skip symbolic links')
    parser.add_argument('--filetype', '-t', type=str, help='specify filetype when loading `naked` id1,nam or seg files')
    parser.add_argument('--i64', '-i64', action='store_true', help='specify that `naked` file is from a 64 bit database')
    parser.add_argument('--i32', '-i32', action='store_true', help='specify that `naked` file is from a 32 bit database')

    parser.add_argument('--names', '-n', action='store_true', help='print names')
    parser.add_argument('--scripts', '-s', action='store_true', help='print scripts')
    parser.add_argument('--structs', '-u', action='store_true', help='print structs')
    # parser.add_argument('--comments', '-c', action='store_true', help='print comments')
    parser.add_argument('--enums', '-e', action='store_true', help='print enums and bitfields')
    parser.add_argument('--imports', action='store_true', help='print imports')
    parser.add_argument('--segs', action='store_true', help='print segments')
    parser.add_argument('--funcdirs', action='store_true', help='print function dirs (folders)')
    parser.add_argument('--info', '-i', action='store_true', help='database info')
    parser.add_argument('--inc', action='store_true', help='dump id0 records by cursor increment')
    parser.add_argument('--dec', action='store_true', help='dump id0 records by cursor decrement')
    parser.add_argument('--id0', "-id0", action='store_true', help='dump id0 records, by walking the page tree')
    parser.add_argument('--id1', "-id1", action='store_true', help='dump id1 records')
    parser.add_argument('--dump', type=str, help='hexdump id1 bytes', metavar='FROM-UNTIL')
    parser.add_argument('--dumpraw', type=str, help='output id1 bytes', metavar='FROM-UNTIL')
    parser.add_argument('--pagedump', "-d", action='store_true', help='dump all btree pages, including any that might have become inaccessible due to datacorruption.')
    parser.add_argument('--classify', action='store_true', help='Classify nodes found in the database.')

    parser.add_argument('--query', "-q", type=str, nargs='*', help='search the id0 file for a specific record.')
    parser.add_argument('--limit', '-m', type=int, help='Max nr of records to return for a query.')

    parser.add_argument('--recover', action='store_true', help='recover idb from unpacked files, of v2 database')
    parser.add_argument('--debug', action='store_true')

    parser.add_argument('FILES', type=str, nargs='*', help='Files')

    args = parser.parse_args()

    if args.FILES:
        # maps the database basepath to a dict mapping '.ext' -> filename
        dbs = dict()

        for fn in EnumeratePaths(args, args.FILES):
            basepath, filename = os.path.split(fn)
            # v2 databases use fixed filenames, with the section type encoded in the name.
            v2type = xlatv2name(filename)
            if v2type:
                d = dbs.setdefault(basepath, dict())
                # store with a leading dot, like the v3 extensions below, since that
                # is the form under which RecoverIDBFile looks up the section files.
                d['.' + v2type] = fn
                print("%s -> %s : %s" % (v2type, basepath, filename))
            else:
                basepath, ext = os.path.splitext(fn)
                if isv3ext(ext):
                    d = dbs.setdefault(basepath, dict())
                    d[ext.lower()] = fn

            if not args.dumpraw:
                print("\n==> " + fn + " <==\n")

            try:
                # for v2 files the extension ( always 'ida' ) says nothing about the type,
                # so there the type derived from the filename is used as hint.
                filetype = args.filetype or v2type or filetype_from_name(fn)
                with open(fn, "rb") as fh:
                    processfile(args, filetype, fh)
            except Exception as e:
                print("ERROR: %s" % e)
                if args.debug:
                    raise

        if args.recover:
            for basepath, dbfiles in dbs.items():
                if len(dbfiles) > 1:
                    try:
                        print("\n==> " + basepath + " <==\n")
                        recover_database(args, basepath, dbfiles)
                    except Exception as e:
                        print("ERROR: %s" % e)
                        # like the other error handlers: show the traceback when debugging.
                        if args.debug:
                            raise
    else:
        print("==> STDIN <==")
        processfile(args, args.filetype, sys.stdin.buffer)
1192 | 
1193 | 
# Only run the commandline tool when executed as a script, not when imported as a module.
if __name__ == '__main__':
    main()
1196 | 


--------------------------------------------------------------------------------
/idblib.py:
--------------------------------------------------------------------------------
   1 | """
   2 | idblib - a module for reading hex-rays Interactive DisAssembler databases
   3 | 
   4 | Supports database versions starting with IDA v2.0
   5 | 
   6 | IDA v1.x  is not supported, that was an entirely different file format.
   7 | IDA v2.x  databases are organised as several files, in a directory
   8 | IDA v3.x  databases are bundled into .idb files
   9 | IDA v4 .. v6  various improvements, like databases larger than 4Gig, and 64 bit support.
  10 | 
  11 | Copyright (c) 2016 Willem Hengeveld <itsme@xs4all.nl>
  12 | 
  13 | 
  14 | An IDB file can contain up to 6 sections:
  15 |     id0  the main database
  16 |     id1  contains flags for each byte - what is returned by idc.GetFlags(ea)
  17 |     nam  contains a list of addresses of named items
  18 |     seg  .. only in older databases
  19 |     til  type info
  20 |     id2  ?
  21 | 
  22 | The id0 database is a simple key/value database, much like leveldb
  23 | 
  24 | types of records:
  25 | 
  26 | Some bookkeeping:
  27 | 
  28 |     "$ MAX NODE" -> the highest numbered node value in use.
  29 | 
  30 | A list of names:
  31 | 
  32 |     "N" + name  -> the node id for that name.
  33 | 
Names are both user/disassembler symbols assigned to addresses
in the disassembled code, and IDA internals, like lists of items.
For example: '$ structs', or 'Root Node'.
  37 | 
  38 | The main part:
  39 | 
  40 |     "." + nodeid + tag + index
  41 | 
  42 | This maps directly onto the idasdk netnode interface.
  43 | The size of the nodeid and index is 32bits for .idb files and 64 bits for .i64 files.
  44 | The nodeid and index are encoded as bigendian numbers in the key, and as little endian
  45 | numbers in (most of) the values.
  46 | 
  47 | 
  48 | """
  49 | from __future__ import division, print_function, absolute_import, unicode_literals
  50 | import struct
  51 | import binascii
  52 | import re
  53 | import os
  54 | 
  55 | #############################################################################
  56 | # some code to make this library run with both python2 and python3
  57 | #############################################################################
  58 | 
  59 | import sys
  60 | if sys.version_info[0] == 3:
  61 |     long = int
  62 | else:
  63 |     bytes = bytearray
  64 | 
  65 | try:
  66 |     cmp(1, 2)
  67 | except:
  68 |     # python3 does not have cmp
  69 |     def cmp(a, b): return (a > b) - (a < b)
  70 | 
  71 | 
  72 | class cachedproperty(object):
  73 |     ## .. only works with python3 somehow. -- todo: figure out why not with python2
  74 |     def __init__(self, method):
  75 |         self.method = method
  76 |         self.name = '_' + method.__name__
  77 |     def __get__(self, obj, cls):
  78 |         if not hasattr(obj, self.name):
  79 |             value = self.method(obj)
  80 |             setattr(obj, self.name, value)
  81 |         else:
  82 |             value = getattr(obj, self.name)
  83 |         return value
  84 | 
  85 | 
  86 | def strz(b, o):
  87 |     return b[o:b.find(b'\x00', o)].decode('utf-8', 'ignore')
  88 | 
  89 | def makeStringIO(data):
  90 |     if sys.version_info[0] == 2:
  91 |         from StringIO import StringIO
  92 |         return StringIO(data)
  93 |     else:
  94 |         from io import BytesIO
  95 |         return BytesIO(data)
  96 | 
  97 | 
  98 | #############################################################################
  99 | # some utility functions
 100 | #############################################################################
 101 | 
 102 | 
 103 | def nonefmt(fmt, item):
 104 |     # helper for outputting None without raising an error
 105 |     if item is None:
 106 |         return "-"
 107 |     return fmt % item
 108 | 
 109 | 
 110 | def hexdump(data):
 111 |     if data is None:
 112 |         return
 113 |     return binascii.b2a_hex(data).decode('utf-8')
 114 | 
 115 | 
 116 | #############################################################################
 117 | 
 118 | 
 119 | class FileSection(object):
 120 |     """
 121 |     Presents a file like object which is a section of a larger file.
 122 | 
 123 |     `fh` is expected to have a seek and read method.
 124 | 
 125 | 
 126 |     This class is used to access a section (e.g. the .id0 file) of a larger file (e.g. the .idb file)
 127 |     and make read/seek behave as if it were a separate file.
 128 |     """
 129 |     def __init__(self, fh, start, end):
 130 |         self.fh = fh
 131 |         self.start = start
 132 |         self.end = end
 133 | 
 134 |         self.curpos = 0
 135 |         self.fh.seek(self.start)
 136 | 
 137 |     def read(self, size=None):
 138 |         want = self.end - self.start - self.curpos
 139 |         if size is not None and want > size:
 140 |             want = size
 141 | 
 142 |         if want <= 0:
 143 |             return b""
 144 | 
 145 |         # make sure filepointer is at correct position since we are sharing the fh object with others.
 146 |         self.fh.seek(self.curpos + self.start)
 147 |         data = self.fh.read(want)
 148 |         self.curpos += len(data)
 149 |         return data
 150 | 
 151 |     def seek(self, offset, *args):
 152 |         def isvalidpos(offset):
 153 |             return 0 <= offset <= self.end - self.start
 154 | 
 155 |         if len(args) == 0:
 156 |             whence = 0
 157 |         else:
 158 |             whence = args[0]
 159 |         if whence == 0:
 160 |             if not isvalidpos(offset):
 161 |                 print("invalid seek: from %x to SET:%x" % (self.curpos, offset))
 162 |                 raise Exception("illegal offset")
 163 |             self.curpos = offset
 164 |         elif whence == 1:
 165 |             if not isvalidpos(self.curpos + offset):
 166 |                 raise Exception("illegal offset")
 167 |             self.curpos += offset
 168 |         elif whence == 2:
 169 |             if not isvalidpos(self.end - self.start + offset):
 170 |                 raise Exception("illegal offset")
 171 |             self.curpos = self.end - self.start + offset
 172 |         self.fh.seek(self.curpos + self.start)
 173 | 
 174 |     def tell(self):
 175 |         return self.curpos
 176 | 
 177 | 
 178 | class IdaUnpacker:
 179 |     """
 180 |     Decodes packed ida structures.
 181 |     This is used o.a. in struct definitions, and .id2 files
 182 | 
 183 |     Related sdk functions: pack_dd, unpack_dd, etc.
 184 |     """
 185 |     def __init__(self, wordsize, data):
 186 |         self.wordsize = wordsize
 187 |         self.data = data
 188 |         self.o = 0
 189 | 
 190 |     def eof(self):
 191 |         return self.o >= len(self.data)
 192 |     def have(self, n):
 193 |         return self.o+n <= len(self.data)
 194 | 
 195 |     def nextword(self):
 196 |         """
 197 |         Return an unsigned word-sized integer from the buffer
 198 |         """
 199 |         if self.wordsize == 4:
 200 |             return self.next32()
 201 |         elif self.wordsize == 8:
 202 |             return self.next64()
 203 |         else:
 204 |             raise Exception("unsupported wordsize")
 205 | 
 206 |     def nextwordsigned(self):
 207 |         """
 208 |         Return a signed word-sized integer from the buffer
 209 |         """
 210 |         if self.wordsize == 4:
 211 |             val = self.next32()
 212 |             if val < 0x80000000:
 213 |                 return val
 214 |             return val - 0x100000000
 215 |         elif self.wordsize == 8:
 216 |             val = self.next64()
 217 |             if val < 0x8000000000000000:
 218 |                 return val
 219 |             return val - 0x10000000000000000
 220 |         else:
 221 |             raise Exception("unsupported wordsize")
 222 | 
 223 | 
 224 |     def next64(self):
 225 |         if self.eof():
 226 |             return None
 227 |         lo = self.next32()
 228 |         hi = self.next32()
 229 |         return (hi<<32) | lo
 230 | 
 231 |     def next16(self):
 232 |         """
 233 |         Return a packed 16 bit integer from the buffer
 234 |         """
 235 |         if self.eof():
 236 |             return None
 237 |         byte = self.data[self.o:self.o+1]
 238 |         if byte == b'\xff':
 239 |             # a 16 bit value:
 240 |             # 1111 1111 xxxx xxxx xxxx xxxx 
 241 |             if self.o+3 > len(self.data):
 242 |                 return None
 243 |             val, = struct.unpack_from(">H", self.data, self.o+1)
 244 |             self.o += 3
 245 |             return val
 246 |         elif byte < b'\x80':
 247 |             # a 7 bit value:
 248 |             # 0xxx xxxx
 249 |             self.o += 1
 250 |             val, = struct.unpack("B", byte)
 251 |             return val
 252 |         elif byte < b'\xc0':
 253 |             # a 14 bit value:
 254 |             # 10xx xxxx xxxx xxxx
 255 |             if self.o+2 > len(self.data):
 256 |                 return None
 257 |             val, = struct.unpack_from(">H", self.data, self.o)
 258 |             self.o += 2
 259 |             return val&0x3FFF
 260 |         else:
 261 |             return None
 262 | 
 263 |     def next8(self):
 264 |         if self.eof():
 265 |             return None
 266 |         byte = self.data[self.o:self.o+1]
 267 |         self.o += 1
 268 |         val, = struct.unpack("B", byte)
 269 | 
 270 |         return val
 271 | 
 272 |     def next32(self):
 273 |         """
 274 |         Return a packed integer from the buffer
 275 |         """
 276 |         if self.eof():
 277 |             return None
 278 |         byte = self.data[self.o:self.o+1]
 279 |         if byte == b'\xff':
 280 |             # a 32 bit value:
 281 |             # 1111 1111 xxxx xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 282 |             if self.o+5 > len(self.data):
 283 |                 return None
 284 |             val, = struct.unpack_from(">L", self.data, self.o+1)
 285 |             self.o += 5
 286 |             return val
 287 |         elif byte < b'\x80':
 288 |             # a 7 bit value:
 289 |             # 0xxx xxxx
 290 |             self.o += 1
 291 |             val, = struct.unpack("B", byte)
 292 |             return val
 293 |         elif byte < b'\xc0':
 294 |             # a 14 bit value:
 295 |             # 10xx xxxx xxxx xxxx
 296 |             if self.o+2 > len(self.data):
 297 |                 return None
 298 |             val, = struct.unpack_from(">H", self.data, self.o)
 299 |             self.o += 2
 300 |             return val&0x3FFF
 301 |         elif byte < b'\xe0':
 302 |             # a 29 bit value:
 303 |             # 110x xxxx xxxx xxxx xxxx xxxx xxxx xxxx
 304 |             if self.o+4 > len(self.data):
 305 |                 return None
 306 |             val, = struct.unpack_from(">L", self.data, self.o)
 307 |             self.o += 4
 308 |             return val&0x1FFFFFFF
 309 |         else:
 310 |             return None
 311 | 
 312 |     def bytes(self, n):
 313 |         """
 314 |         Return fixed length string from buffer
 315 |         """
 316 |         if not self.have(n):
 317 |             return None
 318 |         data = self.data[self.o : self.o+n]
 319 |         self.o += n
 320 |         return data
 321 | 
 322 | 
 323 | class IDBFile(object):
 324 |     """
 325 |     Provide access to the various sections in an .idb file.
 326 | 
 327 |     Usage:
 328 | 
 329 |     idb = IDBFile(fhandle)
 330 |     id0 = idb.getsection(ID0File)
 331 | 
 332 |     ID0File is expected to have a class property 'INDEX'
 333 | 
 334 | # v1..v5  id1 and nam files start with 'Va0' .. 'Va4'
 335 | # v6      id1 and nam files start with 'VA*'
 336 | # til files start with 'IDATIL'
 337 | # id2 files start with 'IDAS\x1d\xa5\x55\x55'
 338 | 
 339 |     """
 340 |     def __init__(self, fh):
 341 |         """ constructor takes a filehandle """
 342 |         self.fh = fh
 343 |         self.fh.seek(0)
 344 |         hdrdata = self.fh.read(0x100)
 345 | 
 346 |         self.magic = hdrdata[0:4].decode('utf-8', 'ignore')
 347 |         if self.magic not in ('IDA0', 'IDA1', 'IDA2'):
 348 |             raise Exception("invalid file magic")
 349 | 
 350 |         values = struct.unpack_from("<6LH6L", hdrdata, 6)
 351 |         if values[5] != 0xaabbccdd:
 352 |             fileversion = 0
 353 |             offsets = list(values[0:5])
 354 |             offsets.append(0)
 355 |             checksums = [0 for _ in range(6)]
 356 |         else:
 357 |             fileversion = values[6]
 358 | 
 359 |             if fileversion < 5:
 360 |                 offsets = list(values[0:5])
 361 |                 checksums = list(values[8:13])
 362 |                 idsofs, idscheck = struct.unpack_from("<LH" if fileversion == 1 else "<LL", hdrdata, 56)
 363 |                 offsets.append(idsofs)
 364 |                 checksums.append(idscheck)
 365 | 
 366 |                 # note: filever 4  has '0x5c', zeros, md5, more zeroes
 367 |             elif fileversion == 6:
 368 |                 values = struct.unpack_from("<QQLLHQQQ5LQL", hdrdata, 6)
 369 |                 offsets = [values[_] for _ in (0, 1, 5, 6, 7, 13)]
 370 |                 checksums = [values[_] for _ in (8, 9, 10, 11, 12, 14)]
 371 |             elif fileversion == 910:
 372 |                 """
 373 |                 +00: "IDA2", 0, 0
 374 |                 +06: headersize
 375 |                 +0e: datastart
 376 |                 +16: aabbccdd00000000
 377 |                 +1e: version
 378 |                 +20: compression
 379 |                 +21: 6 qwords   section-size
 380 |                 +5d: md5
 381 |                 """
 382 |                 values = struct.unpack_from("<3QHB6Q", hdrdata, 6)
 383 |                 offsets = [values[1]]
 384 |                 self.sizes = values[5:]
 385 |                 
 386 |                 for s in self.sizes:
 387 |                     offsets.append(offsets[-1]+s)
 388 |                 checksums = [0] * len(offsets)
 389 |                 self.compression = values[4]
 390 |                 if self.compression:
 391 |                     raise Exception("compression not supported for v910")
 392 |             else:
 393 |                 raise Exception("unknown file version")
 394 | 
 395 |         # offsets now has offsets to the various idb parts
 396 |         #  id0, id1, nam, seg, til, id2 ( = sparse file )
 397 |         self.offsets = offsets
 398 |         self.checksums = checksums
 399 |         self.fileversion = fileversion
 400 | 
 401 |     def getsectioninfo(self, i):
 402 |         """
 403 |         Returns a tuple with section parameters by index.
 404 | 
 405 |         The parameteres are:
 406 |          * compression flag
 407 |          * data offset
 408 |          * data size
 409 |          * data checksum
 410 | 
 411 |         Sections are stored in a fixed order: id0, id1, nam, seg, til, id2
 412 |         """
 413 |         if not 0 <= i < len(self.offsets):
 414 |             return 0, 0, 0, 0
 415 | 
 416 |         if self.offsets[i] == 0:
 417 |             return 0, 0, 0, 0
 418 | 
 419 |         self.fh.seek(self.offsets[i])
 420 |         if self.fileversion < 5:
 421 |             comp, size = struct.unpack("<BL", self.fh.read(5))
 422 |             ofs = self.offsets[i] + 5
 423 |         elif self.fileversion == 6:
 424 |             comp, size = struct.unpack("<BQ", self.fh.read(9))
 425 |             ofs = self.offsets[i] + 9
 426 |         elif self.fileversion == 910:
 427 |             comp = 0
 428 |             size = self.sizes[i]
 429 |             ofs = self.offsets[i]
 430 |         else:
 431 |             raise Exception("unhandled file version")
 432 |         return comp, ofs, size, self.checksums[i]
 433 | 
 434 |     def getpart(self, ix):
 435 |         """
 436 |         Returns a fileobject for the specified section.
 437 | 
 438 |         This method optionally decompresses the data found in the .idb file,
 439 |         and returns a file-like object, with seek, read, tell.
 440 |         """
 441 |         if self.offsets[ix] == 0:
 442 |             return
 443 | 
 444 |         comp, ofs, size, checksum = self.getsectioninfo(ix)
 445 | 
 446 |         fh = FileSection(self.fh, ofs, ofs + size)
 447 |         if comp == 2:
 448 |             import zlib
 449 |             # very old databases used a different compression scheme:
 450 |             wbits = -15 if self.magic == 'IDA0' else 15
 451 | 
 452 |             fh = makeStringIO(zlib.decompress(fh.read(size), wbits))
 453 |         elif comp == 0:
 454 |             pass
 455 |         else:
 456 |             raise Exception("unsupported section encoding: %02x" % comp)
 457 |         return fh
 458 | 
 459 |     def getsection(self, cls):
 460 |         """
 461 |         Constructs an object for the specified section.
 462 |         """
 463 |         return cls(self, self.getpart(cls.INDEX))
 464 | 
 465 | 
 466 | class RecoverIDBFile:
 467 |     """
 468 |     RecoverIDBFile has the same interface as IDBFile, but expects the database to be split over several files.
 469 | 
 470 |     This is useful for opening  IDAv2.x databases, or for recovering data from unclosed databases.
 471 |     """
 472 |     id2ext = ['.id0', '.id1', '.nam', '.seg', '.til', '.id2']
 473 | 
 474 |     def __init__(self, args, basepath, dbfiles):
 475 |         if args.i64:
 476 |             self.magic = 'IDA2'
 477 |         else:
 478 |             self.magic = 'IDA1'
 479 |         self.basepath = basepath
 480 |         self.dbfiles = dbfiles
 481 |         self.fileversion = 0
 482 | 
 483 |     def getsectioninfo(self, i):
 484 |         if not 0 <= i < len(self.id2ext):
 485 |             return 0, 0, 0, 0
 486 |         ext = self.id2ext[i]
 487 |         if ext not in self.dbfiles:
 488 |             return 0, 0, 0, 0
 489 |         return 0, 0, os.path.getsize(self.dbfiles[ext]), 0
 490 | 
 491 |     def getpart(self, ix):
 492 |         if not 0 <= ix < len(self.id2ext):
 493 |             return None
 494 |         ext = self.id2ext[ix]
 495 |         if ext not in self.dbfiles:
 496 |             print("can't find %s" % ext)
 497 |             return None
 498 |         return open(self.dbfiles[ext], "rb")
 499 | 
 500 |     def getsection(self, cls):
 501 |         part = self.getpart(cls.INDEX)
 502 |         if part:
 503 |             return cls(self, part)
 504 | 
 505 | 
 506 | def binary_search(a, k):
 507 |     """
 508 |     Do a binary search in an array of objects ordered by '.key'
 509 | 
 510 |     returns the largest index for which:  a[i].key <= k
 511 | 
 512 |     like c++: a.upperbound(k)--
 513 |     """
 514 |     first, last = 0, len(a)
 515 |     while first < last:
 516 |         mid = (first + last) >> 1
 517 |         if k < a[mid].key:
 518 |             last = mid
 519 |         else:
 520 |             first = mid + 1
 521 |     return first - 1
 522 | 
 523 | 
 524 | """
 525 | ################################################################################
 526 | 
I would have liked to make these classes nested classes of BTree, but
the problem is that there is no way for a nested-nested class
of BTree to refer back to a toplevel nested class of BTree.
So I moved these outside of BTree, so I can use them as baseclasses
in the various page implementations.
 532 | 
 533 | class BTree:
 534 |     class BaseEntry(object): pass
 535 |     class BasePage(object): pass
 536 |     class Page15(BasePage):
 537 |         class Entry(BTree.BaseEntry):
 538 |             pass
 539 | 
 540 | >>> NameError: name 'BTree' is not defined
 541 | 
 542 | """
 543 | 
 544 | 
 545 | class BaseIndexEntry(object):
 546 |     """
 547 |     Baseclass for Index Entries.
 548 | 
 549 |     Index entries have a key + value, and a page containing keys larger than that key
 550 |     in this index entry.
 551 | 
 552 |     """
 553 |     def __init__(self, data):
 554 |         ofs = self.recofs
 555 |         if self.recofs < 6:
 556 |             # reading an invalid page...
 557 |             self.val = self.key = None
 558 |             return
 559 | 
 560 |         keylen, = struct.unpack_from("<H", data, ofs) ; ofs += 2
 561 |         self.key = data[ofs:ofs + keylen]  ; ofs += keylen
 562 |         vallen, = struct.unpack_from("<H", data, ofs) ; ofs += 2
 563 |         self.val = data[ofs:ofs + vallen]  ; ofs += vallen
 564 | 
 565 |     def __repr__(self):
 566 |         return "%06x: %s = %s" % (self.page, hexdump(self.key), hexdump(self.val))
 567 | 
 568 | 
 569 | class BaseLeafEntry(BaseIndexEntry):
 570 |     """
 571 |     Baseclass for Leaf Entries
 572 | 
 573 |     Leaf entries have a key + value, and an `indent`
 574 | 
 575 |     The `indent` is there to save space in the index, since subsequent keys
 576 |     usually are very similar.
 577 |     The indent specifies the offset where this key is different from the previous key
 578 |     """
 579 |     def __init__(self, key, data):
 580 |         """ leaf entries get the previous key a an argument. """
 581 |         super(BaseLeafEntry, self).__init__(data)
 582 |         self.key = key[:self.indent] + self.key
 583 | 
 584 |     def __repr__(self):
 585 |         return " %02x:%02x: %s = %s" % (self.unknown1, self.unknown, hexdump(self.key), hexdump(self.val))
 586 | 
 587 | 
 588 | class BTree(object):
 589 |     """
 590 |     BTree is the IDA main database engine.
 591 |     It allows the user to do a binary search for records with
 592 |     a specified key relation ( >, <, ==, >=, <= )
 593 |     """
 594 |     class BasePage(object):
 595 |         """
 596 |         Baseclass for Pages. for the various btree versions ( 1.5, 1.6 and 2.0 )
 597 |         there are subclasses which specify the exact layout of the page header,
 598 |         and index / leaf entries.
 599 | 
 600 |         Leaf pages don't have a 'preceeding' page pointer.
 601 | 
 602 |         """
 603 |         def __init__(self, data, entsize, entfmt):
 604 |             self.preceeding, self.count = struct.unpack_from(entfmt, data)
 605 |             if self.preceeding:
 606 |                 entrytype = self.IndexEntry
 607 |             else:
 608 |                 entrytype = self.LeafEntry
 609 | 
 610 |             self.index = []
 611 |             key = b""
 612 |             for i in range(self.count):
 613 |                 ent = entrytype(key, data, entsize * (1 + i))
 614 |                 self.index.append(ent)
 615 |                 key = ent.key
 616 |             self.unknown, self.freeptr = struct.unpack_from(entfmt, data, entsize * (1 + self.count))
 617 | 
 618 |         def find(self, key):
 619 |             """
 620 |             Searches pages for key, returns relation to key:
 621 | 
 622 |             recurse -> found a next level index page to search for key.
 623 |                        also returns the next level page nr
 624 |             gt -> found a value with a key greater than the one searched for.
 625 |             lt -> found a value with a key less than the one searched for.
 626 |             eq -> found a value with a key equal to the one searched for.
 627 |                        gt, lt and eq return the index for the key found.
 628 | 
 629 |             # for an index entry: the key is 'less' than anything in the page pointed to.
 630 |             """
 631 |             i = binary_search(self.index, key)
 632 |             if i < 0:
 633 |                 if self.isindex():
 634 |                     return ('recurse', -1)
 635 |                 return ('gt', 0)
 636 |             if self.index[i].key == key:
 637 |                 return ('eq', i)
 638 |             if self.isindex():
 639 |                 return ('recurse', i)
 640 |             return ('lt', i)
 641 | 
 642 |         def getpage(self, ix):
 643 |             """ For Indexpages, returns the page ptr for the specified entry """
 644 |             return self.preceeding if ix < 0 else self.index[ix].page
 645 | 
 646 |         def getkey(self, ix):
 647 |             """ For all page types, returns the key for the specified entry """
 648 |             return self.index[ix].key
 649 | 
 650 |         def getval(self, ix):
 651 |             """ For all page types, returns the value for the specified entry """
 652 |             return self.index[ix].val
 653 | 
 654 |         def isleaf(self):
 655 |             """ True when this is a Leaf Page """
 656 |             return self.preceeding == 0
 657 | 
 658 |         def isindex(self):
 659 |             """ True when this is an Index Page """
 660 |             return self.preceeding != 0
 661 | 
 662 |         def __repr__(self):
 663 |             return ("leaf" if self.isleaf() else ("index<%d>" % self.preceeding)) + repr(self.index)
 664 | 
 665 |     ######################################################
 666 |     # Page objects for the various versions of the database
 667 |     ######################################################
 668 |     class Page15(BasePage):
 669 |         """ v1.5 b-tree page """
 670 |         class IndexEntry(BaseIndexEntry):
 671 |             def __init__(self, key, data, ofs):
 672 |                 self.page, self.recofs = struct.unpack_from("<HH", data, ofs)
 673 |                 self.recofs += 1   # skip unused zero byte in each key/value record
 674 |                 super(self.__class__, self).__init__(data)
 675 | 
 676 |         class LeafEntry(BaseLeafEntry):
 677 |             def __init__(self, key, data, ofs):
 678 |                 self.indent, self.unknown, self.recofs = struct.unpack_from("<BBH", data, ofs)
 679 |                 self.unknown1 = 0
 680 |                 self.recofs += 1   # skip unused zero byte in each key/value record
 681 |                 super(self.__class__, self).__init__(key, data)
 682 | 
 683 |         def __init__(self, data):
 684 |             super(self.__class__, self).__init__(data, 4, "<HH")
 685 | 
 686 |     class Page16(BasePage):
 687 |         """ v1.6 b-tree page """
 688 |         class IndexEntry(BaseIndexEntry):
 689 |             def __init__(self, key, data, ofs):
 690 |                 self.page, self.recofs = struct.unpack_from("<LH", data, ofs)
 691 |                 self.recofs += 1   # skip unused zero byte in each key/value record
 692 |                 super(self.__class__, self).__init__(data)
 693 | 
 694 |         class LeafEntry(BaseLeafEntry):
 695 |             def __init__(self, key, data, ofs):
 696 |                 self.indent, self.unknown1, self.unknown, self.recofs = struct.unpack_from("<BBHH", data, ofs)
 697 |                 self.recofs += 1   # skip unused zero byte in each key/value record
 698 |                 super(self.__class__, self).__init__(key, data)
 699 | 
 700 |         def __init__(self, data):
 701 |             super(self.__class__, self).__init__(data, 6, "<LH")
 702 | 
 703 |     class Page20(BasePage):
 704 |         """ v2.0 b-tree page """
 705 |         class IndexEntry(BaseIndexEntry):
 706 |             def __init__(self, key, data, ofs):
 707 |                 self.page, self.recofs = struct.unpack_from("<LH", data, ofs)
 708 |                 # unused zero byte is no longer there in v2.0 b-tree
 709 |                 super(self.__class__, self).__init__(data)
 710 | 
 711 |         class LeafEntry(BaseLeafEntry):
 712 |             def __init__(self, key, data, ofs):
 713 |                 self.indent, self.unknown, self.recofs = struct.unpack_from("<HHH", data, ofs)
 714 |                 self.unknown1 = 0
 715 |                 super(self.__class__, self).__init__(key, data)
 716 | 
 717 |         def __init__(self, data):
 718 |             super(self.__class__, self).__init__(data, 6, "<LH")
 719 | 
 720 |     class Cursor:
 721 |         """
 722 |         A Cursor object represents a position in the b-tree.
 723 | 
 724 |         It has methods for moving to the next or previous item.
 725 |         And methods for retrieving the key and value of the current position
 726 | 
 727 |         The position is represented as a list of (page, index) tuples
 728 |         """
 729 |         def __init__(self, db, stack):
 730 |             self.db = db
 731 |             self.stack = stack
 732 | 
 733 |         def next(self):
 734 |             """ move cursor to next entry """
 735 |             page, ix = self.stack.pop()
 736 |             if page.isleaf():
 737 |                 # from leaf move towards root
 738 |                 ix += 1
 739 |                 while self.stack and ix == len(page.index):
 740 |                     page, ix = self.stack.pop()
 741 |                     ix += 1
 742 |                 if ix < len(page.index):
 743 |                     self.stack.append((page, ix))
 744 |             else:
 745 |                 # from node move towards leaf
 746 |                 self.stack.append((page, ix))
 747 |                 page = self.db.readpage(page.getpage(ix))
 748 |                 while page.isindex():
 749 |                     ix = -1
 750 |                     self.stack.append((page, ix))
 751 |                     page = self.db.readpage(page.getpage(ix))
 752 |                 ix = 0
 753 |                 self.stack.append((page, ix))
 754 | 
 755 |         def prev(self):
 756 |             """ move cursor to the previous entry """
 757 |             page, ix = self.stack.pop()
 758 |             ix -= 1
 759 |             if page.isleaf():
 760 |                 # move towards root, until non 'prec' item found
 761 |                 while self.stack and ix < 0:
 762 |                     page, ix = self.stack.pop()
 763 |                 if ix >= 0:
 764 |                     self.stack.append((page, ix))
 765 |             else:
 766 |                 # move towards leaf
 767 |                 self.stack.append((page, ix))
 768 |                 while page.isindex():
 769 |                     page = self.db.readpage(page.getpage(ix))
 770 |                     ix = len(page.index) - 1
 771 |                     self.stack.append((page, ix))
 772 | 
 773 |         def eof(self):
 774 |             return len(self.stack) == 0
 775 | 
 776 |         def getkey(self):
 777 |             """ return the key value pointed to by the cursor """
 778 |             page, ix = self.stack[-1]
 779 |             return page.getkey(ix)
 780 | 
 781 |         def getval(self):
 782 |             """ return the data value pointed to by the cursor """
 783 |             page, ix = self.stack[-1]
 784 |             return page.getval(ix)
 785 | 
 786 |         def __repr__(self):
 787 |             return "cursor:" + repr(self.stack)
 788 | 
 789 |     def __init__(self, fh):
 790 |         """ BTree constructor - takes a filehandle """
 791 |         self.fh = fh
 792 | 
 793 |         self.fh.seek(0)
 794 |         data = self.fh.read(64)
 795 | 
 796 |         if data[13:].startswith(b"B-tree v 1.5 (C) Pol 1990"):
 797 |             self.parseheader15(data)
 798 |             self.page = self.Page15
 799 |             self.version = 15
 800 |         elif data[19:].startswith(b"B-tree v 1.6 (C) Pol 1990"):
 801 |             self.parseheader16(data)
 802 |             self.page = self.Page16
 803 |             self.version = 16
 804 |         elif data[19:].startswith(b"B-tree v2"):
 805 |             self.parseheader16(data)
 806 |             self.page = self.Page20
 807 |             self.version = 20
 808 |         else:
 809 |             print("unknown btree: %s" % hexdump(data))
 810 |             raise Exception("unknown b-tree")
 811 | 
 812 |     def parseheader15(self, data):
 813 |         self.firstfree, self.pagesize, self.firstindex, self.reccount, self.pagecount = struct.unpack_from("<HHHLH", data, 0)
 814 | 
 815 |     def parseheader16(self, data):
 816 |         # v16 and v20 both have the same header format
 817 |         self.firstfree, self.pagesize, self.firstindex, self.reccount, self.pagecount = struct.unpack_from("<LHLLL", data, 0)
 818 | 
 819 |     def readpage(self, nr):
 820 |         self.fh.seek(nr * self.pagesize)
 821 |         return self.page(self.fh.read(self.pagesize))
 822 | 
 823 |     def find(self, rel, key):
 824 |         """
 825 |         Searches for a record with the specified relation to the key
 826 | 
 827 |         A cursor object is returned, the user can call getkey, getval on the cursor
 828 |         to retrieve the actual value.
 829 |         or call cursor.next() / cursor.prev() to enumerate values.
 830 | 
 831 |         'eq'  -> record equal to the key, None when not found
 832 |         'le'  -> last record with key <= to key
 833 |         'ge'  -> first record with key >= to key
 834 |         'lt'  -> last record with key < to key
 835 |         'gt'  -> first record with key > to key
 836 |         """
 837 | 
 838 |         # descend tree to leaf nearest to the `key`
 839 |         page = self.readpage(self.firstindex)
 840 |         stack = []
 841 |         while len(stack) < 256:
 842 |             act, ix = page.find(key)
 843 |             stack.append((page, ix))
 844 |             if act != 'recurse':
 845 |                 break
 846 |             page = self.readpage(page.getpage(ix))
 847 | 
 848 |         if len(stack) == 256:
 849 |             raise Exception("b-tree corrupted")
 850 |         cursor = BTree.Cursor(self, stack)
 851 | 
 852 |         # now correct for what was actually asked.
 853 |         if act == rel:
 854 |             pass
 855 |         elif rel == 'eq' and act != 'eq':
 856 |             return None
 857 |         elif rel in ('ge', 'le') and act == 'eq':
 858 |             pass
 859 |         elif rel in ('gt', 'ge') and act == 'lt':
 860 |             cursor.next()
 861 |         elif rel == 'gt' and act == 'eq':
 862 |             cursor.next()
 863 |         elif rel in ('lt', 'le') and act == 'gt':
 864 |             cursor.prev()
 865 |         elif rel == 'lt' and act == 'eq':
 866 |             cursor.prev()
 867 | 
 868 |         return cursor
 869 | 
 870 |     def dump(self):
 871 |         """ raw dump of all records in the b-tree """
 872 |         print("pagesize=%08x, reccount=%08x, pagecount=%08x" % (self.pagesize, self.reccount, self.pagecount))
 873 |         self.dumpfree()
 874 |         self.dumptree(self.firstindex)
 875 | 
 876 |     def dumpfree(self):
 877 |         """ list all free pages """
 878 |         fmt = "L" if self.version > 15 else "H"
 879 |         hdrsize = 8 if self.version > 15 else 4
 880 |         pn = self.firstfree
 881 |         if pn == 0:
 882 |             print("no free pages")
 883 |             return
 884 |         while pn:
 885 |             self.fh.seek(pn * self.pagesize)
 886 |             data = self.fh.read(self.pagesize)
 887 |             if len(data) == 0:
 888 |                 print("could not read FREE data at page %06x" % pn)
 889 |                 break
 890 |             count, nextfree = struct.unpack_from("<" + (fmt * 2), data)
 891 |             freepages = list(struct.unpack_from("<" + (fmt * count), data, hdrsize))
 892 |             freepages.insert(0, pn)
 893 |             for pn in freepages:
 894 |                 self.fh.seek(pn * self.pagesize)
 895 |                 data = self.fh.read(self.pagesize)
 896 |                 print("%06x: free: %s" % (pn, hexdump(data[:64])))
 897 |             pn = nextfree
 898 | 
 899 |     def dumpindented(self, pn, indent=0):
 900 |         """
 901 |         Dump all nodes of the current page with keys indented, showing how the `indent`
 902 |         feature works
 903 |         """
 904 |         page = self.readpage(pn)
 905 |         print("  " * indent, page)
 906 |         if page.isindex():
 907 |             print("  " * indent, end="")
 908 |             self.dumpindented(page.preceeding, indent + 1)
 909 |             for p in range(len(page.index)):
 910 |                 print("  " * indent, end="")
 911 |                 self.dumpindented(page.getpage(p), indent + 1)
 912 | 
 913 |     def dumptree(self, pn):
 914 |         """
 915 |         Walks entire tree, dumping all records on each page
 916 |         in sequential order
 917 |         """
 918 |         page = self.readpage(pn)
 919 |         print("%06x: preceeding = %06x, reccount = %04x" % (pn, page.preceeding, page.count))
 920 |         for ent in page.index:
 921 |             print("    %s" % ent)
 922 |         if page.preceeding:
 923 |             self.dumptree(page.preceeding)
 924 |             for ent in page.index:
 925 |                 self.dumptree(ent.page)
 926 | 
 927 |     def pagedump(self):
 928 |         """
 929 |         dump the contents of all pages, ignoring links between pages,
 930 |         this will enable you to view contents of pages which have become
 931 |         lost due to datacorruption.
 932 |         """
 933 |         self.fh.seek(self.pagesize)
 934 |         pn = 1
 935 |         while True:
 936 |             try:
 937 |                 pagedata = self.fh.read(self.pagesize)
 938 |                 if len(pagedata) == 0:
 939 |                     break
 940 |                 elif len(pagedata) != self.pagesize:
 941 |                     print("%06x: incomplete - %d bytes ( pagesize = %d )" % (pn, len(pagedata), self.pagesize))
 942 |                     break
 943 |                 elif pagedata == b'\x00' * self.pagesize:
 944 |                     print("%06x: empty" % (pn))
 945 |                 else:
 946 |                     page = self.page(pagedata)
 947 | 
 948 |                     print("%06x: preceeding = %06x, reccount = %04x" % (pn, page.preceeding, page.count))
 949 |                     for ent in page.index:
 950 |                         print("    %s" % ent)
 951 |             except Exception as e:
 952 |                 print("%06x: ERROR decoding as B-tree page: %s" % (pn, e))
 953 |             pn += 1
 954 | 
 955 | 
 956 | class ID0File(object):
 957 |     """
 958 |     Reads .id0 or 0.ida  files, containing a v1.5, v1.6 or v2.0 b-tree database.
 959 | 
 960 |     This is basically the low level netnode interface from the idasdk.
 961 | 
 962 |     There are two major groups of nodes in the database:
 963 | 
 964 |     key = "N"+name  -> value = littleendian(nodeid)
 965 |     key = "."+bigendian(nodeid)+char(tag)+bigendian(value)
 966 |     key = "."+bigendian(nodeid)+char(tag)+string
 967 | 
 968 |     key = "."+bigendian(nodeid)+char(tag)
 969 | 
 970 |     and some special nodes for bookkeeping:
 971 |     "$ MAX LINK"
 972 |     "$ MAX NODE"
 973 |     "$ NET DESC"
 974 | 
 975 |     Very old databases also have name entries with a lowercase 'n',
 976 |     and corresponding '-'+value nodes.
 977 |     I am not sure what those are for.
 978 | 
 979 |     several items have specially named nodes, like "$ structs", "$ enums", "Root Node"
 980 | 
 981 |     nodeByName(name)  returns the nodeid for a name
 982 |     bytes(nodeid, tag, val)  returns the value for a specific node.
 983 | 
 984 |     """
 985 |     INDEX = 0
 986 | 
 987 |     def __init__(self, idb, fh):
 988 |         self.btree = BTree(fh)
 989 | 
 990 |         self.wordsize = None
 991 |         self.maxnode = None
 992 | 
 993 |         if idb.magic == 'IDA2':
 994 |             # .i64 files use 64 bit values for some things.
 995 |             self.wordsize = 8
 996 |         elif idb.magic in ('IDA0', 'IDA1'):
 997 |             self.wordsize = 4
 998 |         else:
 999 |             # determine wordsize from value of '$ MAX NODE'
1000 |             c = self.btree.find('eq', b'$ MAX NODE')
1001 |             if c and not c.eof():
1002 |                 self.maxnode = c.getval()
1003 |                 self.wordsize = len(c.getval())
1004 | 
1005 |         if self.wordsize not in (4, 8):
1006 |             print("Can not determine wordsize for database - assuming 32 bit")
1007 |             self.wordsize = 4
1008 | 
1009 |         if self.wordsize == 4:
1010 |             self.nodebase = 0xFF000000
1011 |             if not self.maxnode:
1012 |                 self.maxnode = self.nodebase + 0x0FFFFF
1013 |             self.fmt = "L"
1014 |         else:
1015 |             self.nodebase = 0xFF00000000000000
1016 |             if not self.maxnode:
1017 |                 self.maxnode = self.nodebase + 0x0FFFFFFF
1018 | 
1019 |             self.fmt = "Q"
1020 | 
1021 |         # set the keyformat for this database
1022 |         self.keyfmt = ">s" + self.fmt + "s" + self.fmt
1023 | 
1024 |     @cachedproperty
1025 |     def root(self): return self.nodeByName("Root Node")
1026 | 
1027 |     # note: versions before 4.7 used a short instead of a long
1028 |     # and stored the versions with one minor digit ( 43 ) , instead of two ( 480 )
1029 |     @cachedproperty
1030 |     def idaver(self): return self.int(self.root, 'A', -1)
1031 | 
1032 |     @cachedproperty
1033 |     def idbparams(self): return self.bytes(self.root, 'S', 0x41b994)
1034 |     @cachedproperty
1035 |     def idaverstr(self): return self.string(self.root, 'S', 1303)
1036 |     @cachedproperty
1037 |     def nropens(self): return self.int(self.root, 'A', -4)
1038 |     @cachedproperty
1039 |     def creationtime(self): return self.int(self.root, 'A', -2)
1040 |     @cachedproperty
1041 |     def originmd5(self): return self.bytes(self.root, 'S', 1302)
1042 |     @cachedproperty
1043 |     def somecrc(self): return self.int(self.root, 'A', -5)
1044 | 
1045 |     def prettykey(self, key):
1046 |         """
1047 |         returns the key in a readable format.
1048 |         """
1049 |         f = list(self.decodekey(key))
1050 |         f[0] = f[0].decode('utf-8')
1051 |         if len(f) > 2 and type(f[2]) == bytes:
1052 |             f[2] = f[2].decode('utf-8')
1053 | 
1054 |         if f[0] == '.':
1055 |             if len(f) == 2:
1056 |                 return "%s%16x" % tuple(f)
1057 |             elif len(f) == 3:
1058 |                 return "%s%16x %s" % tuple(f)
1059 |             elif len(f) == 4:
1060 |                 if f[2] == 'H' and type(f[3]) in (str, bytes):
1061 |                     f[3] = f[3].decode('utf-8')
1062 |                     return "%s%16x %s '%s'" % tuple(f)
1063 |                 elif type(f[3]) in (int, long):
1064 |                     return "%s%16x %s %x" % tuple(f)
1065 |                 else:
1066 |                     f[3] = hexdump(f[3])
1067 |                     return "%s%16x %s %s" % tuple(f)
1068 |         elif f[0] in ('N', 'n', '$'):
1069 |             if type(f[1]) in (int, long):
1070 |                 return "%s %x %16x" % tuple(f)
1071 |             else:
1072 |                 return "%s'%s'" % tuple(f)
1073 |         elif f[0] == '-':
1074 |             return "%s %x" % tuple(f)
1075 | 
1076 |         return hexdump(key)
1077 | 
1078 |     def prettyval(self, val):
1079 |         """
1080 |         returns the value in a readable format.
1081 |         """
1082 |         if len(val) == self.wordsize and val[-1:] in (b'\x00', b'\xff'):
1083 |             return "%x" % struct.unpack("<" + self.fmt, val)
1084 |         if len(val) == self.wordsize and re.search(b'[\x00-\x08\x0b\x0c\x0e-\x1f]', val, re.DOTALL):
1085 |             return "%x" % struct.unpack("<" + self.fmt, val)
1086 |         if len(val) < 2 or not re.match(b'^[\x09\x0a\x0d\x20-\xff]+.$', val, re.DOTALL):
1087 |             return hexdump(val)
1088 |         val = val.replace(b"\n", b"\\n")
1089 |         return "'%s'" % val.decode('utf-8', 'ignore')
1090 | 
1091 |     def nodeByName(self, name):
1092 |         """ Return a nodeid by name """
1093 |         # note: really long names are encoded differently:
1094 |         #  'N'+'\x00'+pack('Q', nameid)  => ofs
1095 |         #  and  (ofs, 'N') -> nameid
1096 | 
1097 |         # at nodebase ( 0xFF000000, 'S', 0x100*nameid )  there is a series of blobs for max 0x80000 sized names.
1098 |         cur = self.btree.find('eq', self.namekey(name))
1099 |         if cur:
1100 |             return struct.unpack('<' + self.fmt, cur.getval())[0]
1101 | 
1102 |     def namekey(self, name):
1103 |         if type(name) in (int, long):
1104 |             return struct.pack("<sB" + self.fmt, b'N', 0, name)
1105 |         return b'N' + name.encode('utf-8')
1106 | 
1107 |     def makekey(self, *args):
1108 |         """
1109 |         Return a binary key for the nodeid, tag and optional value
1110 | 
1111 |         makekey(node)
1112 |         makekey(node, tag)
1113 |         makekey(node, tag, stringvalue)
1114 |         makekey(node, tag, intvalue)
1115 |         """
1116 |         if len(args) > 1:
1117 |             # utf-8 encode the tag
1118 |             args = args[:1] + (args[1].encode('utf-8'),) + args[2:]
1119 | 
1120 |         if len(args) == 3 and type(args[-1]) == str:
1121 |             # node.tag.string type keys
1122 |             return struct.pack(self.keyfmt[:1 + len(args)], b'.', *args[:-1]) + args[-1].encode('utf-8')
1123 |         elif len(args) == 3 and type(args[-1]) == type(-1) and args[-1] < 0:
1124 |             # negative values -> need lowercase fmt char
1125 |             return struct.pack(self.keyfmt[:1 + len(args)] + self.fmt.lower(), b'.', *args)
1126 |         else:
1127 |             # node.tag.value type keys
1128 |             return struct.pack(self.keyfmt[:2 + len(args)], b'.', *args)
1129 | 
1130 |     def decodekey(self, key):
1131 |         """
1132 |         splits a key in a tuple, one of:
1133 |            ( [ 'N', 'n', '$' ],  0,   bignameid )
1134 |            ( [ 'N', 'n', '$' ],  name  )
1135 |            ( '-',  id )
1136 |            ( '.',  id )
1137 |            ( '.',  id,  tag )
1138 |            ( '.',  id,  tag, value )
1139 |            ( '.',  id,  'H', name  )
1140 |         """
1141 |         if key[:1] in (b'n', b'N', b'$'):
1142 |             if key[1:2] == b"\x00" and len(key) == 2 + self.wordsize:
1143 |                 return struct.unpack(">sB" + self.fmt, key)
1144 |             else:
1145 |                 return key[:1], key[1:].decode('utf-8', 'ignore')
1146 |         if key[:1] == b'-':
1147 |             return struct.unpack(">s" + self.fmt, key)
1148 |         if len(key) == 1 + self.wordsize:
1149 |             return struct.unpack(self.keyfmt[:3], key)
1150 |         if len(key) == 1 + self.wordsize + 1:
1151 |             return struct.unpack(self.keyfmt[:4], key)
1152 |         if len(key) == 1 + 2 * self.wordsize + 1:
1153 |             return struct.unpack(self.keyfmt[:5], key)
1154 |         if len(key) > 1 + self.wordsize + 1:
1155 |             f = struct.unpack_from(self.keyfmt[:4], key)
1156 |             return f + (key[2 + self.wordsize:], )
1157 |         raise Exception("unknown key format")
1158 | 
1159 |     def bytes(self, *args):
1160 |         """ return a raw value for the given arguments """
1161 |         if len(args) == 1 and isinstance(args[0], BTree.Cursor):
1162 |             cur = args[0]
1163 |         else:
1164 |             cur = self.btree.find('eq', self.makekey(*args))
1165 | 
1166 |         if cur:
1167 |             return cur.getval()
1168 | 
1169 |     def int(self, *args):
1170 |         """
1171 |         Return the integer stored in the specified node.
1172 | 
1173 |         Any type of integer will be decoded: byte, short, long, long long
1174 | 
1175 |         """
1176 |         data = self.bytes(*args)
1177 |         if data is not None:
1178 |             if len(data) == 1:
1179 |                 return struct.unpack("<B", data)[0]
1180 |             if len(data) == 2:
1181 |                 return struct.unpack("<H", data)[0]
1182 |             if len(data) == 4:
1183 |                 return struct.unpack("<L", data)[0]
1184 |             if len(data) == 8:
1185 |                 return struct.unpack("<Q", data)[0]
1186 |             print("can't get int from %s" % hexdump(data))
1187 | 
1188 |     def string(self, *args):
1189 |         """ return string stored in node """
1190 |         data = self.bytes(*args)
1191 |         if data is not None:
1192 |             return data.rstrip(b"\x00").decode('utf-8')
1193 | 
1194 |     def name(self, id):
1195 |         """
1196 |         resolves a name, both short and long names.
1197 |         """
1198 |         data = self.bytes(id, 'N')
1199 |         if not data:
1200 |             print("%x has no name" % id)
1201 |             return
1202 |         if data[:1] == b'\x00':
1203 |             nameid, = struct.unpack_from(">" + self.fmt, data, 1)
1204 |             nameblob = self.blob(self.nodebase, 'S', nameid * 256, nameid * 256 + 32)
1205 |             return nameblob.rstrip(b"\x00").decode('utf-8')
1206 |         return data.rstrip(b"\x00").decode('utf-8')
1207 | 
1208 |     def blob(self, nodeid, tag, start=0, end=0xFFFFFFFF):
1209 |         """
1210 |         Blobs are stored in sequential nodes
1211 |         with increasing index values.
1212 | 
1213 |         most blobs, like scripts start at index
1214 |         0, long names start at a specified
1215 |         offset.
1216 | 
1217 |         """
1218 |         startkey = self.makekey(nodeid, tag, start)
1219 |         endkey = self.makekey(nodeid, tag, end)
1220 |         cur = self.btree.find('ge', startkey)
1221 |         data = b''
1222 |         while cur.getkey() <= endkey:
1223 |             data += cur.getval()
1224 |             cur.next()
1225 |         return data
1226 | 
1227 | 
1228 | class ID1File(object):
1229 |     """
1230 |     Reads .id1 or 1.IDA files, containing byte flags
1231 | 
1232 |     This is basically the information for the .idc GetFlags(ea),
1233 |     FirstSeg(), NextSeg(ea), SegStart(ea), SegEnd(ea) functions
1234 |     """
1235 |     INDEX = 1
1236 | 
1237 |     class SegInfo:
1238 |         def __init__(self, startea, endea, offset):
1239 |             self.startea = startea
1240 |             self.endea = endea
1241 |             self.offset = offset
1242 | 
1243 |     def __init__(self, idb, fh):
1244 |         if idb.magic == 'IDA2':
1245 |             wordsize, fmt = 8, "Q"
1246 |         else:
1247 |             wordsize, fmt = 4, "L"
1248 |         # todo: verify wordsize using the following heuristic:
1249 |         #  L -> starting at: seglistofs + nsegs*seginfosize  are all zero
1250 |         #  L -> starting at seglistofs .. nsegs*seginfosize every even word must be unique
1251 | 
1252 |         self.fh = fh
1253 |         fh.seek(0)
1254 |         hdrdata = fh.read(32)
1255 |         magic = hdrdata[:4]
1256 |         if magic in (b'Va4\x00', b'Va3\x00', b'Va2\x00', b'Va1\x00', b'Va0\x00'):
1257 |             nsegments, npages = struct.unpack_from("<HH", hdrdata, 4)
1258 |             #  filesize / npages == 0x2000  for all cases
1259 |             seglistofs = 8
1260 |             seginfosize = 3
1261 |         elif magic == b'VA*\x00':
1262 |             always3, nsegments, always2k, npages = struct.unpack_from("<LLLL", hdrdata, 4)
1263 |             if always3 != 3:
1264 |                 print("ID1: first dword != 3: %08x" % always3)
1265 |             if always2k != 0x800:
1266 |                 print("ID1: third dword != 2k: %08x" % always2k)
1267 |             seglistofs = 20
1268 |             seginfosize = 2
1269 |         else:
1270 |             raise Exception("unknown id1 magic: %s" % hexdump(magic))
1271 | 
1272 |         self.seglist = []
1273 |         # Va0  - ida v3.0.5
1274 |         # Va3  - ida v3.6
1275 |         fh.seek(seglistofs)
1276 |         if magic in (b'Va4\x00', b'Va3\x00', b'Va2\x00', b'Va1\x00', b'Va0\x00'):
1277 |             segdata = fh.read(nsegments * 3 * wordsize)
1278 |             for o in range(nsegments):
1279 |                 startea, endea, id1ofs = struct.unpack_from("<" + fmt + fmt + fmt, segdata, o * seginfosize * wordsize)
1280 |                 self.seglist.append(self.SegInfo(startea, endea, id1ofs))
1281 |         elif magic == b'VA*\x00':
1282 |             segdata = fh.read(nsegments * 2 * wordsize)
1283 |             id1ofs = 0x2000
1284 |             for o in range(nsegments):
1285 |                 startea, endea = struct.unpack_from("<" + fmt + fmt, segdata, o * seginfosize * wordsize)
1286 |                 self.seglist.append(self.SegInfo(startea, endea, id1ofs))
1287 |                 id1ofs += 4 * (endea - startea)
1288 | 
1289 |     def is32bit_heuristic(self, fh, seglistofs):
1290 |         fh.seek(seglistofs)
1291 |         # todo: verify wordsize using the following heuristic:
1292 |         #  L -> starting at: seglistofs + nsegs*seginfosize  are all zero
1293 |         #  L -> starting at seglistofs .. nsegs*seginfosize every even word must be unique
1294 | 
1295 |     def dump(self):
1296 |         """ print first and last bits for each segment """
1297 |         for seg in self.seglist:
1298 |             print("==== %08x-%08x" % (seg.startea, seg.endea))
1299 |             if seg.endea - seg.startea < 30:
1300 |                 for ea in range(seg.startea, seg.endea):
1301 |                     print("    %08x: %08x" % (ea, self.getFlags(ea)))
1302 |             else:
1303 |                 for ea in range(seg.startea, seg.startea + 10):
1304 |                     print("    %08x: %08x" % (ea, self.getFlags(ea)))
1305 |                 print("...")
1306 |                 for ea in range(seg.endea - 10, seg.endea):
1307 |                     print("    %08x: %08x" % (ea, self.getFlags(ea)))
1308 | 
1309 |     def find_segment(self, ea):
1310 |         """ do a linear search for the given address in the segment list """
1311 |         for seg in self.seglist:
1312 |             if seg.startea <= ea < seg.endea:
1313 |                 return seg
1314 | 
1315 |     def getFlags(self, ea):
1316 |         seg = self.find_segment(ea)
1317 |         if not seg:
1318 |             return 0
1319 |         self.fh.seek(seg.offset + 4 * (ea - seg.startea))
1320 |         return struct.unpack("<L", self.fh.read(4))[0]
1321 | 
1322 |     def firstSeg(self):
1323 |         return self.seglist[0].startea
1324 | 
1325 |     def nextSeg(self, ea):
1326 |         for i, seg in enumerate(self.seglist):
1327 |             if seg.startea <= ea < seg.endea:
1328 |                 if i + 1 < len(self.seglist):
1329 |                     return self.seglist[i + 1].startea
1330 |                 else:
1331 |                     return
1332 | 
1333 |     def segStart(self, ea):
1334 |         seg = self.find_segment(ea)
1335 |         if not seg:
1336 |             return
1337 |         return seg.startea
1338 | 
1339 |     def segEnd(self, ea):
1340 |         seg = self.find_segment(ea)
1341 |         if not seg:
1342 |             return
1343 |         return seg.endea
1344 | 
1345 | 
class NAMFile(object):
    """
    Reads .nam or NAMES.IDA files, containing ptrs to named items.

    Attributes set by the constructor:
      fh                -- the filehandle the section is read from
      wordsize, wordfmt -- size in bytes and struct format char of one address
      nnames            -- number of addresses in the list
      npages            -- page count taken from the header
      pagesize          -- page size in bytes; the address list starts at this offset
    """
    INDEX = 2

    def __init__(self, idb, fh):
        """
        Parse the header found in the first 64 bytes of `fh`.

        `idb` is only inspected for its `magic` attribute: 'IDA2' selects
        8-byte words, anything else 4-byte words.
        Unexpected values in the constant header fields are reported with print().
        Raises Exception when the nam magic is not recognised.
        """
        if idb.magic == 'IDA2':
            wordsize, fmt = 8, "Q"
        else:
            wordsize, fmt = 4, "L"

        self.fh = fh
        fh.seek(0)
        hdrdata = fh.read(64)
        magic = hdrdata[:4]
        # Va0  - ida v3.0.5
        # Va1  - ida v3.6
        if magic in (b'Va4\x00', b'Va3\x00', b'Va2\x00', b'Va1\x00', b'Va0\x00'):
            always1, npages, always0, nnames, pagesize = struct.unpack_from("<HH" + fmt + fmt + "L", hdrdata, 4)
            if always1 != 1:
                print("nam: first hw = %d" % always1)
            if always0 != 0:
                print("nam: third dw = %d" % always0)
        elif magic == b'VA*\x00':
            always3, always1, always2k, npages, always0, nnames = struct.unpack_from("<LLLL" + fmt + "L", hdrdata, 4)
            if always3 != 3:
                print("nam: 3 hw = %d" % always3)
            if always1 != 1:
                print("nam: 1 hw = %d" % always1)
            if always0 != 0:
                print("nam: 0 dw = %d" % always0)
            if always2k != 0x800:
                print("nam: 2k dw = %d" % always2k)
            # this header variant carries no page size field
            pagesize = 0x2000
        else:
            raise Exception("unknown nam magic: %s" % hexdump(magic))
        if idb.magic == 'IDA2':
            nnames >>= 1
        self.wordsize = wordsize
        self.wordfmt = fmt
        self.nnames = nnames
        # dump() prints npages, so it has to be kept on the instance
        self.npages = npages
        self.pagesize = pagesize

    def dump(self):
        """ print the name count, page count and page size from the header """
        print("nam: nnames=%d, npages=%d, pagesize=%08x" % (self.nnames, self.npages, self.pagesize))

    def allnames(self):
        """
        Generate all addresses stored in the name list.

        The list starts at file offset `pagesize` and is read one page at a time.
        """
        perpage = self.pagesize // self.wordsize
        if perpage <= 0:
            # a page too small to hold one entry would never advance the loop below
            return
        self.fh.seek(self.pagesize)
        n = 0
        while n < self.nnames:
            data = self.fh.read(self.pagesize)
            want = min(self.nnames - n, perpage)
            for ea in struct.unpack_from("<%d%s" % (want, self.wordfmt), data, 0):
                yield ea
            n += want
1395 | 
1396 | 
class SEGFile(object):
    """ Reader stub for .seg or $SEGS.IDA files; the constructor parses nothing. """
    INDEX = 3

    def __init__(self, idb, fh):
        """ accepts the database object and the section filehandle, and ignores both """
1403 | 
1404 | 
class TILFile(object):
    """ Reader stub for .til files; the constructor parses nothing. """
    INDEX = 4

    def __init__(self, idb, fh):
        """ accepts the database object and the section filehandle, and ignores both """
1411 | # note: v3 databases had a .reg instead of .til
1412 | 
1413 | 
class ID2File(object):
    """
    Reader stub for .id2 files; the constructor parses nothing.

    ID2 sections contain packed data, resulting in triples
    of unknown use.
    """
    INDEX = 5

    def __init__(self, idb, fh):
        """ accepts the database object and the section filehandle, and ignores both """
1425 | 
1426 | 
class Struct:
    """
    Decodes info for structures

    (structnode, N)          = structname
    (structnode, D, address) = xref-type
    (structnode, M, 0)       = packed struct info
    (structnode, S, 27)      = packed value(addr, byte)

    After construction:
      flags   -- struct flags (0 for databases with idaver < 40)
      members -- list of Struct.Member, in the order they were stored
      extra   -- any 32-bit values remaining after the member records
    """
    class Member:
        """
           (membernode, N)          = struct.member-name
           (membernode, A, 3)       = structid+1
           (membernode, A, 8)       = (unknown)
           (membernode, A, 11)      = enumid+1
           (membernode, A, 16)      = flag?  -- 4:variable length flag?
           (membernode, S, 0x3000)  = type (set with 'Y')
           (membernode, S, 0x3001)  = names used in 'type'
           (membernode, S, 5)       = array type?
           (membernode, S, 9)       = offset-type
           (membernode, D, address) = xref-type
           (membernode, d, structid) = xref-type   -- for sub-structs
        """
        def __init__(self, id0, spec):
            """
            Read one member record from `spec`, an unpacker positioned at the record.

            The stored node number is relative to `id0.nodebase`.
            """
            self._id0 = id0
            self._nodeid = spec.nextword() + self._id0.nodebase
            self.skip = spec.nextword()
            self.size = spec.nextword()
            self.flags = spec.next32()
            self.props = spec.next32()
            # assigned by Struct.__init__ once the preceding member sizes are known
            self.ofs = None

        @cachedproperty
        def name(self): return self._id0.name(self._nodeid)
        @cachedproperty
        def enumid(self): return self._id0.int(self._nodeid, 'A', 11)
        @cachedproperty
        def stringtype(self): return self._id0.int(self._nodeid, 'A', 16)
        @cachedproperty
        def structid(self): return self._id0.int(self._nodeid, 'A', 3)
        @cachedproperty
        def ptrinfo(self): return self._id0.bytes(self._nodeid, 'S', 9)
        @cachedproperty
        def typeinfo(self): return self._id0.bytes(self._nodeid, 'S', 0x3000)

        # A plain method, not a @cachedproperty: a property getter is called with
        # the instance only, so `repeatable` could never be supplied.
        # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
        # wraps its getter the way `property` does - confirm.
        def comment(self, repeatable):
            """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
            return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    def __init__(self, id0, nodeid):
        """
        Decode the packed struct info stored in the 'M' blob of `nodeid`.
        """
        self._id0 = id0
        self._nodeid = nodeid

        spec = self._id0.blob(self._nodeid, 'M')
        p = IdaUnpacker(self._id0.wordsize, spec)
        if self._id0.idaver >= 40:
            #    1 = SF_VAR, 2 = SF_UNION, 4 = SF_HASHUNI, 8 = SF_NOLIST, 0x10 = SF_TYPLIB, 0x20 = SF_HIDDEN, 0x40 = SF_FRAME, 0xF80 = SF_ALIGN, 0x1000 = SF_GHOST
            self.flags = p.next32()
        else:
            self.flags = 0

        nmembers = p.next32()

        self.members = []
        o = 0
        for _ in range(nmembers):
            m = Struct.Member(self._id0, p)
            # the offset of a member is the sum of the sizes of all members before it
            m.ofs = o
            o += m.size

            self.members.append(m)

        # keep whatever follows the member records
        self.extra = []
        while not p.eof():
            self.extra.append(p.next32())

    # A plain method, not a @cachedproperty: a property getter is called with
    # the instance only, so `repeatable` could never be supplied.
    # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
    # wraps its getter the way `property` does - confirm.
    def comment(self, repeatable):
        """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
        return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    @cachedproperty
    def name(self): return self._id0.name(self._nodeid)

    def __iter__(self):
        """ Enumerates all members, in stored order """
        return iter(self.members)
1508 | 
1509 | 
class Enum:
    """
       (enumnode, N)     = enum-name
       (enumnode, A, -1) = nr of values
       (enumnode, A, -3) = representation
       (enumnode, A, -5) = flags: bitfield, hidden, ...
       (enumnode, A, -8) = (unknown)
       (enumnode, E, value) = valuenode + 1

    Iterating an Enum yields an Enum.Member for each 'E' record.
    """
    class Member:
        """
           (membernode, N)      = membername
           (membernode, A, -2)  = enumnode + 1
           (membernode, A, -3)  = member value
        """
        def __init__(self, id0, nodeid):
            self._id0 = id0
            self._nodeid = nodeid

        @cachedproperty
        def value(self): return self._id0.int(self._nodeid, 'A', -3)
        @cachedproperty
        def name(self): return self._id0.name(self._nodeid)

        # A plain method, not a @cachedproperty: a property getter is called with
        # the instance only, so `repeatable` could never be supplied.
        # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
        # wraps its getter the way `property` does - confirm.
        def comment(self, repeatable):
            """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
            return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    def __init__(self, id0, nodeid):
        self._id0 = id0
        self._nodeid = nodeid

    @cachedproperty
    def count(self): return self._id0.int(self._nodeid, 'A', -1)
    @cachedproperty
    def representation(self): return self._id0.int(self._nodeid, 'A', -3)

    # flags>>3 -> width
    # flags&1 -> bitfield
    @cachedproperty
    def flags(self): return self._id0.int(self._nodeid, 'A', -5)

    @cachedproperty
    def name(self): return self._id0.name(self._nodeid)

    # A plain method, not a @cachedproperty: a property getter is called with
    # the instance only, so `repeatable` could never be supplied.
    # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
    # wraps its getter the way `property` does - confirm.
    def comment(self, repeatable):
        """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
        return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    def __iter__(self):
        """
        Enumerates all Members

        Walks the btree from the first key at or after (enumnode, 'E') up to,
        but not including, (enumnode, 'F').
        """
        startkey = self._id0.makekey(self._nodeid, 'E')
        endkey = self._id0.makekey(self._nodeid, 'F')
        cur = self._id0.btree.find('ge', startkey)
        while cur.getkey() < endkey:
            # the record holds the member node + 1
            yield Enum.Member(self._id0, self._id0.int(cur) - 1)
            cur.next()
1563 | 
1564 | 
class Bitfield:
    """
    Decodes a bitfield node.

    Iterating a Bitfield yields a Bitfield.Mask for each 'm' record;
    iterating a Mask yields a Bitfield.Member for each of its 'E' records.
    """
    class Member:
        """ One value of a bitfield, read from the 'A' records of its node. """
        def __init__(self, id0, nodeid):
            self._id0 = id0
            self._nodeid = nodeid

        @cachedproperty
        def value(self): return self._id0.int(self._nodeid, 'A', -3)
        @cachedproperty
        def mask(self): return self._id0.int(self._nodeid, 'A', -6) - 1
        @cachedproperty
        def name(self): return self._id0.name(self._nodeid)

        # A plain method, not a @cachedproperty: a property getter is called with
        # the instance only, so `repeatable` could never be supplied.
        # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
        # wraps its getter the way `property` does - confirm.
        def comment(self, repeatable):
            """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
            return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    class Mask:
        """ One mask of a bitfield; `mask` is the value passed in by Bitfield.__iter__. """
        def __init__(self, id0, nodeid, mask):
            self._id0 = id0
            self._nodeid = nodeid
            self.mask = mask

        @cachedproperty
        def name(self): return self._id0.name(self._nodeid)

        # A plain method, not a @cachedproperty: a property getter is called with
        # the instance only, so `repeatable` could never be supplied.
        # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
        # wraps its getter the way `property` does - confirm.
        def comment(self, repeatable):
            """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
            return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

        def __iter__(self):
            """
            Enumerates all Members of this mask

            Walks the btree from the first key at or after (masknode, 'E') up to,
            but not including, (masknode, 'F').
            """
            startkey = self._id0.makekey(self._nodeid, 'E')
            endkey = self._id0.makekey(self._nodeid, 'F')
            cur = self._id0.btree.find('ge', startkey)
            while cur.getkey() < endkey:
                # the record holds the member node + 1
                yield Bitfield.Member(self._id0, self._id0.int(cur) - 1)
                cur.next()

    def __init__(self, id0, nodeid):
        self._id0 = id0
        self._nodeid = nodeid

    @cachedproperty
    def count(self): return self._id0.int(self._nodeid, 'A', -1)
    @cachedproperty
    def representation(self): return self._id0.int(self._nodeid, 'A', -3)
    @cachedproperty
    def flags(self): return self._id0.int(self._nodeid, 'A', -5)

    @cachedproperty
    def name(self): return self._id0.name(self._nodeid)

    # A plain method, not a @cachedproperty: a property getter is called with
    # the instance only, so `repeatable` could never be supplied.
    # NOTE(review): assumes cachedproperty (defined elsewhere in this file)
    # wraps its getter the way `property` does - confirm.
    def comment(self, repeatable):
        """ return the string at ('S', 1) when `repeatable` is true, else the one at ('S', 0) """
        return self._id0.string(self._nodeid, 'S', 1 if repeatable else 0)

    def __iter__(self):
        """
        Enumerates all Masks

        Walks the btree from the first key at or after (bitfieldnode, 'm') up to,
        but not including, (bitfieldnode, 'n').
        """
        startkey = self._id0.makekey(self._nodeid, 'm')
        endkey = self._id0.makekey(self._nodeid, 'n')
        cur = self._id0.btree.find('ge', startkey)
        while cur.getkey() < endkey:
            key = self._id0.decodekey(cur.getkey())
            # the record holds the mask node + 1, the last key element is passed on as the mask
            yield Bitfield.Mask(self._id0, self._id0.int(cur) - 1, key[-1])
            cur.next()
1630 | 
class IDBParams:
    """
    Decodes the database parameter record passed in as `data`.

    `data` starts with a 3-byte magic and a 16-bit version. Records with a
    version below 700 have a fixed layout; later ones are a sequence of
    values read through IdaUnpacker.

    Both layouts set: version, cpu, idpflags, demnames, filetype, coresize,
    corestart, ostype and apptype.
    """
    def __init__(self, id0, data):
        self._id0 = id0
        _magic, version = struct.unpack_from("<3sH", data, 0)
        self.version = version
        if version >= 700:
            unpacker = IdaUnpacker(id0.wordsize, data[5:])
            # the cpu name is stored with its length in front
            self.cpu = unpacker.bytes(unpacker.next32())
            unpacker.next32()       # genflags, not kept
            self.idpflags = unpacker.next32()
            # this layout has no demnames field
            self.demnames = 0
            unpacker.next32()       # changecount, not kept
            self.filetype = unpacker.next32()
            self.ostype = unpacker.next32()
            self.apptype = unpacker.next32()
            # skipped: asmtype, specsegs, specsegs, aflags, aflags2
            for _ in range(5):
                unpacker.next32()
            # skipped: base, startss, startcs, startip, startea, startsp, main, minea, maxea
            for _ in range(9):
                unpacker.nextword()

            # neither of these is read from this layout
            self.coresize = 0
            self.corestart = 0
        else:
            layout = "<8sBBH" + (id0.fmt * 2) + "HH"
            fields = struct.unpack_from(layout, data, 5)
            (cpu, self.idpflags, self.demnames, self.filetype,
             self.coresize, self.corestart, self.ostype, self.apptype) = fields
            self.cpu = strz(cpu, 0)
1666 | 
class Script:
    """
    Gives access to the name, language and body stored under a script node.
    """
    def __init__(self, id0, nodeid):
        self._nodeid = nodeid
        self._id0 = id0

    @cachedproperty
    def name(self):
        """ the string stored at ('S', 0) """
        return self._id0.string(self._nodeid, 'S', 0)

    @cachedproperty
    def language(self):
        """ the string stored at ('S', 1) """
        return self._id0.string(self._nodeid, 'S', 1)

    @cachedproperty
    def body(self):
        """ the 'X' blob of the node, passed through strz """
        source = self._id0.blob(self._nodeid, 'X')
        return strz(source, 0)
1678 | 
class Segment:
    """
    Decodes a value from "$ segs", see segment_t in segment.hpp for details.

    All fields are read in order from the packed `spec` and stored as
    attributes with the names used below.
    """
    def __init__(self, id0, spec):
        self._id0 = id0
        unpacker = IdaUnpacker(id0.wordsize, spec)
        word = unpacker.nextword
        byte = unpacker.next8

        # address-sized fields
        self.startea = word()
        self.size = word()
        self.name_id = word()
        self.class_id = word()
        self.orgbase = word()

        self.unknown = unpacker.next16()

        # single-byte fields
        self.align = byte()
        self.comb = byte()
        self.perm = byte()
        self.bitness = byte()
        self.flags = byte()

        self.selector = word()
        # sixteen consecutive word values
        self.defsr = [word() for _ in range(16)]
        self.color = unpacker.next32()
1700 | 
1701 | 


--------------------------------------------------------------------------------