├── QString.py ├── QTMetaObject.py ├── QTRegister.py ├── QTResource.py ├── common.py ├── create_qstring.py └── vtable.py /QString.py: -------------------------------------------------------------------------------- 1 | import logging 2 | 3 | from ghidra.program.model.symbol import RefType 4 | from ghidra.program.model.symbol import SourceType 5 | from ghidra.program.model.symbol import SymbolUtilities 6 | """ 7 | self.data = createUnicodeString(to_address) 8 | at ghidra.program.database.code.CodeManager.checkValidAddressRange(CodeManager.java:1970) 9 | at ghidra.program.database.code.CodeManager.createCodeUnit(CodeManager.java:2055) 10 | at ghidra.program.database.ListingDB.createData(ListingDB.java:422) 11 | at ghidra.program.flatapi.FlatProgramAPI.createData(FlatProgramAPI.java:1658) 12 | at ghidra.program.flatapi.FlatProgramAPI.createUnicodeString(FlatProgramAPI.java:1790) 13 | at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke0(Native Method) 14 | at java.base/jdk.internal.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) 15 | at java.base/jdk.internal.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) 16 | at java.base/java.lang.reflect.Method.invoke(Method.java:566) 17 | ghidra.program.model.util.CodeUnitInsertionException: ghidra.program.model.util.CodeUnitInsertionException: Conflicting data exists at address 00403f20 to 00403f23 18 | """ 19 | from ghidra.program.model.util import CodeUnitInsertionException 20 | from ghidra.program.model.data import StructureDataType, IntegerDataType, DataTypeConflictHandler 21 | 22 | # this below allows to have the global objects as in the scripts themself 23 | # see 24 | from __main__ import * 25 | import common 26 | 27 | # FIXME: it's tricky to define "complex" data types 28 | # via C parsing since it seems that ghidra 29 | # doesn't parse correctly if some other archives 30 | # must be parsed before 31 | QARRAY_DECLARATION = """ 32 | struct QArrayData { 33 | int ref; 34 | int size; 35 | unsigned int alloc; /* this is a bitfield */ 36 | ptrdiff_t offset; 37 | }; 38 | """ 39 | 40 | 41 | logging.basicConfig() 42 | logger = logging.getLogger(__name__) 43 | logger.setLevel("INFO") 44 | 45 | 46 | def create(): 47 | """Create programmatically the QArrayData data type extracting 48 | the information from the CompilerSpec class.""" 49 | logger.warning("creating QArrayData") 50 | 51 | name = "QArrayData" 52 | 53 | qarraydata = StructureDataType(name, 0) 54 | qarraydata.add( 55 | IntegerDataType.dataType, 56 | currentProgram.getCompilerSpec().getDataOrganization().getIntegerSize(), 57 | "ref", 58 | "") 59 | qarraydata.add(IntegerDataType.dataType, 60 | currentProgram.getCompilerSpec().getDataOrganization().getIntegerSize(), 61 | "size", 62 | "") 63 | qarraydata.add(IntegerDataType.dataType, 64 | currentProgram.getCompilerSpec().getDataOrganization().getIntegerSize(), 65 | "alloc", 66 | "") 67 | 68 | # we need to pack the structure 69 | qarraydata.setToDefaultPacking() 70 | 71 | # for the offset we need to get the datatype that have the same size of a pointer 72 | # but is an integer; see 73 | pointer_size = currentProgram.getCompilerSpec().getDataOrganization().getPointerSize() 74 | ptrdiffDataType = IntegerDataType.getSignedDataType( 75 | pointer_size, 76 | currentProgram.getDataTypeManager(), 77 | ) 78 | qarraydata.add( 79 | ptrdiffDataType, 80 | pointer_size, 81 | "offset", 82 | "") # this should be ptrdiff_t 83 | 84 | currentProgram.getDataTypeManager().addDataType(qarraydata, DataTypeConflictHandler.REPLACE_HANDLER) 85 | 86 | # you need to requery; since it has no category indicated you should obtain the first one 87 | return getDataTypes(name)[0] 88 | 89 | 90 | def get(): 91 | """Return the QArrayData data type or create programmatically it.""" 92 | data_types = getDataTypes('QArrayData') 93 | 94 | # usually under "/Demangler" it exists an empty structure defined 95 | if len(data_types) == 0 or data_types[0].isNotYetDefined() or data_types[0].getLength() != 16: 96 | return create(), True 97 | 98 | return data_types[0], False 99 | 100 | 101 | def slugify(label): 102 | """Replace invalid characters with underscores""" 103 | return SymbolUtilities.replaceInvalidChars(label, True) 104 | 105 | 106 | class QString: 107 | INDEX_QARRAYDATA_OFFSET = 3 108 | INDEX_QARRAYDATA_LENGTH = 1 109 | 110 | dataType, _ = get() 111 | 112 | def __init__(self, address): 113 | self.address = address 114 | 115 | # sanity check (probably some more TODO) 116 | if getInt(address) != -1: 117 | raise ValueError("We are expecting -1 for the 'ref' field") 118 | 119 | # create data at the wanted position 120 | self._d = createData(address, self.dataType) 121 | 122 | # create reference 123 | rm = currentProgram.getReferenceManager() 124 | 125 | to_address = address.add(self.offset) 126 | 127 | rm.addOffsetMemReference( 128 | address, 129 | to_address, 130 | self.offset, 131 | RefType.DATA, 132 | SourceType.USER_DEFINED, 133 | 0, 134 | ) 135 | 136 | self.data = getDataAt(to_address) 137 | 138 | # we try to define a unicode string but maybe 139 | # some others data was defined before so we simply 140 | # get the string and whatever 141 | if self.data is None: 142 | try: 143 | self.data = createUnicodeString(to_address) 144 | str_ = self.data.value 145 | except CodeUnitInsertionException as e: 146 | logger.warning("--- code conflict below ---") 147 | logger.exception(e) 148 | logger.warning("---------------------------") 149 | # we haven't any data defined, use unicode 150 | self.data = common.get_bytes_from_binary(to_address, self.size * 2) 151 | str_ = self.data.decode('utf-16le') 152 | else: 153 | str_ = self.data.value 154 | 155 | createLabel(address, 'QARRAYDATA_%s' % slugify(str_), True) 156 | 157 | @classmethod 158 | def getHeaderSize(cls): 159 | return cls.dataType.getLength() 160 | 161 | @property 162 | def offset(self): 163 | return self._d.getComponent(self.INDEX_QARRAYDATA_OFFSET).value.getValue() 164 | 165 | @property 166 | def size(self): 167 | """This is the value as is, if you need the length of the unicode encode 168 | data you need to multiply this by 2.""" 169 | return self._d.getComponent(self.INDEX_QARRAYDATA_LENGTH).value.getValue() 170 | 171 | @property 172 | def end(self): 173 | """Return the address where the data pointed by this ends""" 174 | return self.address.add(self.offset + self.size * 2) 175 | 176 | @property 177 | def end_aligned(self): 178 | """Return the address where the data pointed by this end but aligned""" 179 | return self.address.add((self.offset + (self.size + 1) * 2 + 3) & 0xfffffc) # FIXME: generate mask 180 | 181 | def getString(self): 182 | return self.data if type(self.data) == str else self.data.value 183 | -------------------------------------------------------------------------------- /QTMetaObject.py: -------------------------------------------------------------------------------- 1 | # Try to rebuild the class from the QMetaObject information 2 | #@author 3 | #@category QT 4 | #@keybinding 5 | #@menupath 6 | #@toolbar 7 | import logging 8 | import struct 9 | from ghidra.app.tablechooser import TableChooserExecutor, AddressableRowObject, StringColumnDisplay 10 | from ghidra.program.model.symbol import RefType, SourceType 11 | from ghidra.program.model.data import ( 12 | StructureDataType, 13 | FunctionDefinitionDataType, 14 | GenericCallingConvention, 15 | IntegerDataType, 16 | ParameterDefinitionImpl, 17 | PointerDataType, 18 | VoidDataType, 19 | EnumDataType, 20 | DataTypeConflictHandler, 21 | ) 22 | 23 | from ghidra.app.cmd.function import ApplyFunctionSignatureCmd 24 | from ghidra.program.database.data import EnumDB 25 | 26 | import common 27 | from QString import QString 28 | 29 | logging.basicConfig() 30 | logger = logging.getLogger(__name__) 31 | logger.setLevel("INFO") 32 | 33 | 34 | def create(): 35 | name = "qt_meta_data_header_t" 36 | metadata = StructureDataType(name, 0) 37 | 38 | metadata.add( 39 | IntegerDataType.dataType, 40 | 0, 41 | "revision", 42 | "", 43 | ) 44 | metadata.add( 45 | IntegerDataType.dataType, 46 | 0, 47 | "className", 48 | "", 49 | ) 50 | metadata.add( 51 | IntegerDataType.dataType, 52 | 0, 53 | "classInfo_count", 54 | "", 55 | ) 56 | metadata.add( 57 | IntegerDataType.dataType, 58 | 0, 59 | "classInfo_index", 60 | "", 61 | ) 62 | metadata.add( 63 | IntegerDataType.dataType, 64 | 0, 65 | "methods_count", 66 | "", 67 | ) 68 | metadata.add( 69 | IntegerDataType.dataType, 70 | 0, 71 | "methods_index", 72 | "", 73 | ) 74 | metadata.add( 75 | IntegerDataType.dataType, 76 | 0, 77 | "properties_count", 78 | "", 79 | ) 80 | metadata.add( 81 | IntegerDataType.dataType, 82 | 0, 83 | "properties_index", 84 | "", 85 | ) 86 | metadata.add( 87 | IntegerDataType.dataType, 88 | 0, 89 | "enum_count", 90 | "", 91 | ) 92 | metadata.add( 93 | IntegerDataType.dataType, 94 | 0, 95 | "enum_index", 96 | "", 97 | ) 98 | metadata.add( 99 | IntegerDataType.dataType, 100 | 0, 101 | "constructor_count", 102 | "", 103 | ) 104 | metadata.add( 105 | IntegerDataType.dataType, 106 | 0, 107 | "constructor_index", 108 | "", 109 | ) 110 | metadata.add( 111 | IntegerDataType.dataType, 112 | 0, 113 | "flags", 114 | "", 115 | ) 116 | metadata.add( 117 | IntegerDataType.dataType, 118 | 0, 119 | "signal_count", 120 | "", 121 | ) 122 | 123 | currentProgram.getDataTypeManager().addDataType(metadata, DataTypeConflictHandler.REPLACE_HANDLER) 124 | 125 | # you need to requery; since it has no category indicated you should obtain the first one 126 | return getDataTypes(name)[0] 127 | 128 | 129 | def get(): 130 | qt_meta_data_header = getDataTypes('qt_meta_data_header_t') 131 | 132 | if len(qt_meta_data_header) == 0 or qt_meta_data_header[0].isNotYetDefined(): 133 | return create(), True 134 | 135 | return qt_meta_data_header[0], False 136 | 137 | 138 | staticMetaObject = getSymbols('staticMetaObject', getNamespace(None, 'QObject'))[0] 139 | 140 | 141 | def get_Call_enum(): 142 | datatype_call = getDataTypes('Call')[0] 143 | 144 | # minimal check 145 | if type(datatype_call) == EnumDB and datatype_call.getCount() == 13: 146 | return datatype_call 147 | 148 | 149 | # otherwise we can create it 150 | entries = [ 151 | "InvokeMetaMethod", 152 | "ReadProperty", 153 | "WriteProperty", 154 | "ResetProperty", 155 | "QueryPropertyDesignable", 156 | "QueryPropertyScriptable", 157 | "QueryPropertyStored", 158 | "QueryPropertyEditable", 159 | "QueryPropertyUser", 160 | "CreateInstance", 161 | "IndexOfMethod", 162 | "RegisterPropertyMetaType", 163 | "RegisterMethodArgumentMetaType", 164 | ] 165 | call = EnumDataType("Call", 1) 166 | 167 | for idx, name in enumerate(entries): 168 | call.add(name, idx) 169 | 170 | data_type_manager = currentProgram.getDataTypeManager() 171 | data_type_manager.addDataType(call, DataTypeConflictHandler.DEFAULT_HANDLER) 172 | 173 | # requery 174 | return getDataTypes('Call')[0] 175 | 176 | 177 | def get_staticMetaObject_DATA_refs(): 178 | """Return all the DATA references to the QObject::staticMetaObject, probably they are 179 | the first entry of the QMetaObject struct.""" 180 | 181 | return [_ for _ in getReferencesTo(staticMetaObject.getAddress()) if _.getReferenceType() == RefType.DATA] 182 | 183 | 184 | class QMetaObject: 185 | """Wrap the static struct defining the QMetaObject and rebuild the 186 | class that it defines. 187 | 188 | It's originally defined like the following: 189 | 190 | static const uint qt_meta_data_Counter[] = { 191 | 192 | // content: 193 | 7, // revision 194 | 0, // classname 195 | 0, 0, // classinfo 196 | 2, 14, // methods 197 | 0, 0, // properties 198 | 0, 0, // enums/sets 199 | 0, 0, // constructors 200 | 0, // flags 201 | 1, // signalCount 202 | 203 | // signals: name, argc, parameters, tag, flags 204 | 1, 1, 24, 2, 0x06 /* Public */, 205 | 206 | // slots: name, argc, parameters, tag, flags 207 | 4, 1, 27, 2, 0x0a /* Public */, 208 | 209 | // signals: parameters 210 | QMetaType::Void, QMetaType::Int, 3, 211 | 212 | // slots: parameters 213 | QMetaType::Void, QMetaType::Int, 5, 214 | 215 | 0 // eod 216 | }; 217 | 218 | struct qt_meta_stringdata_Counter_t { 219 | QByteArrayData data[6]; 220 | char stringdata0[46]; 221 | }; 222 | #define QT_MOC_LITERAL(idx, ofs, len) \ 223 | Q_STATIC_BYTE_ARRAY_DATA_HEADER_INITIALIZER_WITH_OFFSET(len, \ 224 | qptrdiff(offsetof(qt_meta_stringdata_Counter_t, stringdata0) + ofs \ 225 | - idx * sizeof(QByteArrayData)) \ 226 | ) 227 | static const qt_meta_stringdata_Counter_t qt_meta_stringdata_Counter = { 228 | { 229 | QT_MOC_LITERAL(0, 0, 7), // "Counter" 230 | QT_MOC_LITERAL(1, 8, 12), // "valueChanged" 231 | QT_MOC_LITERAL(2, 21, 0), // "" 232 | QT_MOC_LITERAL(3, 22, 8), // "newValue" 233 | QT_MOC_LITERAL(4, 31, 8), // "setValue" 234 | QT_MOC_LITERAL(5, 40, 5) // "value" 235 | 236 | }, 237 | "Counter\0valueChanged\0\0newValue\0setValue\0" 238 | "value" 239 | }; 240 | #undef QT_MOC_LITERAL 241 | 242 | For reference see . 243 | """ 244 | INT = getDataTypes('int')[0] 245 | HEADER_DATATYPE, _ = get() 246 | 247 | HEADER_FIELD_CLASSNAME_INDEX = 1 248 | HEADER_FIELD_METHODS_COUNT_INDEX = 4 249 | HEADER_FIELD_METHODS_INDEX_INDEX = 5 250 | HEADER_FIELD_PROPS_COUNT_INDEX = 6 251 | HEADER_FIELD_PROPS_INDEX_INDEX = 7 252 | HEADER_FIELD_ENUMS_COUNT_INDEX = 8 253 | HEADER_FIELD_ENUMS_INDEX_INDEX = 9 254 | 255 | 256 | def __init__(self, address): 257 | self.address = address 258 | 259 | pointer_size = currentProgram.getCompilerSpec().getDataOrganization().getPointerSize() 260 | 261 | self.staticMetaObject = common.get_value(address, PointerDataType.dataType) 262 | self.stringdata = common.get_value(address.add(pointer_size), PointerDataType.dataType) 263 | self.data = common.get_value(address.add(2*pointer_size), PointerDataType.dataType) 264 | self.static_metacall = common.get_value(address.add(3*pointer_size), PointerDataType.dataType) 265 | 266 | self.methods = {} # this will contain the idx: method name 267 | 268 | # sanity check 269 | is_original_staticMetaObject = self.staticMetaObject == staticMetaObject.getAddress() 270 | 271 | if not is_original_staticMetaObject: 272 | symbol_at = getSymbolAt(self.staticMetaObject) 273 | 274 | if symbol_at is None: 275 | #raise ValueError("Nothing is defined at {}: you should set the cursor at the start of a MetaObject vtable".format(self.staticMetaObject)) 276 | logger.warning("no symbol defined, proceed wit caution") 277 | elif "staticMetaObject" not in symbol_at.getName(): 278 | logger.warning("you must have a cursor on a memory that references staticMetaObject instead of %s" % symbol_at) 279 | else: 280 | logger.info("this class derives from '%s'" % (symbol_at.getParentNamespace())) 281 | 282 | # obtain the info from the memory 283 | self.__build() 284 | 285 | # set the qt_metacast method as best as we can 286 | if self.static_metacall.getOffset() != 0: 287 | self.__configure_qt_metacast() 288 | 289 | # now find and set the correct signature for the signals 290 | self.find_signals() 291 | 292 | def get_size_int(self): 293 | """Make architecture independent the reading""" 294 | return self.INT.getLength() 295 | 296 | def __str__(self): 297 | return "%s %s #properties=%d #methods=%d" % (self.stringdata, self.data, self.properties_count, self.methods_count) 298 | 299 | def get_ghidra_class(self): 300 | """try to retrieve the class from the name and if doesn't exist create it.""" 301 | # get the class (getObject() because otherwise is a symbol so not a Namespace) 302 | klass = currentProgram.getSymbolTable().getClassSymbol(self.class_name, None) 303 | ns = currentProgram.getSymbolTable().getNamespace(self.class_name, None) 304 | 305 | if klass is None and ns is None: 306 | logger.info("creating class '{}'".format(self.class_name)) 307 | klass = currentProgram.symbolTable.createClass(None, self.class_name, SourceType.USER_DEFINED) 308 | elif klass is None and ns is not None: 309 | logger.info("converting namespace '{}' to class".format(self.class_name)) 310 | klass = currentProgram.getSymbolTable().convertNamespaceToClass(ns) 311 | else: 312 | klass = klass.getObject() 313 | 314 | logger.info("using class namespace '{}'".format(klass)) 315 | 316 | return klass 317 | 318 | def __configure_qt_metacast(self): 319 | func_metacall = getFunctionAt(self.static_metacall) 320 | 321 | if func_metacall is None: 322 | logger.info("function not defined at {}, creating now".format(self.static_metacall)) 323 | # use the "FUN_" prefix so that we change that after 324 | func_metacall = createFunction(self.static_metacall, "FUN_metacall") 325 | 326 | if not func_metacall.getName().startswith("FUN_"): 327 | logger.warning("manually edited function, do not change") 328 | return 329 | 330 | klass = self.get_ghidra_class() 331 | 332 | # now we are going to change Namespace, name and signature 333 | func_metacall.setParentNamespace(klass) 334 | func_metacall.setName("qt_metacast", SourceType.USER_DEFINED) 335 | 336 | sig = FunctionDefinitionDataType("miao") 337 | sig.setGenericCallingConvention(GenericCallingConvention.thiscall) 338 | 339 | datatype_call = get_Call_enum() 340 | 341 | datatype_call = ParameterDefinitionImpl('call', datatype_call, 'type of call') 342 | datatype_index = ParameterDefinitionImpl('index', IntegerDataType.dataType, 'index of slots/methods/signal') 343 | datatype_args = ParameterDefinitionImpl('args', PointerDataType(PointerDataType(VoidDataType.dataType)), 'extra data') 344 | 345 | sig.setArguments([datatype_call, datatype_index, datatype_args]) 346 | 347 | runCommand(ApplyFunctionSignatureCmd(func_metacall.entryPoint, sig, SourceType.USER_DEFINED)) 348 | 349 | def __build_from_header(self): 350 | self.header = getDataAt(self.data) 351 | 352 | if self.header is None or self.header.getDataType() != self.HEADER_DATATYPE: 353 | self.header = createData(self.data, self.HEADER_DATATYPE) 354 | 355 | if self.header is None: 356 | raise ValueError("no data at %s" % self.data) 357 | 358 | self.class_name = self._get_qstring_at( 359 | self.header.getComponent(self.HEADER_FIELD_CLASSNAME_INDEX).value.getValue()).data.value 360 | 361 | logger.info("found class '%s'" % self.class_name) 362 | 363 | self.methods_count = self.header.getComponent(self.HEADER_FIELD_METHODS_COUNT_INDEX).value.getValue() 364 | self.methods_index = self.header.getComponent(self.HEADER_FIELD_METHODS_INDEX_INDEX).value.getValue() 365 | 366 | self.properties_count = self.header.getComponent(self.HEADER_FIELD_PROPS_COUNT_INDEX).value.getValue() 367 | self.properties_index = self.header.getComponent(self.HEADER_FIELD_PROPS_INDEX_INDEX).value.getValue() 368 | 369 | self.enums_count = self.header.getComponent(self.HEADER_FIELD_ENUMS_COUNT_INDEX).value.getValue() 370 | self.enums_index = self.header.getComponent(self.HEADER_FIELD_ENUMS_INDEX_INDEX).value.getValue() 371 | 372 | self.slots_index = self.header.getComponent(self.HEADER_FIELD_ENUMS_COUNT_INDEX).value.getValue() 373 | self.slots_count = self.header.getComponent(self.HEADER_FIELD_ENUMS_COUNT_INDEX).value.getValue() 374 | 375 | def _get_qstring_at(self, index): 376 | offset = QString.getHeaderSize() * index 377 | address = self.stringdata.add(offset) 378 | 379 | logger.debug("address for string: %s" % address) 380 | 381 | return QString(address) 382 | 383 | def _read_int(self, address): 384 | return getInt(address), address.add(self.get_size_int()) 385 | 386 | def _read_uint(self, address): 387 | return struct.unpack(" 0: 530 | tableDialog.show() 531 | 532 | def find_signals(self): 533 | """The idea here is that QMetaObject::activate() with our MetaObject vtable 534 | will identify all the signals of this object.""" 535 | activate = common.get_function_by_name('activate', namespace='QMetaObject') 536 | 537 | # where activate() is called 538 | xrefs_activate = common.get_functions_via_xref(activate.entryPoint) 539 | 540 | logger.debug("xrefs to activate(): {}".format(xrefs_activate)) 541 | 542 | # where out MetaObject vtable is referenced 543 | xrefs_metavtable_func = [func for call_addr, func in common.get_functions_via_xref(self.address)] 544 | logger.info("xrefs to metatable: {}".format(xrefs_metavtable_func)) 545 | 546 | # where the xrefs are not inside a function (we suppose are to be defined) 547 | undefined = filter(lambda _: _ is None, xrefs_metavtable_func) 548 | 549 | logger.info("TODO: table for {}".format(undefined)) 550 | 551 | import collections 552 | 553 | # we look only for functions where there is only one xref 554 | xrefs_activate_counted = collections.Counter([_[1] for _ in xrefs_activate]) 555 | xrefs_activate = [(call_addr, func) for call_addr, func in xrefs_activate if xrefs_activate_counted[func] == 1] 556 | 557 | logger.debug(xrefs_activate_counted) 558 | 559 | # this will be useful later 560 | klass = self.get_ghidra_class() 561 | 562 | # show the undefined 563 | #if undefined: 564 | # self._show_undefined_functions(undefined) 565 | 566 | # take the xrefs (functions) that are common 567 | xrefs = [(call_addr, func) for call_addr, func in xrefs_activate if func in xrefs_metavtable_func] 568 | 569 | logger.debug(xrefs) 570 | 571 | for call_addr, func in xrefs: 572 | if func is None: 573 | # jump the not defined functions 574 | continue 575 | 576 | """ 577 | TODO: Now the logic to follow would be 578 | 579 | 1. it's the unique call inside that function 580 | 2. the first parameter is a function parameter 581 | """ 582 | # give me the arguments please 583 | 584 | caller = common.getCallerInfo(activate, func, call_addr) 585 | logger.debug(func) 586 | logger.info(caller) 587 | # we obtain the signal name from the index (third argument) 588 | signal_index = caller[3] 589 | signal_name = self.methods[signal_index] 590 | 591 | logger.info("found signal: '{}'".format(signal_name)) 592 | 593 | func.setParentNamespace(klass) 594 | func.setCallingConvention('__thiscall') 595 | 596 | if not func.getName().startswith("FUN_"): 597 | logger.warning("not changing function name since it seems user generated") 598 | continue 599 | 600 | logger.info("renaming {} -> {}".format(func.getName(), signal_name)) 601 | 602 | func.setName(signal_name, SourceType.USER_DEFINED) 603 | 604 | def find_meta_object(self): 605 | """Try to find a cross reference between this class MetaObject vtable 606 | and a call to QObjectData::dynamicMetaObject().""" 607 | 608 | xrefs_metavtable = common.get_functions_via_xref(self.address) 609 | dynamicMetaObject = common.get_function_by_name('dynamicMetaObject') 610 | 611 | xrefs_dynamic = common.get_functions_via_xref(dynamicMetaObject.entryPoint) 612 | xrefs_dynamic_w_func = [_[1] for _ in xrefs_dynamic if _[1] is not None] 613 | 614 | return [_ for _ in xrefs_metavtable if _[1] in xrefs_dynamic_w_func] 615 | 616 | 617 | obj = QMetaObject(currentAddress) 618 | 619 | print obj 620 | -------------------------------------------------------------------------------- /QTRegister.py: -------------------------------------------------------------------------------- 1 | # List qmlregister() arguments 2 | #@author 3 | #@category QT 4 | #@keybinding 5 | #@menupath 6 | #@toolbar 7 | import re 8 | from collections import deque 9 | import logging 10 | 11 | from ghidra.app.decompiler import DecompileOptions 12 | from ghidra.app.decompiler import DecompInterface 13 | from ghidra.util.task import ConsoleTaskMonitor 14 | from ghidra.app.tablechooser import TableChooserExecutor, AddressableRowObject, StringColumnDisplay 15 | from ghidra.program.model.pcode import HighLocal, VarnodeAST, Varnode, PcodeOpAST, HighSymbol, PcodeOp 16 | from ghidra.program.database.function import LocalVariableDB 17 | # https://reverseengineering.stackexchange.com/questions/25322/extracting-info-from-ghidra-listing-window 18 | from ghidra.program.model.listing import CodeUnitFormat, CodeUnitFormatOptions 19 | 20 | import common 21 | 22 | logging.basicConfig() 23 | logger = logging.getLogger(__name__) 24 | logger.setLevel("INFO") 25 | 26 | 27 | FUNC_NAME = 'qmlregister' 28 | 29 | # from qtdeclarative/src/qml/qml/qqmlprivate.h 30 | REGISTRATION_TYPE_DECLARATION = """ 31 | enum 32 | RegistrationType 33 | { 34 | TypeRegistration = 0, 35 | InterfaceRegistration = 1, 36 | AutoParentRegistration = 2, 37 | SingletonRegistration = 3, 38 | CompositeRegistration = 4, 39 | CompositeSingletonRegistration = 5, 40 | QmlUnitCacheHookRegistration = 6, 41 | TypeAndRevisionsRegistration = 7, 42 | SingletonAndRevisionsRegistration = 8 43 | }; 44 | """ 45 | 46 | 47 | # set the formatting output for the listing 48 | # so that we can extract information from it 49 | codeUnitFormat = CodeUnitFormat( 50 | CodeUnitFormatOptions( 51 | CodeUnitFormatOptions.ShowBlockName.ALWAYS, 52 | CodeUnitFormatOptions.ShowNamespace.ALWAYS, 53 | "", 54 | True, 55 | True, 56 | True, 57 | True, 58 | True, 59 | True, 60 | True) 61 | ) 62 | 63 | def get_high_function(func): 64 | options = DecompileOptions() 65 | monitor = ConsoleTaskMonitor() 66 | ifc = DecompInterface() 67 | ifc.setOptions(options) 68 | ifc.openProgram(getCurrentProgram()) 69 | # Setting a simplification style will strip useful `indirect` information. 70 | # Please don't use this unless you know why you're using it. 71 | #ifc.setSimplificationStyle("normalize") 72 | res = ifc.decompileFunction(func, 60, monitor) 73 | high = res.getHighFunction() 74 | return high 75 | 76 | 77 | def get_stack_var_from_varnode(func, varnode): 78 | print "get_stack_var_from_varnode():", varnode, type(varnode) 79 | if type(varnode) not in [Varnode, VarnodeAST]: 80 | raise Exception("Invalid value. Expected `Varnode` or `VarnodeAST`, got {}.".format(type(varnode))) 81 | 82 | bitness_masks = { 83 | '16': 0xffff, 84 | '32': 0xffffffff, 85 | '64': 0xffffffffffffffff, 86 | } 87 | 88 | try: 89 | addr_size = currentProgram.getMetadata()['Address Size'] 90 | bitmask = bitness_masks[addr_size] 91 | except KeyError: 92 | raise Exception("Unsupported bitness: {}. Add a bit mask for this target.".format(addr_size)) 93 | 94 | local_variables = func.getAllVariables() 95 | vndef = varnode.getDef() 96 | if vndef: 97 | vndef_inputs = vndef.getInputs() 98 | for defop_input in vndef_inputs: 99 | defop_input_offset = defop_input.getAddress().getOffset() & bitmask 100 | for lv in local_variables: 101 | unsigned_lv_offset = lv.getMinAddress().getUnsignedOffset() & bitmask 102 | if unsigned_lv_offset == defop_input_offset: 103 | return lv 104 | 105 | # If we get here, varnode is likely a "acStack##" variable. 106 | hf = get_high_function(func) 107 | lsm = hf.getLocalSymbolMap() 108 | for vndef_input in vndef_inputs: 109 | defop_input_offset = vndef_input.getAddress().getOffset() & bitmask 110 | for symbol in lsm.getSymbols(): 111 | if symbol.isParameter(): 112 | continue 113 | if defop_input_offset == symbol.getStorage().getFirstVarnode().getOffset() & bitmask: 114 | return symbol 115 | 116 | # unable to resolve stack variable for given varnode 117 | return None 118 | 119 | 120 | def get_vars_from_varnode(func, node, variables=None): 121 | print "get_get_vars_from_varnode():", node, type(node) 122 | if type(node) not in [PcodeOpAST, VarnodeAST]: 123 | raise Exception("Invalid value passed. Got {}.".format(type(node))) 124 | 125 | # create `variables` list on first call. Do not make `variables` default to []. 126 | if variables == None: 127 | variables = [] 128 | 129 | # We must use `getDef()` on VarnodeASTs 130 | if type(node) == VarnodeAST: 131 | print " from addr:", node.getPCAddress() 132 | # For `get_stack_var_from_varnode` see: 133 | # https://github.com/HackOvert/GhidraSnippets 134 | # Ctrl-F for "get_stack_var_from_varnode" 135 | var = get_stack_var_from_varnode(func, node) 136 | if var and type(var) != HighSymbol: 137 | variables.append(var) 138 | node = node.getDef() 139 | if node: 140 | variables = get_vars_from_varnode(func, node, variables) 141 | # We must call `getInputs()` on PcodeOpASTs 142 | elif type(node) == PcodeOpAST: 143 | print " from addr:", node.getSeqnum() 144 | nodes = list(node.getInputs()) 145 | for node in nodes: 146 | if type(node.getHigh()) == HighLocal: 147 | variables.append(node.getHigh()) 148 | else: 149 | variables = get_vars_from_varnode(func, node, variables) 150 | return variables 151 | 152 | 153 | """ 154 | The table code is inspired from here 155 | """ 156 | class ArgumentsExecutor(TableChooserExecutor): 157 | def execute(self, rowObject): 158 | return True 159 | 160 | def getButtonName(self): 161 | return "I'm late!" 162 | 163 | 164 | class Argument(AddressableRowObject): 165 | def __init__(self, row): 166 | # using "address" raises "AttributeError: read-only attr: address" 167 | self.row = row 168 | 169 | def getAddress(self): 170 | return self.row[0] 171 | 172 | 173 | class TypeColumn(StringColumnDisplay): 174 | def getColumnName(self): 175 | return u"Type" 176 | 177 | def getColumnValue(self, row): 178 | return row.row[1] 179 | 180 | 181 | class ClassNameColumn(StringColumnDisplay): 182 | def getColumnName(self): 183 | return u"Class" 184 | 185 | def getColumnValue(self, row): 186 | return row.row[2] 187 | 188 | 189 | def getXref(func): 190 | target_addr = func.entryPoint 191 | references = getReferencesTo(target_addr) 192 | callers = [] 193 | for xref in references: 194 | call_addr = xref.getFromAddress() 195 | caller = getFunctionContaining(call_addr) 196 | callers.append(caller) 197 | return list(set(callers)) 198 | 199 | 200 | 201 | 202 | def getSymbolFromAnnotation(annotation): 203 | """The label referenced from an instruction is something like 204 | 205 | prefix_address+offset 206 | """ 207 | match = re.match(r"(.+?) (r.+)=>(.+?):(.+?),\[sp,", annotation) 208 | 209 | if not match: 210 | print "annotation failed:", annotation 211 | return None 212 | 213 | match = match.group(4) 214 | 215 | try: 216 | offset_plus = match.index("+") 217 | except ValueError: 218 | return getSymbol(match, None) 219 | 220 | return None 221 | 222 | 223 | def getCallerInfo(func, caller, options = DecompileOptions(), ifc = DecompInterface()): 224 | print("function: '%s'" % caller.name) 225 | 226 | target_addr = func.entryPoint 227 | 228 | ifc.setOptions(options) 229 | ifc.openProgram(currentProgram) 230 | 231 | # prog_ctx = currentProgram.getProgramContext() 232 | 233 | monitor = ConsoleTaskMonitor() 234 | res = ifc.decompileFunction(caller, 60, monitor) 235 | high_func = res.getHighFunction() 236 | lsm = high_func.getLocalSymbolMap() 237 | markup = res.getCCodeMarkup() 238 | 239 | symbols = lsm.getSymbols() 240 | stack_frame = caller.getStackFrame() 241 | ref_mgr = currentProgram.getReferenceManager() 242 | 243 | results = [] 244 | 245 | if high_func: 246 | opiter = high_func.getPcodeOps() 247 | 248 | while opiter.hasNext(): 249 | op = opiter.next() 250 | mnemonic = str(op.getMnemonic()) 251 | if mnemonic == "CALL": 252 | inputs = op.getInputs() 253 | 254 | # we are going to save the argument of the requested call 255 | # but we are not interested to the address that is the inputs[0] 256 | # argument from the PcodeOp 257 | calling_args = [0] * (len(inputs) - 1) 258 | 259 | addr = inputs[0].getAddress() 260 | args = inputs[1:] # List of VarnodeAST types 261 | 262 | if addr == target_addr: 263 | source_addr = op.getSeqnum().getTarget() 264 | 265 | print("Call to {} at {} has {} arguments: {}".format(addr, source_addr, len(args), args)) 266 | 267 | for pos, arg in enumerate(args): 268 | # var = arg.getHigh() 269 | # print "var", var, var.getSymbol(), var.getDataType() 270 | # print "lsm", lsm.findLocal(arg.getAddress(), None) 271 | 272 | if pos != 0: 273 | print "initial arg%d: %s" % (pos, arg) 274 | refined = get_vars_from_varnode(caller, arg) 275 | 276 | if len(refined) > 0: 277 | refined = refined[0] 278 | print "found variable '%s' for arg%d" % (refined, pos) 279 | # print refined, type(refined) 280 | """ 281 | 282 | print "symbol", refined.getSymbol(), refined.getSymbol().getAddress(), dir(refined.getSymbol()), refined.getSymbol().getSymbolType() 283 | print "address", refined.getLastStorageVarnode().getAddress() 284 | print "high", refined.getLastStorageVarnode().getHigh() 285 | # print "getDef()", refined.getDef() 286 | print "last", refined.getFirstStorageVarnode().getDef() 287 | print "stack", stack_frame.getVariableContaining(refined.getStackOffset()) 288 | print "references", '\n'.join([str(_) for _ in ref_mgr.getReferencesTo(refined)]) 289 | """ 290 | # print "auaua", [(_.getFromAddress().getOffset(), _.getStackOffset()) for _ in ref_mgr.getReferencesTo(refined) if 291 | # _.getFromAddress() < source_addr] 292 | # here we are going to create an ordered list with all the references to the given variable 293 | # that happen before the call and return only the last one that hopefully is the one 294 | # setting the value 295 | # Bad enough this is a struct so the variable points to the start address of the struct 296 | # if you want a specific field you have to add its offset 297 | offset_field = refined.getStackOffset() + refined.getDataType().getComponent(4).getOffset() 298 | # print "offset_field", offset_field 299 | refs = sorted([(_.getFromAddress().getOffset(), _) 300 | for _ in ref_mgr.getReferencesTo(refined) 301 | if _.getFromAddress() < source_addr 302 | and _.getStackOffset() == offset_field], 303 | key = lambda _ : _[0])[-1] 304 | 305 | instr = getInstructionAt(refs[1].getFromAddress()) 306 | #print "op before", refs, refs[1] 307 | #print "instr:", instr, instr.getPcode(), instr.getDefaultOperandRepresentation(0) 308 | annotation = codeUnitFormat.getRepresentationString(instr) 309 | # print "annotation", annotation 310 | from_annotation = getSymbolFromAnnotation(annotation) 311 | print "symbol from annotations", from_annotation 312 | 313 | rX = instr.getRegister(0) 314 | 315 | # print "instr+reg", rX, instr.getInstructionContext().getRegisterValue(rX) 316 | 317 | pcode = instr.getPcode()[1] 318 | 319 | # print "pcode:", pcode, pcode.getSeqnum().getTarget() 320 | 321 | if pcode.getOpcode() != PcodeOp.STORE: 322 | raise ValueError("I was expecting a STORE operation here") 323 | 324 | value = pcode.getInput(1) 325 | 326 | # print "value", value, value.getAddress(), value.getDef(), value.getDescendants() 327 | 328 | #c_line = getCLine(markup, pcode.getSeqnum().getTarget()) 329 | #print "C code", c_line 330 | 331 | output = getDataAt(from_annotation.getAddress()) if from_annotation else None 332 | 333 | calling_args[pos] = output 334 | 335 | 336 | continue # we exit since our job is finished 337 | while arg.getDef().getOpcode() == PcodeOp.CAST: 338 | arg = arg.getDef().getInput(0) 339 | 340 | # OK, this is a little weird, but PTRSUBs with first arg == 0 341 | # are (usually) global variables at address == second arg 342 | if arg.getDef().getOpcode() == PcodeOp.PTRSUB: 343 | arg = arg.getDef().getInput(1) 344 | elif arg.getDef().getOpcode() == PcodeOp.COPY: 345 | arg = arg.getDef().getInput(0) 346 | else: 347 | raise ValueError("I was not expection that") 348 | 349 | print("arg%d: %08x" % (pos, arg.getAddress().getOffset())) 350 | else: 351 | print("arg0: %d" % int(arg.getAddress().getOffset())) 352 | 353 | calling_args[pos] = arg.getAddress().getOffset() 354 | 355 | calling_args.insert(0, source_addr) 356 | 357 | # remember: it's possible we have more than one call to the same function 358 | results.append(calling_args) 359 | 360 | return results 361 | 362 | 363 | datatype_registerSingletonType = getDataTypes('RegisterSingletonType') 364 | if len(datatype_registerSingletonType) > 1: 365 | raise ValueError("You must have only one RegisterSingletonType data type") 366 | 367 | datatype_registerSingletonType = datatype_registerSingletonType[0] 368 | 369 | def get_string_from_stack(variable, source_addr): 370 | """Try to extract the string pointed by the struct on the struct""" 371 | if type(variable) != LocalVariableDB: 372 | logger.warning("variable {} from {} is not a local variable, nothing to do here".format(variable, source_addr)) 373 | return None 374 | 375 | if variable.getDataType() != datatype_registerSingletonType: 376 | logger.warning("variable {} from {} is not a RegisterSingletonType but {}, nothing to do here".format( 377 | variable, source_addr, variable.getDataType())) 378 | return None 379 | 380 | if not variable.isStackVariable(): 381 | logger.warning("variable {} from {} is not a stack variable, nothing to do here".format(variable, source_addr)) 382 | return None 383 | 384 | ref_mgr = currentProgram.getReferenceManager() 385 | offset_field = variable.getStackOffset() + variable.getDataType().getComponent(4).getOffset() 386 | # print "offset_field", offset_field 387 | refs = sorted([(_.getFromAddress().getOffset(), _) 388 | for _ in ref_mgr.getReferencesTo(variable) 389 | if _.getFromAddress() < source_addr 390 | and _.getStackOffset() == offset_field], 391 | key=lambda _: _[0])[-1] 392 | 393 | instr = getInstructionAt(refs[1].getFromAddress()) 394 | # print "op before", refs, refs[1] 395 | # print "instr:", instr, instr.getPcode(), instr.getDefaultOperandRepresentation(0) 396 | annotation = codeUnitFormat.getRepresentationString(instr) 397 | # print "annotation", annotation 398 | from_annotation = getSymbolFromAnnotation(annotation) 399 | print "symbol from annotations", from_annotation 400 | 401 | rX = instr.getRegister(0) 402 | 403 | # print "instr+reg", rX, instr.getInstructionContext().getRegisterValue(rX) 404 | 405 | pcode = instr.getPcode()[1] 406 | 407 | # print "pcode:", pcode, pcode.getSeqnum().getTarget() 408 | 409 | if pcode.getOpcode() != PcodeOp.STORE: 410 | raise ValueError("I was expecting a STORE operation here") 411 | 412 | value = pcode.getInput(1) 413 | 414 | # print "value", value, value.getAddress(), value.getDef(), value.getDescendants() 415 | 416 | # c_line = getCLine(markup, pcode.getSeqnum().getTarget()) 417 | # print "C code", c_line 418 | 419 | output = getDataAt(from_annotation.getAddress()) if from_annotation else None 420 | 421 | return output 422 | 423 | 424 | def get_function_by_name(name): 425 | """Little hacky way of finding the function by name since getFunction() by FlatAPI 426 | doesn't work.""" 427 | candidates = [_ for _ in currentProgram.getFunctionManager().getFunctionsNoStubs(True) if name == _.name] 428 | 429 | if len(candidates) > 1: 430 | raise ValueError("We expected to find only one of '%s'" % name) 431 | 432 | return candidates[0] 433 | 434 | 435 | 436 | 437 | 438 | 439 | 440 | def check(): 441 | """Check that the data types we are supposed to use exist""" 442 | RegisterSingletonType = getDataTypes('RegisterSingletonType')[0] 443 | 444 | if RegisterSingletonType is None: 445 | print "creating 'RegisterSingletonType'" 446 | 447 | check_and_create('RegistrationType', REGISTRATION_TYPE_DECLARATION) 448 | 449 | 450 | def main(): 451 | check() 452 | QMLREGISTER = get_function_by_name(FUNC_NAME) 453 | print "Found '%s' at %s" % (FUNC_NAME, QMLREGISTER.entryPoint) 454 | 455 | tableDialog = createTableChooserDialog("qmlregister() calls", ArgumentsExecutor(), False) 456 | tableDialog.addCustomColumn(TypeColumn()) 457 | tableDialog.addCustomColumn(ClassNameColumn()) 458 | 459 | results = [] 460 | 461 | for caller in getXref(QMLREGISTER): 462 | if caller is None: 463 | continue 464 | results.extend(common.getCallerInfo(QMLREGISTER, caller)) 465 | 466 | for addr_source, registrationType, var_struct in results: 467 | klass_name = get_string_from_stack(var_struct, addr_source) 468 | tableDialog.add(Argument([addr_source, registrationType, klass_name])) 469 | 470 | tableDialog.show() 471 | 472 | main() -------------------------------------------------------------------------------- /QTResource.py: -------------------------------------------------------------------------------- 1 | # Extract QML resource files 2 | #@author Gianluca Pacchiella 3 | #@category QT 4 | #@keybinding 5 | #@menupath 6 | #@toolbar 7 | """ 8 | Resources on QT are registered using 9 | 10 | bool qRegisterResourceData(int version, const unsigned char *tree, 11 | const unsigned char *name, const unsigned char *data) 12 | 13 | This script looks for calls to this function and after extracting the arguments tries to 14 | rebuild the resource files. 15 | 16 | "tree" is the data structure containing the description of the filesystem tree, how many child 17 | a node has (like a filesystem does); "name" and "data" contain the actual identifier for a given node 18 | and the data associated (but take in mind that the data is only for the leaf node, i.e. the files). 19 | 20 | To understand the layout of the data look at the method 21 | 22 | RCCResourceLibrary::output() 23 | 24 | and the methods it internally calls 25 | 26 | writeDataBlobs() 27 | writeDataNames() 28 | writeDataStructure() 29 | """ 30 | import logging 31 | import os 32 | import struct 33 | import zlib 34 | from collections import deque 35 | 36 | from ghidra.program.model.data import ( 37 | UnsignedIntegerDataType, 38 | ShortDataType, 39 | StructureDataType, 40 | UnsignedLongLongDataType, 41 | EndianSettingsDefinition, 42 | DataTypeConflictHandler, 43 | CategoryPath, 44 | ) 45 | 46 | import common 47 | 48 | 49 | logging.basicConfig() 50 | logger = logging.getLogger(__name__) 51 | logger.setLevel("INFO") 52 | 53 | # first of all we get the data type we want 54 | # remember all the fields are big endian 55 | # probably should be created programmatically 56 | RESOURCE_STRUCT_DECLARATION = """ 57 | struct resource_struct_t { 58 | unsigned int name_offset; 59 | short flags; 60 | unsigned int mix; 61 | unsigned int offset; 62 | unsigned long long lastmod; 63 | }; 64 | """ 65 | 66 | 67 | def create(): 68 | """Create programmatically the struct we need. 69 | 70 | In theory this data type doesn't contain anything exotic BUT since 71 | it's unaligned (because of the short) the CParser add unwanted padding.""" 72 | logger.warning("creating resource_struct_t") 73 | resource = StructureDataType("resource_struct_t", 0) 74 | 75 | resource.setCategoryPath(CategoryPath("/script")) 76 | 77 | resource.add(UnsignedIntegerDataType.dataType, 0, "name_offset", "") 78 | resource.add(ShortDataType.dataType, 0, "flags", "") 79 | resource.add(UnsignedIntegerDataType.dataType, 0, "mix", "") 80 | resource.add(UnsignedIntegerDataType.dataType, 0, "offset", "") 81 | resource.add(UnsignedLongLongDataType.dataType, 0, "lastmod", "") 82 | 83 | # you have to save it 84 | dtm = currentProgram.getDataTypeManager() 85 | dtm.addDataType(resource, DataTypeConflictHandler.REPLACE_HANDLER) 86 | 87 | resource = dtm.getDataType("/script/resource_struct_t") 88 | 89 | # then fix the endianess 90 | # https://reverseengineering.stackexchange.com/questions/23330/ghidra-python-create-struct-with-big-endian-field 91 | for component in resource.getComponents(): 92 | component_settings = component.getDefaultSettings() 93 | component_settings.setLong('endian', EndianSettingsDefinition.BIG) 94 | 95 | # and then requery again 96 | return dtm.getDataType("/script/resource_struct_t") 97 | 98 | 99 | def get(): 100 | dtm = currentProgram.getDataTypeManager() 101 | 102 | resource_struct_t = dtm.getDataType("/script/resource_struct_t") 103 | 104 | if not resource_struct_t: 105 | return create(), True 106 | 107 | return resource_struct_t, False 108 | 109 | 110 | class RCCFileInfoNode: 111 | # value taken from rcc.cpp, class RCCFileInfo 112 | COMPRESSED = 1 113 | DIRECTORY = 2 114 | COMPRESSED_ZSTD = 4 115 | 116 | def __init__(self, name, is_dir, parent=None, **kwargs): 117 | self.name = name 118 | self.is_dir = is_dir 119 | self.parent = parent 120 | 121 | self.is_compressed = kwargs['is_compressed'] 122 | 123 | if is_dir: 124 | self.child_offset = kwargs['child_offset'] 125 | self.child_size = kwargs['child_size'] 126 | else: 127 | self.data = kwargs['data'] 128 | 129 | def __str__(self): 130 | return self.name 131 | 132 | 133 | class QResourceRoot: 134 | """The instance holds the base addresses for tree, names and data from which 135 | obtain the node information.""" 136 | def __init__(self, addr_root, addr_names, addr_data): 137 | self.root = addr_root 138 | self.names = addr_names 139 | self.data = addr_data 140 | 141 | self.resource_struct, _ = get() 142 | 143 | @staticmethod 144 | def __build(node, parent=None): 145 | data = { 146 | 'name_offset': node.getComponent(0).value.value, 147 | 'flags': node.getComponent(1).value.value, 148 | 'lastmod': node.getComponent(4), 149 | } 150 | 151 | data['is_dir'] = bool(data['flags'] & RCCFileInfoNode.DIRECTORY) 152 | data['is_compressed'] = bool(data['flags'] & RCCFileInfoNode.COMPRESSED) 153 | 154 | if data['is_dir']: 155 | data['child_size'] = node.getComponent(2).value.value 156 | data['child_offset'] = node.getComponent(3).value.value 157 | else: 158 | data['data_offset'] = node.getComponent(3).value.value 159 | 160 | # print data 161 | 162 | return data 163 | 164 | def __get_name(self, name_offset): 165 | size = common.get_bytes_from_binary(self.names.add(name_offset), 2) 166 | size = struct.unpack(">h", size)[0] 167 | # we are asking for unicode so we double the data 168 | return common.get_bytes_from_binary(self.names.add(name_offset + 2 + 4), size * 2).decode('utf16') 169 | 170 | def __get_data(self, data_offset, is_compressed): 171 | """The data part has the length of the data is exposed as a 4 byte Big ending value.""" 172 | size = common.get_bytes_from_binary(self.data.add(data_offset), 4) 173 | size = struct.unpack(">I", size)[0] 174 | 175 | # print "getting #{} bytes of data from offset {}".format(size, data_offset) 176 | 177 | # we have two possibilities: 178 | # 1. the data is as is, so we jump the size and use that size as is 179 | # 2. the data is compressed, qCompress() add another 4bytes big-endian 180 | # at the start of the compressed blob with the original file size 181 | # and then the compressed blob itself (so we have to reduce by 4 the 182 | # original size). 183 | offset = 8 if is_compressed else 4 184 | size = size - 4 if is_compressed else size 185 | data = common.get_bytes_from_binary(self.data.add(data_offset + offset), size) 186 | 187 | # if it's compressed you can decompress it via zlib 188 | return zlib.decompress(data) if is_compressed else data 189 | 190 | def __create_data(self, address): 191 | logger.info("Creating data @ {}".format(address)) 192 | return createData(address, self.resource_struct) 193 | 194 | def build_from_address(self, address): 195 | _data = getDataAt(address) 196 | 197 | if _data is None: 198 | _data = self.__create_data(address) 199 | elif _data.getDataType() != self.resource_struct: 200 | logger.warning("cleaning {} at {}".format(_data.getDataType(), address)) 201 | removeData(_data) 202 | _data = self.__create_data(address) 203 | 204 | data = self.__class__.__build(_data) 205 | 206 | name_offset = data.pop('name_offset') 207 | 208 | if not data['is_dir']: 209 | data['data'] = self.__get_data(data['data_offset'], data['is_compressed']) 210 | 211 | return RCCFileInfoNode( 212 | self.__get_name(name_offset), 213 | **data 214 | ) 215 | 216 | def address_for_offset(self, offset): 217 | return self.root.add(self.resource_struct.length*offset) 218 | 219 | def node_at(self, offset): 220 | return self.build_from_address(self.address_for_offset(offset)) 221 | 222 | def get_child_of(self, node): 223 | offset_start = node.child_offset 224 | count = node.child_size 225 | 226 | childs = [] 227 | 228 | for offset in range(offset_start, offset_start + count): 229 | child = self.node_at(offset) 230 | child.parent = node 231 | childs.append(child) 232 | 233 | return childs 234 | 235 | 236 | def dump_file(node, path_root): 237 | # save the original node that is the lead of the tree 238 | file = node 239 | 240 | # if it's a file it has a parent directory (probably?) 241 | if node.parent is not None: 242 | components = [node] 243 | while node.parent is not None: 244 | node = node.parent 245 | components.append(node) 246 | 247 | # build the complete path 248 | components.reverse() 249 | path = "/".join([str(_) for _ in components]) 250 | 251 | logger.info("saving {} ({}compressed)".format(path, "" if file.is_compressed else "no ")) 252 | 253 | # append to the path chosen for the dump 254 | path = os.path.join(path_root, path) 255 | 256 | # check if the directory that will contain the file exists 257 | # and create in case doesn't 258 | dir_containing = os.path.dirname(path) 259 | if not os.path.exists(dir_containing): 260 | os.makedirs(dir_containing) 261 | 262 | # save the data 263 | with open(path, "wb") as output: 264 | output.write(file.data) 265 | 266 | 267 | def dump_root(path_dump, struct_addr, name_addr, data_addr): 268 | ROOT = QResourceRoot(struct_addr, name_addr, data_addr) 269 | nodes = deque() 270 | 271 | root = ROOT.node_at(0) 272 | 273 | nodes.append(root) 274 | 275 | while len(nodes) > 0: 276 | node = nodes.pop() 277 | # print "found node {}".format(node) 278 | 279 | if not node.is_dir: 280 | dump_file(node, path_dump.path) 281 | continue 282 | 283 | nodes.extend(ROOT.get_child_of(node)) 284 | 285 | 286 | def main(): 287 | path_dump = askDirectory("Choose a directory where we'll dump the source tree", "Ok") 288 | 289 | qRegisterResourceData = common.get_function_by_name('qRegisterResourceData') 290 | 291 | for call_address, function in common.get_functions_via_xref(qRegisterResourceData.entryPoint): 292 | if not function: 293 | continue 294 | 295 | try: 296 | info = common.getCallerInfo(qRegisterResourceData, function, call_address) 297 | except: 298 | raise ValueError("Probably the arguments at {} are not 'clean' enough".format(call_address)) 299 | 300 | dump_root(path_dump, *map(toAddr, info[2:])) 301 | 302 | 303 | main() -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import jarray 3 | 4 | from ghidra.app.decompiler import DecompileOptions 5 | from ghidra.app.decompiler import DecompInterface 6 | from ghidra.util.task import ConsoleTaskMonitor 7 | from ghidra.program.model.pcode import HighLocal, VarnodeAST, Varnode, PcodeOpAST, HighSymbol, PcodeOp 8 | from ghidra.program.model.pcode import HighFunctionDBUtil 9 | from ghidra.program.model.symbol import SourceType 10 | from ghidra.program.model.data import CategoryPath 11 | from ghidra.app.tablechooser import TableChooserExecutor, AddressableRowObject, StringColumnDisplay 12 | 13 | 14 | # this below allows to have the global objects as in the scripts themself 15 | # see 16 | from __main__ import * 17 | 18 | logging.basicConfig() 19 | logger = logging.getLogger(__name__) 20 | logger.setLevel("INFO") 21 | 22 | 23 | def get_bytes_from_binary(address, length): 24 | v = jarray.zeros(length, 'b') 25 | currentProgram.getMemory().getBytes(address, v) 26 | 27 | return v.tostring() 28 | 29 | 30 | def get_or_set(address, datatype): 31 | """get the data at the given address checking that 32 | the datatype is the right one. If it's not, clear the data and 33 | set the correct datatype.""" 34 | data = getDataAt(address) 35 | 36 | if data and data.dataType == datatype: # FIXME: for PointerDataType this isn't working 37 | return data 38 | 39 | logger.info('creating data of type {} @{}'.format(datatype, address)) 40 | 41 | return createData(address, datatype) 42 | 43 | 44 | def get_value(address, datatype): 45 | """Get the actual value wrapped by the data type.""" 46 | data = get_or_set(address, datatype) 47 | 48 | return data.getValue() 49 | 50 | 51 | # from 52 | def createDataTypeFromC(declaration, category=None): 53 | """This works only for very simple data types that use "primitive" types.""" 54 | from ghidra.app.util.cparser.C import CParser 55 | from ghidra.program.model.data import DataTypeConflictHandler 56 | 57 | dtm = currentProgram.getDataTypeManager() 58 | parser = CParser(dtm) 59 | 60 | new_dt = parser.parse(declaration) 61 | new_dt.setDescription("created from script") 62 | 63 | if category: 64 | new_dt.setCategoryPath(CategoryPath(category)) 65 | 66 | transaction = dtm.startTransaction("Adding new data") 67 | 68 | dtm.addDataType(new_dt, None) 69 | dtm.endTransaction(transaction, True) 70 | 71 | 72 | def check_and_create(datatype_name, declaration, category=None): 73 | """Check that the datatype exists and if not create it from C declaration. 74 | 75 | It returns the tuple (data type, boolean) where the last one indicates 76 | with the data type was created.""" 77 | logger.info("check_and_create() for '%s'" % datatype_name) 78 | 79 | dataTypes = getDataTypes(datatype_name) 80 | 81 | is_created = False 82 | 83 | if len(dataTypes) == 0 or dataTypes[0].isNotYetDefined(): 84 | logger.warning("creating '%s'" % datatype_name) 85 | createDataTypeFromC(declaration, category) 86 | dataTypes = getDataTypes(datatype_name) 87 | is_created = True 88 | 89 | return dataTypes[0], is_created 90 | 91 | 92 | def cache(f): 93 | _memo = {} 94 | 95 | def _helper(x): 96 | if x not in _memo: 97 | _memo[x] = f(x) 98 | return _memo[x] 99 | return _helper 100 | 101 | 102 | def getCLine(c_markup, address): 103 | """Try to find the line in the C code for the given address""" 104 | # c_markup is a ClangTokenGroup 105 | 106 | queue = deque() 107 | 108 | queue.append(c_markup) 109 | 110 | while True: 111 | tmp = queue.pop() 112 | 113 | if tmp.getMinAddress() == address and tmp.getMaxAddress() == address: 114 | return tmp 115 | 116 | filtered = [(n, _) for n, _ in enumerate(list(tmp)) 117 | if _.getMinAddress() is not None and _.getMinAddress() <= address and _.getMaxAddress() >= address] 118 | 119 | for index, node in filtered: 120 | queue.append(tmp.Child(index)) 121 | 122 | 123 | def get_function_by_name(name, namespace=None, external=False): 124 | """Little hacky way of finding the function by name since getFunction() by FlatAPI 125 | doesn't work.""" 126 | 127 | functionManager = currentProgram.getFunctionManager() 128 | 129 | functions = functionManager.getFunctionsNoStubs(True) if not external else functionManager.getExternalFunctions() 130 | 131 | candidates = [_ for _ in functions if name == _.name] 132 | 133 | if namespace: 134 | candidates = [_ for _ in candidates if _.getParentNamespace().getName() == namespace] 135 | 136 | if len(candidates) != 1: 137 | raise ValueError("We expected to find only one of '%s' instead we have %s" % (name, candidates)) 138 | 139 | return candidates[0] 140 | 141 | 142 | def decompile_function(func): 143 | options = DecompileOptions() 144 | monitor = ConsoleTaskMonitor() 145 | ifc = DecompInterface() 146 | ifc.setOptions(options) 147 | # Setting a simplification style will strip useful `indirect` information. 148 | # Please don't use this unless you know why you're using it. 149 | ifc.setSimplificationStyle("normalize") 150 | 151 | ifc.openProgram(getCurrentProgram()) 152 | return ifc.decompileFunction(func, 60, monitor) 153 | 154 | 155 | def get_high_function(func): 156 | res = decompile_function(func) 157 | high = res.getHighFunction() 158 | 159 | return high 160 | 161 | 162 | def get_stack_var_from_varnode(func, varnode): 163 | logger.debug("get_stack_var_from_varnode(): %s | %s" % (varnode, type(varnode))) 164 | if type(varnode) not in [Varnode, VarnodeAST]: 165 | raise Exception("Invalid value. Expected `Varnode` or `VarnodeAST`, got {}.".format(type(varnode))) 166 | 167 | bitness_masks = { 168 | '16': 0xffff, 169 | '32': 0xffffffff, 170 | '64': 0xffffffffffffffff, 171 | } 172 | 173 | addr_size = currentProgram.getMetadata()['Address Size'] 174 | 175 | try: 176 | bitmask = bitness_masks[addr_size] 177 | except KeyError: 178 | raise Exception("Unsupported bitness: {}. Add a bit mask for this target.".format(addr_size)) 179 | 180 | local_variables = func.getAllVariables() 181 | vndef = varnode.getDef() # .getDef() -> PcodeOp 182 | 183 | if vndef: 184 | vndef_inputs = vndef.getInputs() # -> Varnode[] 185 | for defop_input in vndef_inputs: 186 | defop_input_offset = defop_input.getAddress().getOffset() & bitmask 187 | for lv in local_variables: 188 | unsigned_lv_offset = lv.getMinAddress().getUnsignedOffset() & bitmask 189 | if unsigned_lv_offset == defop_input_offset: 190 | logger.debug(" found stack A: {} (symbol: {})".format(lv, lv.getSymbol())) 191 | return lv 192 | 193 | # If we get here, varnode is likely a "acStack##" variable. 194 | hf = get_high_function(func) 195 | lsm = hf.getLocalSymbolMap() 196 | 197 | for vndef_input in vndef_inputs: 198 | defop_input_offset = vndef_input.getAddress().getOffset() & bitmask 199 | for symbol in lsm.getSymbols(): 200 | if symbol.isParameter(): 201 | continue 202 | if defop_input_offset == symbol.getStorage().getFirstVarnode().getOffset() & bitmask: 203 | logger.debug(" found stack B: {}".format(lv)) 204 | return symbol 205 | 206 | # unable to resolve stack variable for given varnode 207 | logger.debug("no stack variable found") 208 | return None 209 | 210 | 211 | def get_vars_from_varnode(func, node, variables=None): 212 | logger.debug("get_vars_from_varnode(): %s | %s" % (node, type(node))) 213 | if type(node) not in [PcodeOpAST, VarnodeAST]: 214 | raise Exception("Invalid value passed. Got {}.".format(type(node))) 215 | 216 | # create `variables` list on first call. Do not make `variables` default to []. 217 | if variables is None: 218 | variables = [] 219 | 220 | # We must use `getDef()` on VarnodeASTs 221 | if type(node) == VarnodeAST: 222 | logger.debug(" VarnodeAST from addr: {}".format(node.getPCAddress())) 223 | # For `get_stack_var_from_varnode` see: 224 | # https://github.com/HackOvert/GhidraSnippets 225 | # Ctrl-F for "get_stack_var_from_varnode" 226 | var = get_stack_var_from_varnode(func, node) 227 | if var and type(var) != HighSymbol: 228 | variables.append(var) 229 | 230 | node = node.getDef() 231 | if node: 232 | variables = get_vars_from_varnode(func, node, variables) 233 | # We must call `getInputs()` on PcodeOpASTs 234 | elif type(node) == PcodeOpAST: 235 | logger.debug(" PcodeOpAST from addr: {}".format(node.getSeqnum())) 236 | nodes = list(node.getInputs()) 237 | 238 | for node in nodes: 239 | if type(node.getHigh()) == HighLocal: 240 | variables.append(node.getHigh()) 241 | else: 242 | variables = get_vars_from_varnode(func, node, variables) 243 | 244 | if not variables: 245 | logger.debug("get_vars_from_varnode() returned nothing") 246 | 247 | return variables 248 | 249 | 250 | def getXref(func): 251 | target_addr = func.entryPoint 252 | references = getReferencesTo(target_addr) 253 | callers = [] 254 | for xref in references: 255 | call_addr = xref.getFromAddress() 256 | caller = getFunctionContaining(call_addr) 257 | callers.append(caller) 258 | return list(set(callers)) 259 | 260 | 261 | def get_functions_via_xref(target_addr): 262 | """return the xrefs defined towards the target_addr as a list 263 | having as entries couple of the form (call_addr, calling function) 264 | where the latter is None when is not defined.""" 265 | references = getReferencesTo(target_addr) 266 | callers = [] 267 | for xref in references: 268 | call_addr = xref.getFromAddress() 269 | caller = getFunctionContaining(call_addr) 270 | 271 | if caller is None: 272 | logger.debug("found reference to undefined at {}".format(call_addr)) 273 | 274 | callers.append((call_addr, caller)) 275 | 276 | return callers 277 | 278 | 279 | def _getCountedXrefs(target_addr): 280 | from collections import Counter 281 | xrefs = get_functions_via_xref(target_addr) 282 | 283 | return Counter([_[1] for _ in xrefs]) 284 | 285 | 286 | # cache use by getCallerInfo() to avoid calling get_high_function() over and over 287 | _hf_cache = {} 288 | 289 | 290 | def getCallerInfo(func, caller, call_address, options = DecompileOptions(), ifc = DecompInterface()): 291 | logger.debug("function: '%s'" % caller.name) 292 | 293 | if caller not in _hf_cache: 294 | _hf_cache[caller] = get_high_function(caller) 295 | 296 | high_func = _hf_cache[caller] 297 | 298 | # we need to commit the local variable in order to see them 299 | # and make the following analysis working 300 | HighFunctionDBUtil.commitLocalNamesToDatabase(high_func, SourceType.USER_DEFINED) 301 | 302 | # lsm = high_func.getLocalSymbolMap() 303 | # markup = res.getCCodeMarkup() 304 | opiter = high_func.getPcodeOps(call_address) 305 | op = opiter.next() 306 | inputs = op.getInputs() 307 | 308 | # we are going to save the argument of the requested call 309 | # but we are not interested to the address that is the inputs[0] 310 | # argument from the PcodeOp 311 | calling_args = [0] * (len(inputs) - 1) 312 | 313 | addr = inputs[0].getAddress() 314 | args = inputs[1:] # List of VarnodeAST types 315 | 316 | source_addr = op.getSeqnum().getTarget() 317 | 318 | logger.debug("Call to {} at {} has {} arguments: {}".format(addr, source_addr, len(args), args)) 319 | 320 | for pos, arg in enumerate(args): 321 | # var = arg.getHigh() 322 | # print "var", var, var.getSymbol(), var.getDataType() 323 | # print "lsm", lsm.findLocal(arg.getAddress(), None) 324 | 325 | logger.debug("initial arg%d: %s" % (pos, arg)) 326 | refined = get_vars_from_varnode(caller, arg) 327 | 328 | if len(refined) > 0: 329 | logger.debug("found variable '%s' for arg%d" % (refined, pos)) 330 | refined = refined[0] 331 | logger.debug("{} with type {} (symbol: {})".format(refined, type(refined), refined.getSymbol())) 332 | 333 | calling_args[pos] = refined 334 | continue 335 | """ 336 | 337 | print "symbol", refined.getSymbol(), refined.getSymbol().getAddress(), dir(refined.getSymbol()), refined.getSymbol().getSymbolType() 338 | print "address", refined.getLastStorageVarnode().getAddress() 339 | print "high", refined.getLastStorageVarnode().getHigh() 340 | # print "getDef()", refined.getDef() 341 | print "last", refined.getFirstStorageVarnode().getDef() 342 | print "stack", stack_frame.getVariableContaining(refined.getStackOffset()) 343 | print "references", '\n'.join([str(_) for _ in ref_mgr.getReferencesTo(refined)]) 344 | """ 345 | # print "auaua", [(_.getFromAddress().getOffset(), _.getStackOffset()) for _ in ref_mgr.getReferencesTo(refined) if 346 | # _.getFromAddress() < source_addr] 347 | # here we are going to create an ordered list with all the references to the given variable 348 | # that happen before the call and return only the last one that hopefully is the one 349 | # setting the value 350 | # Bad enough this is a struct so the variable points to the start address of the struct 351 | # if you want a specific field you have to add its offset 352 | offset_field = refined.getStackOffset() + refined.getDataType().getComponent(4).getOffset() 353 | # print "offset_field", offset_field 354 | refs = sorted([(_.getFromAddress().getOffset(), _) 355 | for _ in ref_mgr.getReferencesTo(refined) 356 | if _.getFromAddress() < source_addr 357 | and _.getStackOffset() == offset_field], 358 | key = lambda _ : _[0])[-1] 359 | 360 | instr = getInstructionAt(refs[1].getFromAddress()) 361 | #print "op before", refs, refs[1] 362 | #print "instr:", instr, instr.getPcode(), instr.getDefaultOperandRepresentation(0) 363 | annotation = codeUnitFormat.getRepresentationString(instr) 364 | # print "annotation", annotation 365 | from_annotation = getSymbolFromAnnotation(annotation) 366 | logger.debug("symbol from annotations", from_annotation) 367 | 368 | rX = instr.getRegister(0) 369 | 370 | # print "instr+reg", rX, instr.getInstructionContext().getRegisterValue(rX) 371 | 372 | pcode = instr.getPcode()[1] 373 | 374 | # print "pcode:", pcode, pcode.getSeqnum().getTarget() 375 | 376 | if pcode.getOpcode() != PcodeOp.STORE: 377 | raise ValueError("I was expecting a STORE operation here") 378 | 379 | value = pcode.getInput(1) 380 | 381 | # print "value", value, value.getAddress(), value.getDef(), value.getDescendants() 382 | 383 | #c_line = getCLine(markup, pcode.getSeqnum().getTarget()) 384 | #print "C code", c_line 385 | 386 | output = getDataAt(from_annotation.getAddress()) if from_annotation else None 387 | 388 | calling_args[pos] = output 389 | 390 | continue # we exit since our job is finished 391 | 392 | if arg.isConstant(): 393 | logger.debug(" found constant") 394 | calling_args[pos] = arg.getOffset() 395 | continue 396 | 397 | if arg.getDef() is None: 398 | logger.warning("this arg is strange (.def() is None)") 399 | calling_args[pos] = None 400 | continue 401 | 402 | while arg.getDef().getOpcode() == PcodeOp.CAST: 403 | logger.debug(" CAST is on the way {}".format(arg)) 404 | arg = arg.getDef().getInput(0) 405 | 406 | # OK, this is a little weird, but PTRSUBs with first arg == 0 407 | # are (usually) global variables at address == second arg 408 | if arg.getDef().getOpcode() == PcodeOp.PTRSUB: 409 | logger.debug(" found PTRSUB") 410 | arg = arg.getDef().getInput(1) 411 | elif arg.getDef().getOpcode() == PcodeOp.COPY: 412 | logger.debug(" found COPY") 413 | arg = arg.getDef().getInput(0) 414 | else: 415 | raise ValueError("I was not expection that: {} -> {}".format(arg, arg.getOpcode())) 416 | 417 | logger.debug("arg%d: %08x" % (pos, arg.getAddress().getOffset())) 418 | 419 | calling_args[pos] = arg.getAddress().getOffset() 420 | 421 | calling_args.insert(0, source_addr) 422 | 423 | logger.info(calling_args) 424 | return calling_args 425 | 426 | 427 | def create_simple_table(addresses): 428 | """ 429 | The table code is inspired from here 430 | """ 431 | 432 | class ArgumentsExecutor(TableChooserExecutor): 433 | def execute(self, rowObject): 434 | return True 435 | 436 | def getButtonName(self): 437 | return "I'm late!" 438 | 439 | class Argument(AddressableRowObject): 440 | def __init__(self, row): 441 | # using "address" raises "AttributeError: read-only attr: address" 442 | self.row = row 443 | 444 | def getAddress(self): 445 | return self.row 446 | 447 | tableDialog = createTableChooserDialog("list of addresses", ArgumentsExecutor(), False) 448 | 449 | for address in addresses: 450 | tableDialog.add(Argument(address)) 451 | 452 | tableDialog.show() -------------------------------------------------------------------------------- /create_qstring.py: -------------------------------------------------------------------------------- 1 | # Create QString at the cursor 2 | #@author Gianluca Pacchiella 3 | #@category QT 4 | #@keybinding SHIFT-S 5 | #@menupath 6 | #@toolbar 7 | import QString 8 | def main(address): 9 | 10 | string = QString(address) 11 | # check if just after the QArrayData there is another one 12 | address_next = address.add(string.dataType.getLength()) 13 | value = getInt(address_next) 14 | 15 | if value != -1: 16 | # or move the cursor at the end of the string 17 | address_next = string.end_aligned 18 | 19 | goTo(address_next) 20 | 21 | 22 | main(currentAddress) -------------------------------------------------------------------------------- /vtable.py: -------------------------------------------------------------------------------- 1 | # The idea here is to build a data type to use as a "container" for 2 | # the virtual table of a given class. Select the region with the 3 | # pointers to the virtual functions, having as a constraint that a 4 | # label of the form "::vtable" is at the start of it. 5 | #@author Gianluca Pacchiella 6 | #@category QT 7 | #@keybinding SHIFT-V 8 | #@menupath 9 | #@toolbar 10 | import logging 11 | 12 | from ghidra.program.model.data import ( 13 | StructureDataType, 14 | IntegerDataType, 15 | DataTypeConflictHandler, 16 | PointerDataType, 17 | FunctionDefinitionDataType, 18 | ) 19 | 20 | logging.basicConfig() 21 | logger = logging.getLogger(__name__) 22 | logger.setLevel("INFO") 23 | 24 | 25 | def build_structure(class_name, startAddress, count): 26 | path = "{}_vtable_t".format(class_name) 27 | logger.info("building struct named {}".format(path)) 28 | structure = StructureDataType(path, 0) 29 | 30 | for index in range(count): 31 | logger.debug(" index: {}".format(index)) 32 | address = startAddress.add(index * 4) 33 | addr_func = toAddr(getDataAt(address).getInt(0)) 34 | function = getFunctionAt(addr_func) 35 | 36 | if function is None: 37 | logger.info("no function at {}, creating right now!".format(address)) 38 | function = createFunction(addr_func, None) # use default name 39 | 40 | function_name = function.getName() 41 | 42 | # if it's a function with an already defined Namespace don't change that 43 | if function.getParentNamespace().isGlobal(): 44 | # set the right Namespace and the __thiscall convention 45 | namespace = getNamespace(None, class_name) 46 | function.setParentNamespace(namespace) 47 | 48 | function.setCallingConvention('__thiscall') 49 | funcDefinition = FunctionDefinitionDataType(function, False) 50 | 51 | logger.debug(" with signature: {}".format(funcDefinition)) 52 | 53 | ptr_func_definition_data_type = PointerDataType(funcDefinition) 54 | 55 | # we are going to save definition and all 56 | # but probably we should clean the old definitions 57 | # of data types? 58 | data_type_manager = currentProgram.getDataTypeManager() 59 | logger.debug("Replacing {}".format(funcDefinition)) 60 | # we replace all the things since they are generated automagically anyway 61 | data_type_manager.addDataType(funcDefinition, DataTypeConflictHandler.REPLACE_HANDLER) 62 | data_type_manager.addDataType(ptr_func_definition_data_type, DataTypeConflictHandler.REPLACE_HANDLER) 63 | 64 | structure.insertAtOffset( # FIXME: in general 4 is not the right size 65 | index * 4, 66 | ptr_func_definition_data_type, 67 | 4, 68 | function_name, 69 | "", 70 | ) 71 | 72 | return structure 73 | 74 | 75 | def set_vtable_datatype(class_name, structure): 76 | path = "/{}".format(class_name) 77 | class_type = currentProgram.getDataTypeManager().getDataType(path) 78 | 79 | if class_type is None or class_type.isZeroLength(): 80 | raise ValueError("You must define the class '{}' with '_vtable' before".format(class_name)) 81 | 82 | field = class_type.getComponent(0) 83 | field_name = field.getFieldName() 84 | 85 | if field_name != "_vtable": 86 | raise ValueError("I was expecting the first field to be named '_vtable'") 87 | 88 | logger.info("set vtable as a pointer to {}".format(structure.getName())) 89 | field.setDataType(PointerDataType(structure)) 90 | 91 | 92 | def main(): 93 | startAddress = currentSelection.getFirstRange().getMinAddress() 94 | count = currentSelection.getFirstRange().getLength() / 4 95 | 96 | sym = getSymbolAt(startAddress) 97 | 98 | if sym is None or sym.getName() != "vtable" or sym.isGlobal(): 99 | raise ValueError( 100 | "I was expecting a label here indicating the class Namespace, something like 'ClassName::vtable'") 101 | 102 | # FIXME: nested namespaces are not handled correctly 103 | class_name = sym.getParentNamespace().getName() 104 | if "::" in class_name: 105 | raise ValueError("Probably you want to handle manually this one: namespace '{}'".format(class_name)) 106 | 107 | structure = build_structure(class_name, startAddress, count) 108 | 109 | data_type_manager = currentProgram.getDataTypeManager() 110 | logger.info("Replacing {}".format(structure.getName())) 111 | data_type_manager.addDataType(structure, DataTypeConflictHandler.REPLACE_HANDLER) 112 | 113 | set_vtable_datatype(class_name, structure) 114 | 115 | main() 116 | --------------------------------------------------------------------------------