├── .gitignore ├── itanium_demangler ├── MANIFEST.in ├── .gitignore ├── setup.py ├── LICENSE-0BSD.txt ├── README.md ├── tests │ └── test.py └── itanium_demangler │ └── __init__.py ├── doc ├── vtable-after.png ├── vtable-before.png ├── signature-after.png └── signature-before.png ├── LICENSE-0BSD.txt ├── plugin.json ├── README.md └── __init__.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | -------------------------------------------------------------------------------- /itanium_demangler/MANIFEST.in: -------------------------------------------------------------------------------- 1 | include LICENSE-0BSD.txt 2 | -------------------------------------------------------------------------------- /doc/vtable-after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/vtable-after.png -------------------------------------------------------------------------------- /doc/vtable-before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/vtable-before.png -------------------------------------------------------------------------------- /doc/signature-after.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/signature-after.png -------------------------------------------------------------------------------- /doc/signature-before.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/signature-before.png -------------------------------------------------------------------------------- /itanium_demangler/.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | __pycache__/ 3 | _build/ 4 | *.egg-info/ 5 | /.eggs/ 6 | /build/ 7 | /dist/ 8 | -------------------------------------------------------------------------------- /LICENSE-0BSD.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2018 whitequark@whitequark.org 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 7 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 8 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 9 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 10 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 11 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 12 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 13 | -------------------------------------------------------------------------------- /itanium_demangler/setup.py: -------------------------------------------------------------------------------- 1 | import setuptools 2 | 3 | 4 | setuptools.setup( 5 | name="itanium_demangler", 6 | version="1.0", 7 | author="whitequark", 8 | author_email="whitequark@whitequark.org", 9 | description="Pure Python parser for mangled itanium symbols", 10 | long_description=open("README.md").read(), 11 | long_description_content_type="text/markdown", 12 | license="BSD", 13 | url="https://github.com/whitequark/python-itanium_demangler", 14 | packages=setuptools.find_packages(), 15 | classifiers=[ 16 | "Programming Language :: Python :: 3", 17 | "Programming Language :: Python :: 2", 18 | "Operating System :: OS Independent", 19 | ], 20 | ) 21 | -------------------------------------------------------------------------------- /itanium_demangler/LICENSE-0BSD.txt: -------------------------------------------------------------------------------- 1 | Copyright (C) 2018 whitequark@whitequark.org 2 | 3 | Permission to use, copy, modify, and/or distribute this software for 4 | any purpose with or without fee is hereby granted. 5 | 6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 7 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 8 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 9 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 10 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN 11 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT 12 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 13 | -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion" : 2, 3 | "name": "Itanium C++ ABI", 4 | "type": ["analysis"], 5 | "api": ["python2", "python3"], 6 | "description": "A plugin providing an analysis for Itanium C++ ABI.", 7 | "longdescription": "This plugin provides a custom demangler, an analysis that decodes mangled names and updates function signatures, and an analysis that decodes RTTI and vtables and discovers new procedures based on virtual function pointers.", 8 | "version": "1.0", 9 | "author": "whitequark", 10 | "minimumbinaryninjaversion": 555, 11 | "platforms": ["Darwin", "Linux", "Windows"], 12 | "license": { 13 | "name": "BSD-0-clause", 14 | "text": "Copyright (C) 2018 by whitequark\n\nPermission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE." 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Binary Ninja Itanium C++ ABI Plugin 2 | 3 | The Itanium C++ ABI plugin provides a custom demangler, an analysis that decodes mangled names and updates function signatures, and an analysis that decodes RTTI and vtables and discovers new procedures based on virtual function pointers. 4 | 5 | ## Custom demangler 6 | 7 | The custom demangler converts the mangled names into abstract syntax trees, allowing to extract more type information than the built-in one. For example, it differentiates between complete and base class constructors and destructors. 8 | 9 | ## Function signature decoding 10 | 11 | Before / after: 12 | 13 | 14 | 15 | Note that the decoding is necessarily heuristic as some information is lost, e.g. class members and standalone functions in a namespace have an exactly identical mangling. It is possible that the `this` argument (or worse) would be missing--consult the full decoded name of the function (`current_function.symbol.full_name`) to see the result of decoding, and apply your expertise. 16 | 17 | ## RTTI and vtable decoding 18 | 19 | Before / after: 20 | 21 | 22 | 23 | ## License 24 | 25 | [0-clause BSD](LICENSE-0BSD.txt) 26 | -------------------------------------------------------------------------------- /itanium_demangler/README.md: -------------------------------------------------------------------------------- 1 | # Itanium Demangler 2 | 3 | The *Python Itanium Demangler* is a pure Python parser for the [Itanium C++ ABI symbol mangling language][manglang]. Note that MSVC mangling language is not supported. 4 | 5 | This demangler generates an abstract syntax tree from mangled symbols, which can be used for directly extracting type information, as opposed to having to interpret the C++ source code corresponding to the demangled symbol 6 | 7 | There is also a built-in AST stringifier, so the demangler can be used as a replacement for `c++filt` or for formatting backtraces. 8 | 9 | [manglang]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling 10 | 11 | ## Requirements 12 | 13 | The demangler runs on Python 2.7 and 3.3+ and has no dependencies. 14 | 15 | ## Installation 16 | 17 | Installing via PyPI: 18 | 19 | pip install itanium_demangler 20 | 21 | Using a local repository for development: 22 | 23 | git clone https://github.com/whitequark/python-itanium_demangler 24 | cd python-itanium_demangler 25 | python setup.py develop --user 26 | 27 | ## Usage 28 | 29 | ```python 30 | from itanium_demangler import parse as demangle 31 | 32 | ast = demangle("_ZN5boost6chrono24process_system_cpu_clock3nowEv") 33 | 34 | print(repr(ast)) 35 | # , , , )> (,) None> 36 | 37 | print(ast) 38 | # boost::chrono::process_system_cpu_clock::now() 39 | ``` 40 | 41 | ## Future considerations 42 | 43 | A similar (i.e. also parsing to an AST) implementation of a demangler for the MSVC mangling language would be useful to have. 44 | 45 | ## License 46 | 47 | [0-clause BSD](LICENSE-0BSD.txt) 48 | -------------------------------------------------------------------------------- /itanium_demangler/tests/test.py: -------------------------------------------------------------------------------- 1 | import unittest 2 | 3 | from itanium_demangler import parse, _operators, _builtin_types 4 | 5 | 6 | class TestDemangler(unittest.TestCase): 7 | def assertParses(self, mangled, ast): 8 | result = parse(mangled) 9 | self.assertEqual(result, ast) 10 | 11 | def assertDemangles(self, mangled, demangled): 12 | result = parse(mangled) 13 | if result is not None: 14 | result = str(result) 15 | self.assertEqual(result, demangled) 16 | 17 | def test_name(self): 18 | self.assertDemangles('_Z3foo', 'foo') 19 | self.assertDemangles('_Z3x', None) 20 | 21 | def test_ctor_dtor(self): 22 | self.assertDemangles('_ZN3fooC1E', 'foo::{ctor}') 23 | self.assertDemangles('_ZN3fooC2E', 'foo::{base ctor}') 24 | self.assertDemangles('_ZN3fooC3E', 'foo::{allocating ctor}') 25 | self.assertDemangles('_ZN3fooD0E', 'foo::{deleting dtor}') 26 | self.assertDemangles('_ZN3fooD1E', 'foo::{dtor}') 27 | self.assertDemangles('_ZN3fooD2E', 'foo::{base dtor}') 28 | self.assertDemangles('_ZN3fooC1IcEEc', 'foo::{ctor}(char)') 29 | self.assertDemangles('_ZN3fooD1IcEEc', 'foo::{dtor}(char)') 30 | 31 | def test_operator(self): 32 | for op in _operators: 33 | if _operators[op] in ['new', 'new[]', 'delete', 'delete[]']: 34 | continue 35 | self.assertDemangles('_Z' + op, 'operator' + _operators[op]) 36 | self.assertDemangles('_Znw', 'operator new') 37 | self.assertDemangles('_Zna', 'operator new[]') 38 | self.assertDemangles('_Zdl', 'operator delete') 39 | self.assertDemangles('_Zda', 'operator delete[]') 40 | self.assertDemangles('_Zcvi', 'operator int') 41 | 42 | def test_std_substs(self): 43 | self.assertDemangles('_ZSt3foo', 'std::foo') 44 | self.assertDemangles('_ZStN3fooE', 'std::foo') 45 | self.assertDemangles('_ZSs', 'std::string') 46 | self.assertParses('_ZSt', None) 47 | self.assertDemangles('_Z3fooISt6vectorE', 'foo') 48 | self.assertDemangles('_ZSaIhE', 'std::allocator') 49 | 50 | def test_nested_name(self): 51 | self.assertDemangles('_ZN3fooE', 'foo') 52 | self.assertDemangles('_ZN3foo5bargeE', 'foo::barge') 53 | self.assertDemangles('_ZN3fooIcE5bargeE', 'foo::barge') 54 | self.assertDemangles('_ZNK3fooE', 'foo const') 55 | self.assertDemangles('_ZNV3fooE', 'foo volatile') 56 | self.assertDemangles('_ZNKR3fooE', 'foo const&') 57 | self.assertDemangles('_ZNKO3fooE', 'foo const&&') 58 | self.assertParses('_ZNKO3foo', None) 59 | 60 | def test_template_args(self): 61 | self.assertDemangles('_Z3fooIcE', 'foo') 62 | self.assertDemangles('_ZN3fooIcEE', 'foo') 63 | self.assertParses('_Z3fooI', None) 64 | 65 | def test_builtin_types(self): 66 | for ty in _builtin_types: 67 | self.assertDemangles('_Z1fI' + ty + 'E', 'f<' + str(_builtin_types[ty]) + '>') 68 | 69 | def test_qualified_type(self): 70 | self.assertDemangles('_Z1fIriE', 'f') 71 | self.assertDemangles('_Z1fIKiE', 'f') 72 | self.assertDemangles('_Z1fIViE', 'f') 73 | self.assertDemangles('_Z1fIVVViE', 'f') 74 | 75 | def test_function_type(self): 76 | self.assertDemangles('_Z1fv', 'f()') 77 | self.assertDemangles('_Z1fi', 'f(int)') 78 | self.assertDemangles('_Z1fic', 'f(int, char)') 79 | self.assertDemangles('_ZN1fEic', 'f(int, char)') 80 | self.assertDemangles('_ZN1fIEEic', 'int f<>(char)') 81 | self.assertDemangles('_ZN1fIEC1Eic', 'f<>::{ctor}(int, char)') 82 | 83 | def test_indirect_type(self): 84 | self.assertDemangles('_Z1fIPiE', 'f') 85 | self.assertDemangles('_Z1fIPPiE', 'f') 86 | self.assertDemangles('_Z1fIRiE', 'f') 87 | self.assertDemangles('_Z1fIOiE', 'f') 88 | self.assertDemangles('_Z1fIKRiE', 'f') 89 | self.assertDemangles('_Z1fIRKiE', 'f') 90 | 91 | def test_literal(self): 92 | self.assertDemangles('_Z1fILi1EE', 'f<(int)1>') 93 | self.assertDemangles('_Z1fIL_Z1gEE', 'f') 94 | 95 | def test_argpack(self): 96 | self.assertDemangles('_Z1fILb0EJciEE', 'f<(bool)0, char, int>') 97 | self.assertDemangles('_Z1fILb0EIciEE', 'f<(bool)0, char, int>') 98 | self.assertDemangles('_Z1fIJciEEvDpOT_', 'void f(char, int)') 99 | self.assertDemangles('_Z1fIIciEEvDpOT_', 'void f(char, int)') 100 | 101 | def test_special(self): 102 | self.assertDemangles('_ZTV1f', 'vtable for f') 103 | self.assertDemangles('_ZTT1f', 'vtt for f') 104 | self.assertDemangles('_ZTI1f', 'typeinfo for f') 105 | self.assertDemangles('_ZTS1f', 'typeinfo name for f') 106 | self.assertDemangles('_ZThn16_1fv', 'non-virtual thunk for f()') 107 | self.assertDemangles('_ZTv16_8_1fv', 'virtual thunk for f()') 108 | self.assertDemangles('_ZGV1f', 'guard variable for f') 109 | self.assertDemangles('_ZGTt1fv', 'transaction clone for f()') 110 | 111 | def test_template_param(self): 112 | self.assertDemangles('_ZN1fIciEEvT_PT0_', 'void f(char, int*)') 113 | self.assertParses('_ZN1fIciEEvT_PT0', None) 114 | 115 | def test_substitution(self): 116 | self.assertDemangles('_Z3fooIEvS_', 'void foo<>(foo)') 117 | self.assertDemangles('_ZN3foo3barIES_E', 'foo::bar<>::foo') 118 | self.assertDemangles('_ZN3foo3barIES0_E', 'foo::bar<>::foo::bar') 119 | self.assertDemangles('_ZN3foo3barIES1_E', 'foo::bar<>::foo::bar<>') 120 | self.assertParses('_ZN3foo3barIES_ES2_', None) 121 | self.assertDemangles('_Z3fooIS_E', 'foo') 122 | self.assertDemangles('_ZSt3fooIS_E', 'std::foo') 123 | self.assertDemangles('_Z3fooIPiEvS0_', 'void foo(int*)') 124 | self.assertDemangles('_Z3fooISaIcEEvS0_', 125 | 'void foo>(std::allocator)') 126 | self.assertDemangles('_Z3fooI3barS0_E', 'foo') 127 | self.assertDemangles('_ZN2n11fEPNS_1bEPNS_2n21cEPNS2_2n31dE', 128 | 'n1::f(n1::b*, n1::n2::c*, n1::n2::n3::d*)') 129 | self.assertDemangles('_ZN1f1gES_IFvvEE', 'f::g(f)') 130 | self.assertDemangles('_ZplIcET_S0_', 'char operator+(char)') 131 | self.assertParses('_ZplIcET_S1_', None) 132 | # Operator template results don't get added to substitutions 133 | self.assertParses('_ZStplIcEvS0_', None) 134 | 135 | def test_abi_tag(self): 136 | self.assertDemangles('_Z3fooB5cxx11v', 'foo[abi:cxx11]()') 137 | 138 | def test_const(self): 139 | self.assertDemangles('_ZL3foo', 'foo') 140 | 141 | def test_operator_template(self): 142 | self.assertDemangles('_ZmiIiE', 'operator-') 143 | self.assertDemangles('_ZmiIiEvv', 'void operator-()') 144 | self.assertDemangles('_ZmiIiEvKT_RT_', 'void operator-(int const, int&)') 145 | self.assertDemangles('_ZcviIiE', 'operator int') 146 | self.assertDemangles('_ZcviIiEv', 'operator int()') 147 | self.assertDemangles('_ZcviIiET_T_', 'operator int(int, int)') 148 | 149 | def test_array(self): 150 | self.assertDemangles('_Z1fA1_c', 'f(char[(int)1])') 151 | self.assertDemangles('_Z1fRA1_c', 'f(char(&)[(int)1])') 152 | self.assertDemangles('_Z1fIA1_cS0_E', 'f') 153 | self.assertParses('_Z1fA1c', None) 154 | 155 | def test_function(self): 156 | self.assertDemangles('_Z1fFvvE', 'f(void ())') 157 | self.assertDemangles('_Z1fPFvvE', 'f(void (*)())') 158 | self.assertDemangles('_Z1fPPFvvE', 'f(void (**)())') 159 | self.assertDemangles('_Z1fRPFvvE', 'f(void (*&)())') 160 | self.assertDemangles('_Z1fKFvvE', 'f(void () const)') 161 | 162 | def test_member_data(self): 163 | self.assertDemangles('_Z1fM3fooi', 'f(int foo::*)') 164 | self.assertDemangles('_Z1fMN3foo3barEi', 'f(int foo::bar::*)') 165 | self.assertDemangles('_Z1fM3fooN3bar1XE', 'f(bar::X foo::*)') 166 | self.assertDemangles('_Z1fM3fooIcE3bar', 'f(bar foo::*)') 167 | self.assertDemangles('_Z1fM3foo3barIlE', 'f(bar foo::*)') 168 | self.assertDemangles('_Z3fooPM2ABi', 'foo(int AB::**)') 169 | 170 | def test_member_function(self): 171 | self.assertDemangles('_Z1fM3fooFvvE', 'f(void (foo::*)())') 172 | self.assertDemangles('_Z1fMN3foo3barEFvvE', 'f(void (foo::bar::*)())') 173 | self.assertDemangles('_Z3fooRM3barFviE', 'foo(void (bar::*&)(int))') 174 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | import re 2 | from binaryninja import log 3 | from binaryninja.plugin import PluginCommand, BackgroundTaskThread 4 | from binaryninja.binaryview import BinaryReader 5 | from binaryninja.types import Symbol, Type, NamedTypeReferenceBuilder 6 | # Structure has been deprecated in favor of the StructureBuilder API. 7 | try: 8 | from binaryninja.types import StructureBuilder 9 | except ImportError: 10 | from binaryninja.types import Structure 11 | from binaryninja.enums import SymbolType, ReferenceType 12 | 13 | import sys 14 | import os.path 15 | # Prepend so if the itanium-demangler package is installed elsewhere it doesn't 16 | # interfere 17 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "itanium_demangler")) 18 | from itanium_demangler import Node, parse as parse_mangled, is_ctor_or_dtor 19 | 20 | 21 | def analyze_cxx_abi(view, start=None, length=None, task=None): 22 | platform = view.platform 23 | arch = platform.arch 24 | 25 | void_p_ty = Type.pointer(arch, Type.void()) 26 | char_p_ty = Type.pointer(arch, Type.int(1)) 27 | unsigned_int_ty = Type.int(arch.default_int_size, False) 28 | signed_int_ty = Type.int(arch.default_int_size, True) 29 | 30 | base_type_info_ty = Type.named_type(NamedTypeReferenceBuilder.create( 31 | name='std::type_info')) 32 | base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty) 33 | 34 | def char_array_ty(length): 35 | return Type.array(Type.int(1), strings[0].length) 36 | 37 | def type_info_ty(kind=None): 38 | try: 39 | type_info_struct = StructureBuilder.create() 40 | except NameError: 41 | type_info_struct = Structure() 42 | type_info_struct.append(void_p_ty, 'vtable') 43 | type_info_struct.append(char_p_ty, 'name') 44 | if kind == 'si_class': 45 | type_info_struct.append(base_type_info_ptr_ty, 'base_type') 46 | return Type.structure_type(type_info_struct) 47 | 48 | def vtable_ty(vfunc_count): 49 | try: 50 | vtable_struct = StructureBuilder.create() 51 | except NameError: 52 | vtable_struct = Structure() 53 | vtable_struct.append(signed_int_ty, 'top_offset') 54 | vtable_struct.append(base_type_info_ptr_ty, 'typeinfo') 55 | vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions') 56 | return Type.structure_type(vtable_struct) 57 | 58 | if platform.name.startswith("windows-"): 59 | long_size = arch.default_int_size 60 | else: 61 | long_size = arch.address_size 62 | 63 | if arch.name.startswith('x86'): 64 | char_signed = True 65 | else: 66 | char_signed = False # not always true 67 | 68 | short_size = 2 # not always true 69 | long_long_size = 8 # not always true 70 | 71 | ty_for_cxx_builtin = { 72 | 'void': Type.void(), 73 | 'wchar_t': Type.int(2, sign=char_signed, alternate_name='wchar_t'), 74 | 'bool': Type.bool(), 75 | 'char': Type.int(1, sign=char_signed), 76 | 'signed char': Type.int(1, sign=True), 77 | 'unsigned char': Type.int(1, sign=False), 78 | 'short': Type.int(short_size, sign=True), 79 | 'unsigned short': Type.int(short_size, sign=False), 80 | 'int': Type.int(arch.default_int_size, sign=True), 81 | 'unsigned int': Type.int(arch.default_int_size, sign=False), 82 | 'long': Type.int(long_size, sign=True), 83 | 'unsigned long': Type.int(long_size, sign=False), 84 | 'long long': Type.int(long_long_size, sign=True), 85 | 'unsigned long long': Type.int(long_long_size, sign=False), 86 | '__int128': Type.int(16, sign=True), 87 | 'unsigned __int128': Type.int(16, sign=False), 88 | 'float': Type.float(4), 89 | 'double': Type.float(8), 90 | '__float80': Type.float(10), 91 | '__float128': Type.float(16), 92 | 'char32_t': Type.int(4, sign=char_signed, alternate_name='char32_t'), 93 | 'char16_t': Type.int(2, sign=char_signed, alternate_name='char16_t'), 94 | } 95 | 96 | def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None): 97 | if node.kind == 'builtin': 98 | if node.value in ty_for_cxx_builtin: 99 | return ty_for_cxx_builtin[node.value] 100 | else: 101 | return None 102 | elif node.kind in ['name', 'qual_name']: 103 | named_ty_ref = NamedTypeReferenceBuilder.create(name=str(node)) 104 | return Type.named_type(named_ty_ref) 105 | elif node.kind in ['pointer', 'lvalue', 'rvalue']: 106 | pointee_ty = ty_from_demangler_node(node.value) 107 | if pointee_ty is None: 108 | return None 109 | is_const = ('const' in cv_qual) 110 | is_volatile = ('volatile' in cv_qual) 111 | if node.kind == 'pointer': 112 | return Type.pointer(arch, pointee_ty, is_const, is_volatile) 113 | elif node.kind == 'lvalue': 114 | return Type.pointer(arch, pointee_ty, is_const, is_volatile, 115 | ref_type=ReferenceType.ReferenceReferenceType) 116 | elif node.kind == 'rvalue': 117 | return Type.pointer(arch, pointee_ty, is_const, is_volatile, 118 | ref_type=ReferenceType.RValueReferenceType) 119 | elif node.kind == 'cv_qual': 120 | return ty_from_demangler_node(node.value, cv_qual=node.qual) 121 | elif node.kind == 'func': 122 | is_ctor_dtor = False 123 | if node.name and node.name.kind == 'qual_name': 124 | qual_name = node.name.value 125 | if qual_name[-1].kind in ['ctor', 'dtor']: 126 | is_ctor_dtor = True 127 | 128 | if is_ctor_dtor: 129 | ret_ty = Type.void() 130 | elif node.ret_ty is not None: 131 | ret_ty = ty_from_demangler_node(node.ret_ty) 132 | if ret_ty is None: 133 | return None 134 | else: 135 | ret_ty = Type.int(arch.default_int_size).with_confidence(0) 136 | 137 | arg_nodes = list(node.arg_tys) 138 | arg_tys = [] 139 | 140 | var_arg = False 141 | if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...': 142 | arg_nodes.pop() 143 | var_arg = True 144 | elif arg_nodes[0].kind == 'builtin' and arg_nodes[0].value == 'void': 145 | arg_nodes = arg_nodes[1:] 146 | 147 | this_arg = False 148 | if node.name and node.name.kind == 'qual_name': 149 | qual_name = node.name.value 150 | if is_ctor_dtor or (arg_count_hint is not None and 151 | len(arg_nodes) == arg_count_hint - 1): 152 | this_arg = True 153 | this_node = Node('qual_name', qual_name[:-1]) 154 | this_ty = ty_from_demangler_node(this_node) 155 | if this_ty is None: 156 | return None 157 | arg_tys.append(Type.pointer(arch, this_ty)) 158 | if is_ctor_dtor: 159 | name = '::'.join(str(n) for n in qual_name[:-1]) 160 | if not name.startswith('std') and not view.get_type_by_name(name): 161 | log.log_info(f'Registering new type {name}') 162 | void_p_ty = Type.pointer(arch, Type.void()) 163 | with StructureBuilder.builder(view, name) as s: 164 | s.append(Type.pointer(arch, void_p_ty), 'vtable') 165 | 166 | for arg_node in arg_nodes: 167 | arg_ty = ty_from_demangler_node(arg_node) 168 | if arg_ty is None: 169 | return None 170 | arg_tys.append(arg_ty) 171 | 172 | ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg) 173 | if arg_count_hint is not None: 174 | # toplevel invocation, so return whether we inferred a this argument 175 | return this_arg, ty, is_ctor_dtor 176 | else: 177 | return ty 178 | else: 179 | log.log_warn("Cannot convert demangled AST {} to a type" 180 | .format(repr(node))) 181 | 182 | reader = BinaryReader(view) 183 | def read(size): 184 | if size == 4: 185 | return reader.read32() 186 | elif size == 8: 187 | return reader.read64() 188 | else: 189 | assert False 190 | 191 | symbols = view.get_symbols(start, length) 192 | if task: 193 | task.set_total(len(symbols)) 194 | 195 | mangled_re = re.compile('_?_Z') 196 | 197 | demangler_failures = 0 198 | for symbol in symbols: 199 | if task and not task.advance(): 200 | break 201 | 202 | if not mangled_re.match(symbol.raw_name): 203 | continue 204 | 205 | is_data = (symbol.type == SymbolType.DataSymbol) 206 | is_code = (symbol.type in [SymbolType.FunctionSymbol, 207 | SymbolType.ImportedFunctionSymbol]) 208 | 209 | raw_name, suffix = symbol.raw_name, '' 210 | if '@' in raw_name: 211 | match = re.match(r'^(.+?)(@.+)$', raw_name) 212 | raw_name, suffix = match.group(1), match.group(2) 213 | 214 | try: 215 | name_ast = parse_mangled(raw_name) 216 | if name_ast is None: 217 | log.log_warn("Demangler failed to recognize {}".format(raw_name)) 218 | demangler_failures += 1 219 | except NotImplementedError as e: 220 | log.log_warn("Demangler feature missing on {}: {}".format(raw_name, str(e))) 221 | demangler_failures += 1 222 | 223 | if name_ast: 224 | if name_ast.kind == 'func': 225 | short_name = str(name_ast.name) 226 | else: 227 | short_name = str(name_ast) 228 | symbol = Symbol(symbol.type, symbol.address, 229 | short_name=short_name + suffix, 230 | full_name=str(name_ast) + suffix, 231 | raw_name=symbol.raw_name) 232 | else: 233 | symbol = Symbol(symbol.type, symbol.address, 234 | short_name=symbol.raw_name, full_name=None, raw_name=symbol.raw_name) 235 | view.define_auto_symbol(symbol) 236 | 237 | if name_ast is None: 238 | continue 239 | 240 | elif is_data and name_ast.kind == 'typeinfo_name': 241 | strings = view.get_strings(symbol.address, 1) 242 | if not strings: 243 | continue 244 | 245 | view.define_data_var(symbol.address, char_array_ty(length)) 246 | 247 | elif is_data and name_ast.kind == 'typeinfo': 248 | reader.offset = symbol.address + arch.address_size * 2 249 | 250 | kind = None 251 | 252 | # heuristic: is this is an abi::__si_class_type_info? 253 | base_or_flags = read(arch.default_int_size) 254 | base_symbol = view.get_symbol_at(base_or_flags) 255 | if base_symbol and base_symbol.raw_name.startswith('_ZTI'): 256 | kind = 'si_class' 257 | 258 | view.define_data_var(symbol.address, type_info_ty(kind)) 259 | 260 | elif is_data and name_ast.kind == 'vtable': 261 | vtable_addr = symbol.address 262 | 263 | reader.offset = vtable_addr + arch.address_size * 2 264 | while True: 265 | vfunc_count = 0 266 | check_next = True 267 | while True: 268 | vfunc_ptr_symbol = view.get_symbol_at(reader.offset) 269 | if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith('_Z'): 270 | # any C++ symbol definitely terminates the vtable 271 | check_next = False 272 | break 273 | 274 | # heuristic: existing function 275 | vfunc_addr = read(arch.address_size) 276 | if view.get_function_at(vfunc_addr): 277 | vfunc_count += 1 278 | continue 279 | 280 | # explicitly reject null pointers; in position-independent code 281 | # address zero can belong to the executable segment 282 | if vfunc_addr == 0: 283 | check_next = False 284 | break 285 | 286 | # heuristic: pointer to executable memory 287 | vfunc_segment = view.get_segment_at(vfunc_addr) 288 | if vfunc_addr != 0 and vfunc_segment and vfunc_segment.executable: 289 | view.add_function(vfunc_addr) 290 | vfunc_count += 1 291 | 292 | log.log_info('Discovered function at {:#x} via {}' 293 | .format(vfunc_addr, symbol.full_name or symbol.short_name)) 294 | changed = True 295 | continue 296 | 297 | # we've fell off the end of the vtable 298 | break 299 | 300 | view.define_data_var(vtable_addr, vtable_ty(vfunc_count)) 301 | 302 | if check_next: 303 | # heuristic: can another vtable follow this one? let's see if it has typeinfo, 304 | # since that should be always true for when we have a virtual base 305 | typeinfo_ptr = read(arch.address_size) 306 | typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr) 307 | if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith('_ZTI'): 308 | vtable_addr = reader.offset - 2 * arch.address_size 309 | 310 | # documentat it with a symbol 311 | secondary_symbol_name = '{}_secondary_{:x}'.format(symbol.short_name, 312 | vtable_addr - symbol.address) 313 | secondary_symbol = Symbol(SymbolType.DataSymbol, vtable_addr, 314 | short_name=secondary_symbol_name) 315 | view.define_auto_symbol(secondary_symbol) 316 | continue 317 | 318 | break 319 | 320 | elif is_code and name_ast.kind == 'func': 321 | func = view.get_function_at(symbol.address) 322 | 323 | ftype = getattr(func, 'type', None) 324 | if ftype is None: 325 | ftype = ftype.function_type 326 | 327 | demangled = ty_from_demangler_node(name_ast, arg_count_hint=len(ftype.parameters)) 328 | if demangled is not None: 329 | this_arg, ty, dtor_ctor = demangled 330 | func.apply_auto_discovered_type(ty) 331 | if dtor_ctor and this_arg: 332 | start = func.address_ranges[0].start 333 | callers = list(view.get_callers(start)) 334 | for caller in callers: 335 | try: 336 | il_call = next(ins for ins in view.hlil_instructions if ins.address == caller.address) 337 | except StopIteration: 338 | continue 339 | 340 | try: 341 | # If the calling function is a ctor/dtor, it's 342 | # probably running inherited constructors 343 | # so we shouldn't override the type 344 | ast = parse_mangled(il_call.function.source_function.name) 345 | except NotImplementedError as e: 346 | log.log_warn("Demangler feature missing on {}: {}".format(il_call.function.source_function.name, str(e))) 347 | demangler_failures += 1 348 | 349 | if ast and is_ctor_or_dtor(ast): 350 | continue 351 | if not hasattr(il_call, 'params') or not il_call.params: 352 | continue 353 | this = il_call.params[0] 354 | class_type = func.parameter_vars[0].type 355 | if hasattr(this, 'var'): 356 | this.var.type = class_type 357 | 358 | view.update_analysis() 359 | 360 | if demangler_failures: 361 | log.log_warn('{} demangler failures'.format(demangler_failures)) 362 | 363 | 364 | class CxxAbiAnalysis(BackgroundTaskThread): 365 | _PROGRESS_TEXT = 'Analyzing Itanium C++ ABI' 366 | 367 | def __init__(self, view): 368 | BackgroundTaskThread.__init__(self, 369 | initial_progress_text=self._PROGRESS_TEXT + "...", can_cancel=True) 370 | self._view = view 371 | self._total = 0 372 | self._current = 0 373 | 374 | def set_total(self, total): 375 | self._total = total 376 | 377 | def advance(self): 378 | self._current += 1 379 | self.progress = "{} ({}/{})...".format(self._PROGRESS_TEXT, self._current, self._total) 380 | return not self.cancelled 381 | 382 | def run(self): 383 | try: 384 | state = self._view.begin_undo_actions() 385 | analyze_cxx_abi(self._view, task=self) 386 | self._view.commit_undo_actions(state) 387 | finally: 388 | self.finish() 389 | 390 | 391 | PluginCommand.register( 392 | 'Analyze Itanium C++ ABI...', 393 | 'Infer data types from C++ symbol names conforming to Itanium ABI.', 394 | lambda view: CxxAbiAnalysis(view).start() 395 | ) 396 | -------------------------------------------------------------------------------- /itanium_demangler/itanium_demangler/__init__.py: -------------------------------------------------------------------------------- 1 | # encoding:utf-8 2 | name = "itanium_demangler" 3 | 4 | """ 5 | This module implements a C++ Itanium ABI demangler. 6 | 7 | The demangler provides a single entry point, `demangle`, and returns either `None` 8 | or an abstract syntax tree. All nodes have, at least, a `kind` field. 9 | 10 | Name nodes: 11 | * `name`: `node.value` (`str`) holds an unqualified name 12 | * `ctor`: `node.value` is one of `"complete"`, `"base"`, or `"allocating"`, specifying 13 | the type of constructor 14 | * `dtor`: `node.value` is one of `"deleting"`, `"complete"`, or `"base"`, specifying 15 | the type of destructor 16 | * `oper`: `node.value` (`str`) holds a symbolic operator name, without the keyword 17 | "operator" 18 | * `oper_cast`: `node.value` holds a type node 19 | * `tpl_args`: `node.value` (`tuple`) holds a sequence of type nodes 20 | * `qual_name`: `node.value` (`tuple`) holds a sequence of `name` and `tpl_args` nodes, 21 | possibly ending in a `ctor`, `dtor` or `operator` node 22 | * `abi`: `node.value` holds a name node, `node.qual` (`frozenset`) holds a set of ABI tags 23 | 24 | Type nodes: 25 | * `name` and `qual_name` specify a type by its name 26 | * `builtin`: `node.value` (`str`) specifies a builtin type by its name 27 | * `pointer`, `lvalue` and `rvalue`: `node.value` holds a pointee type node 28 | * `cv_qual`: `node.value` holds a type node, `node.qual` (`frozenset`) is any of 29 | `"const"`, `"volatile"`, or `"restrict"` 30 | * `literal`: `node.value` (`str`) holds the literal representation as-is, 31 | `node.ty` holds a type node specifying the type of the literal 32 | * `function`: `node.name` holds a name node specifying the function name, 33 | `node.ret_ty` holds a type node specifying the return type of a template function, 34 | if any, or `None`, ``node.arg_tys` (`tuple`) holds a sequence of type nodes 35 | specifying thefunction arguments 36 | 37 | Special nodes: 38 | * `vtable`, `vtt`, `typeinfo`, and `typeinfo_name`: `node.value` holds a type node 39 | specifying the type described by this RTTI data structure 40 | * `nonvirt_thunk`, `virt_thunk`: `node.value` holds a function node specifying 41 | the function to which the thunk dispatches 42 | """ 43 | 44 | import re 45 | from collections import namedtuple 46 | 47 | 48 | class _Cursor: 49 | def __init__(self, raw, pos=0): 50 | self._raw = raw 51 | self._pos = pos 52 | self._substs = {} 53 | 54 | def at_end(self): 55 | return self._pos == len(self._raw) 56 | 57 | def accept(self, delim): 58 | if self._raw[self._pos:self._pos + len(delim)] == delim: 59 | self._pos += len(delim) 60 | return True 61 | 62 | def advance(self, amount): 63 | if self._pos + amount > len(self._raw): 64 | return None 65 | result = self._raw[self._pos:self._pos + amount] 66 | self._pos += amount 67 | return result 68 | 69 | def advance_until(self, delim): 70 | new_pos = self._raw.find(delim, self._pos) 71 | if new_pos == -1: 72 | return None 73 | result = self._raw[self._pos:new_pos] 74 | self._pos = new_pos + len(delim) 75 | return result 76 | 77 | def match(self, pattern): 78 | match = pattern.match(self._raw, self._pos) 79 | if match: 80 | self._pos = match.end(0) 81 | return match 82 | 83 | def add_subst(self, node): 84 | # print("S[{}] = {}".format(len(self._substs), str(node))) 85 | if not node in self._substs.values(): 86 | self._substs[len(self._substs)] = node 87 | 88 | def resolve_subst(self, seq_id): 89 | if seq_id in self._substs: 90 | return self._substs[seq_id] 91 | 92 | def __repr__(self): 93 | return "_Cursor({}, {})".format(self._raw[:self._pos] + '→' + self._raw[self._pos:], 94 | self._pos) 95 | 96 | 97 | class Node(namedtuple('Node', 'kind value')): 98 | def __repr__(self): 99 | return "".format(self.kind, repr(self.value)) 100 | 101 | def __str__(self): 102 | if self.kind in ('name', 'builtin'): 103 | return self.value 104 | elif self.kind == 'qual_name': 105 | result = '' 106 | for node in self.value: 107 | if result != '' and node.kind != 'tpl_args': 108 | result += '::' 109 | result += str(node) 110 | return result 111 | elif self.kind == 'tpl_args': 112 | return '<' + ', '.join(map(str, self.value)) + '>' 113 | elif self.kind == 'ctor': 114 | if self.value == 'complete': 115 | return '{ctor}' 116 | elif self.value == 'base': 117 | return '{base ctor}' 118 | elif self.value == 'allocating': 119 | return '{allocating ctor}' 120 | else: 121 | assert False 122 | elif self.kind == 'dtor': 123 | if self.value == 'deleting': 124 | return '{deleting dtor}' 125 | elif self.value == 'complete': 126 | return '{dtor}' 127 | elif self.value == 'base': 128 | return '{base dtor}' 129 | else: 130 | assert False 131 | elif self.kind == 'oper': 132 | if self.value.startswith('new') or self.value.startswith('delete'): 133 | return 'operator ' + self.value 134 | else: 135 | return 'operator' + self.value 136 | elif self.kind == 'oper_cast': 137 | return 'operator ' + str(self.value) 138 | elif self.kind == 'pointer': 139 | return self.value.left() + '*' + self.value.right() 140 | elif self.kind == 'lvalue': 141 | return self.value.left() + '&' + self.value.right() 142 | elif self.kind == 'rvalue': 143 | return self.value.left() + '&&' + self.value.right() 144 | elif self.kind == 'tpl_param': 145 | return '{T' + str(self.value) + '}' 146 | elif self.kind == 'subst': 147 | return '{S' + str(self.value) + '}' 148 | elif self.kind == 'vtable': 149 | return 'vtable for ' + str(self.value) 150 | elif self.kind == 'vtt': 151 | return 'vtt for ' + str(self.value) 152 | elif self.kind == 'typeinfo': 153 | return 'typeinfo for ' + str(self.value) 154 | elif self.kind == 'typeinfo_name': 155 | return 'typeinfo name for ' + str(self.value) 156 | elif self.kind == 'nonvirt_thunk': 157 | return 'non-virtual thunk for ' + str(self.value) 158 | elif self.kind == 'virt_thunk': 159 | return 'virtual thunk for ' + str(self.value) 160 | elif self.kind == 'guard_variable': 161 | return 'guard variable for ' + str(self.value) 162 | elif self.kind == 'transaction_clone': 163 | return 'transaction clone for ' + str(self.value) 164 | else: 165 | return repr(self) 166 | 167 | def left(self): 168 | if self.kind == "pointer": 169 | return self.value.left() + "*" 170 | elif self.kind == "lvalue": 171 | return self.value.left() + "&" 172 | elif self.kind == "rvalue": 173 | return self.value.left() + "&&" 174 | else: 175 | return str(self) 176 | 177 | def right(self): 178 | if self.kind in ("pointer", "lvalue", "rvalue"): 179 | return self.value.right() 180 | else: 181 | return "" 182 | 183 | def map(self, f): 184 | if self.kind in ('oper_cast', 'pointer', 'lvalue', 'rvalue', 'expand_arg_pack', 185 | 'vtable', 'vtt', 'typeinfo', 'typeinfo_name'): 186 | return self._replace(value=f(self.value)) 187 | elif self.kind in ('qual_name', 'tpl_args', 'tpl_arg_pack'): 188 | return self._replace(value=tuple(map(f, self.value))) 189 | else: 190 | return self 191 | 192 | 193 | class QualNode(namedtuple('QualNode', 'kind value qual')): 194 | def __repr__(self): 195 | return "".format(self.kind, repr(self.qual), repr(self.value)) 196 | 197 | def __str__(self): 198 | if self.kind == 'abi': 199 | return str(self.value) + "".join(['[abi:' + tag + ']' for tag in self.qual]) 200 | elif self.kind == 'cv_qual': 201 | return ' '.join([str(self.value)] + list(self.qual)) 202 | else: 203 | return repr(self) 204 | 205 | def left(self): 206 | return str(self) 207 | 208 | def right(self): 209 | return "" 210 | 211 | def map(self, f): 212 | if self.kind == 'cv_qual': 213 | return self._replace(value=f(self.value)) 214 | else: 215 | return self 216 | 217 | 218 | class CastNode(namedtuple('CastNode', 'kind value ty')): 219 | def __repr__(self): 220 | return "".format(self.kind, repr(self.ty), repr(self.value)) 221 | 222 | def __str__(self): 223 | if self.kind == 'literal': 224 | return '(' + str(self.ty) + ')' + str(self.value) 225 | else: 226 | return repr(self) 227 | 228 | def left(self): 229 | return str(self) 230 | 231 | def right(self): 232 | return "" 233 | 234 | def map(self, f): 235 | if self.kind == 'literal': 236 | return self._replace(ty=f(self.ty)) 237 | else: 238 | return self 239 | 240 | 241 | class FuncNode(namedtuple('FuncNode', 'kind name arg_tys ret_ty')): 242 | def __repr__(self): 243 | return "".format(self.kind, repr(self.name), 244 | repr(self.arg_tys), repr(self.ret_ty)) 245 | 246 | def __str__(self): 247 | if self.kind == 'func': 248 | result = "" 249 | if self.ret_ty is not None: 250 | result += str(self.ret_ty) + ' ' 251 | if self.name is not None: 252 | result += str(self.name) 253 | if self.arg_tys == (Node('builtin', 'void'),): 254 | result += '()' 255 | else: 256 | result += '(' + ', '.join(map(str, self.arg_tys)) + ')' 257 | return result 258 | else: 259 | return repr(self) 260 | 261 | def left(self): 262 | if self.kind == 'func': 263 | result = "" 264 | if self.ret_ty is not None: 265 | result += str(self.ret_ty) + ' ' 266 | result += "(" 267 | if self.name is not None: 268 | result += str(self.name) 269 | return result 270 | else: 271 | return str(self) 272 | 273 | def right(self): 274 | if self.kind == 'func': 275 | result = ")" 276 | if self.arg_tys == (Node('builtin', 'void'),): 277 | result += '()' 278 | else: 279 | result += '(' + ', '.join(map(str, self.arg_tys)) + ')' 280 | return result 281 | else: 282 | return "" 283 | 284 | def map(self, f): 285 | if self.kind == 'func': 286 | return self._replace(name=f(self.name) if self.name else None, 287 | arg_tys=tuple(map(f, self.arg_tys)), 288 | ret_ty=f(self.ret_ty) if self.ret_ty else None) 289 | else: 290 | return self 291 | 292 | 293 | class ArrayNode(namedtuple('ArrayNode', 'kind dimension ty')): 294 | def __repr__(self): 295 | return "".format(self.kind, repr(self.dimension), repr(self.ty)) 296 | 297 | def __str__(self): 298 | if self.kind == 'array': 299 | result = "" 300 | result += str(self.ty) 301 | result += "[" + str(self.dimension) + "]" 302 | return result 303 | else: 304 | return repr(self) 305 | 306 | def left(self): 307 | if self.kind == 'array': 308 | result = str(self.ty) + "(" 309 | return result 310 | else: 311 | return str(self) 312 | 313 | def right(self): 314 | if self.kind == 'array': 315 | result = ")[" + str(self.dimension) + "]" 316 | return result 317 | else: 318 | return "" 319 | 320 | def map(self, f): 321 | if self.kind == 'array': 322 | return self._replace(dimension=f(self.dimension) if self.dimension else None, 323 | ty=f(self.ty) if self.ty else None) 324 | else: 325 | return self 326 | 327 | 328 | class MemberNode(namedtuple('MemberNode', 'kind cls_ty member_ty')): 329 | def __repr__(self): 330 | return "".format(self.kind, repr(self.cls_ty), repr(self.member_ty)) 331 | 332 | def __str__(self): 333 | if self.kind == 'data': 334 | result = str(self.member_ty) + " " + str(self.cls_ty) + "::*" 335 | return result 336 | elif self.kind == 'method': 337 | result = self.member_ty.left() + str(self.cls_ty) + "::*" + self.member_ty.right() 338 | return result 339 | else: 340 | return repr(self) 341 | 342 | def left(self): 343 | if self.kind == 'method': 344 | return self.member_ty.left() + str(self.cls_ty) + "::*" 345 | else: 346 | return str(self) 347 | 348 | def right(self): 349 | if self.kind == 'method': 350 | return self.member_ty.right() 351 | else: 352 | return "" 353 | 354 | def map(self, f): 355 | if self.kind in ('data', 'func'): 356 | return self._replace(cls_ty=f(self.cls_ty) if self.cls_ty else None, 357 | member_ty=f(self.member_ty) if self.member_ty else None) 358 | else: 359 | return self 360 | 361 | 362 | _ctor_dtor_map = { 363 | 'C1': 'complete', 364 | 'C2': 'base', 365 | 'C3': 'allocating', 366 | 'D0': 'deleting', 367 | 'D1': 'complete', 368 | 'D2': 'base' 369 | } 370 | 371 | _std_names = { 372 | 'St': [Node('name', 'std')], 373 | 'Sa': [Node('name', 'std'), Node('name', 'allocator')], 374 | 'Sb': [Node('name', 'std'), Node('name', 'basic_string')], 375 | 'Ss': [Node('name', 'std'), Node('name', 'string')], 376 | 'Si': [Node('name', 'std'), Node('name', 'istream')], 377 | 'So': [Node('name', 'std'), Node('name', 'ostream')], 378 | 'Sd': [Node('name', 'std'), Node('name', 'iostream')], 379 | } 380 | 381 | _operators = { 382 | 'nw': 'new', 383 | 'na': 'new[]', 384 | 'dl': 'delete', 385 | 'da': 'delete[]', 386 | 'ps': '+', # (unary) 387 | 'ng': '-', # (unary) 388 | 'ad': '&', # (unary) 389 | 'de': '*', # (unary) 390 | 'co': '~', 391 | 'pl': '+', 392 | 'mi': '-', 393 | 'ml': '*', 394 | 'dv': '/', 395 | 'rm': '%', 396 | 'an': '&', 397 | 'or': '|', 398 | 'eo': '^', 399 | 'aS': '=', 400 | 'pL': '+=', 401 | 'mI': '-=', 402 | 'mL': '*=', 403 | 'dV': '/=', 404 | 'rM': '%=', 405 | 'aN': '&=', 406 | 'oR': '|=', 407 | 'eO': '^=', 408 | 'ls': '<<', 409 | 'rs': '>>', 410 | 'lS': '<<=', 411 | 'rS': '>>=', 412 | 'eq': '==', 413 | 'ne': '!=', 414 | 'lt': '<', 415 | 'gt': '>', 416 | 'le': '<=', 417 | 'ge': '>=', 418 | 'nt': '!', 419 | 'aa': '&&', 420 | 'oo': '||', 421 | 'pp': '++', # (postfix in context) 422 | 'mm': '--', # (postfix in context) 423 | 'cm': ',', 424 | 'pm': '->*', 425 | 'pt': '->', 426 | 'cl': '()', 427 | 'ix': '[]', 428 | 'qu': '?', 429 | } 430 | 431 | _builtin_types = { 432 | 'v': Node('builtin', 'void'), 433 | 'w': Node('builtin', 'wchar_t'), 434 | 'b': Node('builtin', 'bool'), 435 | 'c': Node('builtin', 'char'), 436 | 'a': Node('builtin', 'signed char'), 437 | 'h': Node('builtin', 'unsigned char'), 438 | 's': Node('builtin', 'short'), 439 | 't': Node('builtin', 'unsigned short'), 440 | 'i': Node('builtin', 'int'), 441 | 'j': Node('builtin', 'unsigned int'), 442 | 'l': Node('builtin', 'long'), 443 | 'm': Node('builtin', 'unsigned long'), 444 | 'x': Node('builtin', 'long long'), 445 | 'y': Node('builtin', 'unsigned long long'), 446 | 'n': Node('builtin', '__int128'), 447 | 'o': Node('builtin', 'unsigned __int128'), 448 | 'f': Node('builtin', 'float'), 449 | 'd': Node('builtin', 'double'), 450 | 'e': Node('builtin', '__float80'), 451 | 'g': Node('builtin', '__float128'), 452 | 'z': Node('builtin', '...'), 453 | 'Dd': Node('builtin', '_Decimal64'), 454 | 'De': Node('builtin', '_Decimal128'), 455 | 'Df': Node('builtin', '_Decimal32'), 456 | 'Dh': Node('builtin', '_Float16'), 457 | 'Di': Node('builtin', 'char32_t'), 458 | 'Ds': Node('builtin', 'char16_t'), 459 | 'Da': Node('builtin', 'auto'), 460 | 'Dn': Node('qual_name', (Node('name', 'std'), Node('builtin', 'nullptr_t'))) 461 | } 462 | 463 | 464 | def _handle_cv(qualifiers, node): 465 | qualifier_set = set() 466 | if 'r' in qualifiers: 467 | qualifier_set.add('restrict') 468 | if 'V' in qualifiers: 469 | qualifier_set.add('volatile') 470 | if 'K' in qualifiers: 471 | qualifier_set.add('const') 472 | if qualifier_set: 473 | return QualNode('cv_qual', node, frozenset(qualifier_set)) 474 | return node 475 | 476 | def _handle_indirect(qualifier, node): 477 | if qualifier == 'P': 478 | return Node('pointer', node) 479 | elif qualifier == 'R': 480 | return Node('lvalue', node) 481 | elif qualifier == 'O': 482 | return Node('rvalue', node) 483 | return node 484 | 485 | 486 | _NUMBER_RE = re.compile(r"\d+") 487 | 488 | def _parse_number(cursor): 489 | match = cursor.match(_NUMBER_RE) 490 | if match is None: 491 | return None 492 | return int(match.group(0)) 493 | 494 | def _parse_seq_id(cursor): 495 | seq_id = cursor.advance_until('_') 496 | if seq_id is None: 497 | return None 498 | if seq_id == '': 499 | return 0 500 | else: 501 | return 1 + int(seq_id, 36) 502 | 503 | def _parse_until_end(cursor, kind, fn): 504 | nodes = [] 505 | while not cursor.accept('E'): 506 | node = fn(cursor) 507 | if node is None or cursor.at_end(): 508 | return None 509 | nodes.append(node) 510 | return Node(kind, tuple(nodes)) 511 | 512 | 513 | _SOURCE_NAME_RE = re.compile(r"\d+") 514 | 515 | def _parse_source_name(cursor): 516 | match = cursor.match(_SOURCE_NAME_RE) 517 | name_len = int(match.group(0)) 518 | name = cursor.advance(name_len) 519 | if name is None: 520 | return None 521 | return name 522 | 523 | 524 | _NAME_RE = re.compile(r""" 525 | (?P (?= \d)) | 526 | (?P C[123]) | 527 | (?P D[012]) | 528 | (?P S[absiod]) | 529 | (?P nw|na|dl|da|ps|ng|ad|de|co|pl|mi|ml|dv|rm|an|or| 530 | eo|aS|pL|mI|mL|dV|rM|aN|oR|eO|ls|rs|lS|rS|eq|ne| 531 | lt|gt|le|ge|nt|aa|oo|pp|mm|cm|pm|pt|cl|ix|qu) | 532 | (?P cv) | 533 | (?P St) | 534 | (?P S) | 535 | (?P N (?P [rVK]*) (?P [RO]?)) | 536 | (?P T) | 537 | (?P I) | 538 | (?P L) | 539 | (?P Z) | 540 | (?P Ut) | 541 | (?P Ul) 542 | """, re.X) 543 | 544 | def _parse_name(cursor, is_nested=False): 545 | match = cursor.match(_NAME_RE) 546 | if match is None: 547 | return None 548 | elif match.group('source_name') is not None: 549 | name = _parse_source_name(cursor) 550 | if name is None: 551 | return None 552 | node = Node('name', name) 553 | elif match.group('ctor_name') is not None: 554 | node = Node('ctor', _ctor_dtor_map[match.group('ctor_name')]) 555 | elif match.group('dtor_name') is not None: 556 | node = Node('dtor', _ctor_dtor_map[match.group('dtor_name')]) 557 | elif match.group('std_name') is not None: 558 | node = Node('qual_name', _std_names[match.group('std_name')]) 559 | elif match.group('operator_name') is not None: 560 | node = Node('oper', _operators[match.group('operator_name')]) 561 | elif match.group('operator_cv') is not None: 562 | ty = _parse_type(cursor) 563 | if ty is None: 564 | return None 565 | node = Node('oper_cast', ty) 566 | elif match.group('std_prefix') is not None: 567 | name = _parse_name(cursor, is_nested=True) 568 | if name is None: 569 | return None 570 | if name.kind == 'qual_name': 571 | node = Node('qual_name', (Node('name', 'std'),) + name.value) 572 | else: 573 | node = Node('qual_name', (Node('name', 'std'), name)) 574 | elif match.group('substitution') is not None: 575 | seq_id = _parse_seq_id(cursor) 576 | if seq_id is None: 577 | return None 578 | node = cursor.resolve_subst(seq_id) 579 | if node is None: 580 | return None 581 | elif match.group('nested_name') is not None: 582 | nodes = [] 583 | while True: 584 | name = _parse_name(cursor, is_nested=True) 585 | if name is None or cursor.at_end(): 586 | return None 587 | if name.kind == 'qual_name': 588 | nodes += name.value 589 | else: 590 | nodes.append(name) 591 | if cursor.accept('E'): 592 | break 593 | else: 594 | cursor.add_subst(Node('qual_name', tuple(nodes))) 595 | node = Node('qual_name', tuple(nodes)) 596 | node = _handle_cv(match.group('cv_qual'), node) 597 | node = _handle_indirect(match.group('ref_qual'), node) 598 | elif match.group('template_param') is not None: 599 | seq_id = _parse_seq_id(cursor) 600 | if seq_id is None: 601 | return None 602 | node = Node('tpl_param', seq_id) 603 | cursor.add_subst(node) 604 | elif match.group('template_args') is not None: 605 | node = _parse_until_end(cursor, 'tpl_args', _parse_type) 606 | elif match.group('constant') is not None: 607 | # not in the ABI doc, but probably means `const` 608 | return _parse_name(cursor, is_nested) 609 | elif match.group('local_name') is not None: 610 | raise NotImplementedError("local names are not supported") 611 | elif match.group('unnamed_type') is not None: 612 | raise NotImplementedError("unnamed types are not supported") 613 | elif match.group('closure_type') is not None: 614 | raise NotImplementedError("closure (lambda) types are not supported") 615 | if node is None: 616 | return None 617 | 618 | abi_tags = [] 619 | while cursor.accept('B'): 620 | abi_tags.append(_parse_source_name(cursor)) 621 | if abi_tags: 622 | node = QualNode('abi', node, frozenset(abi_tags)) 623 | 624 | if not is_nested and cursor.accept('I') and ( 625 | node.kind in ('name', 'oper', 'oper_cast') or 626 | match.group('std_prefix') is not None or 627 | match.group('std_name') is not None or 628 | match.group('substitution') is not None): 629 | if node.kind in ('name', 'oper', 'oper_cast') or match.group('std_prefix') is not None: 630 | cursor.add_subst(node) # ::= 631 | templ_args = _parse_until_end(cursor, 'tpl_args', _parse_type) 632 | if templ_args is None: 633 | return None 634 | node = Node('qual_name', (node, templ_args)) 635 | if ((match.group('std_prefix') is not None or 636 | match.group('std_name') is not None) and 637 | node.value[0].value[1].kind not in ('oper', 'oper_cast')): 638 | cursor.add_subst(node) 639 | 640 | return node 641 | 642 | 643 | _TYPE_RE = re.compile(r""" 644 | (?P v|w|b|c|a|h|s|t|i|j|l|m|x|y|n|o|f|d|e|g|z| 645 | Dd|De|Df|Dh|DF|Di|Ds|Da|Dc|Dn) | 646 | (?P [rVK]+) | 647 | (?P [PRO]) | 648 | (?P F) | 649 | (?P X) | 650 | (?P (?= L)) | 651 | (?P J) | 652 | (?P Dp) | 653 | (?P D[tT]) | 654 | (?P A) | 655 | (?P M) 656 | """, re.X) 657 | 658 | def _parse_type(cursor): 659 | match = cursor.match(_TYPE_RE) 660 | if match is None: 661 | node = _parse_name(cursor) 662 | cursor.add_subst(node) 663 | elif match.group('builtin_type') is not None: 664 | node = _builtin_types[match.group('builtin_type')] 665 | elif match.group('qualified_type') is not None: 666 | ty = _parse_type(cursor) 667 | if ty is None: 668 | return None 669 | node = _handle_cv(match.group('qualified_type'), ty) 670 | cursor.add_subst(node) 671 | elif match.group('indirect_type') is not None: 672 | ty = _parse_type(cursor) 673 | if ty is None: 674 | return None 675 | node = _handle_indirect(match.group('indirect_type'), ty) 676 | cursor.add_subst(node) 677 | elif match.group('function_type') is not None: 678 | ret_ty = _parse_type(cursor) 679 | if ret_ty is None: 680 | return None 681 | arg_tys = [] 682 | while not cursor.accept('E'): 683 | arg_ty = _parse_type(cursor) 684 | if arg_ty is None: 685 | return None 686 | arg_tys.append(arg_ty) 687 | node = FuncNode('func', None, tuple(arg_tys), ret_ty) 688 | cursor.add_subst(node) 689 | elif match.group('expression') is not None: 690 | raise NotImplementedError("expressions are not supported") 691 | elif match.group('expr_primary') is not None: 692 | node = _parse_expr_primary(cursor) 693 | elif match.group('template_arg_pack') is not None: 694 | node = _parse_until_end(cursor, 'tpl_arg_pack', _parse_type) 695 | elif match.group('arg_pack_expansion') is not None: 696 | node = _parse_type(cursor) 697 | node = Node('expand_arg_pack', node) 698 | elif match.group('decltype') is not None: 699 | raise NotImplementedError("decltype is not supported") 700 | elif match.group('array_type') is not None: 701 | dimension = _parse_number(cursor) 702 | if dimension is None: 703 | return None 704 | else: 705 | dimension = CastNode('literal', dimension, Node('builtin', 'int')) 706 | if not cursor.accept('_'): 707 | return None 708 | type = _parse_type(cursor) 709 | node = ArrayNode('array', dimension, type) 710 | cursor.add_subst(node) 711 | elif match.group('member_type') is not None: 712 | cls_ty = _parse_type(cursor) 713 | member_ty = _parse_type(cursor) 714 | if member_ty is not None and member_ty.kind == 'func': 715 | kind = "method" 716 | else: 717 | kind = "data" 718 | node = MemberNode(kind, cls_ty, member_ty) 719 | else: 720 | return None 721 | return node 722 | 723 | 724 | _EXPR_PRIMARY_RE = re.compile(r""" 725 | (?P L (?= _Z)) | 726 | (?P L) 727 | """, re.X) 728 | 729 | def _parse_expr_primary(cursor): 730 | match = cursor.match(_EXPR_PRIMARY_RE) 731 | if match is None: 732 | return None 733 | elif match.group('mangled_name') is not None: 734 | mangled_name = cursor.advance_until('E') 735 | return _parse_mangled_name(_Cursor(mangled_name)) 736 | elif match.group('literal') is not None: 737 | ty = _parse_type(cursor) 738 | if ty is None: 739 | return None 740 | value = cursor.advance_until('E') 741 | if value is None: 742 | return None 743 | return CastNode('literal', value, ty) 744 | 745 | 746 | def _expand_template_args(func): 747 | if func.name.kind == 'qual_name': 748 | name_suffix = func.name.value[-1] 749 | if name_suffix.kind == 'tpl_args': 750 | tpl_args = name_suffix.value 751 | def mapper(node): 752 | if node is None: 753 | return None 754 | elif node.kind == 'tpl_param' and node.value < len(tpl_args): 755 | return tpl_args[node.value] 756 | return node.map(mapper) 757 | return mapper(func) 758 | return func 759 | 760 | def _parse_encoding(cursor): 761 | name = _parse_name(cursor) 762 | if name is None: 763 | return None 764 | if cursor.at_end(): 765 | return name 766 | 767 | if name.kind == 'qual_name' \ 768 | and name.value[-1].kind == 'tpl_args' \ 769 | and name.value[-2].kind not in ('ctor', 'dtor', 'oper_cast'): 770 | ret_ty = _parse_type(cursor) 771 | if ret_ty is None: 772 | return None 773 | else: 774 | ret_ty = None 775 | 776 | arg_tys = [] 777 | while not cursor.at_end(): 778 | arg_ty = _parse_type(cursor) 779 | if arg_ty is None: 780 | return None 781 | arg_tys.append(arg_ty) 782 | 783 | if arg_tys: 784 | func = FuncNode('func', name, tuple(arg_tys), ret_ty) 785 | return _expand_template_args(func) 786 | else: 787 | return name 788 | 789 | 790 | _SPECIAL_RE = re.compile(r""" 791 | (?P T (?P [VTIS])) | 792 | (?P Th (?P n? \d+) _) | 793 | (?P Tv (?P n? \d+) _ (?P n? \d+) _) | 794 | (?P Tc) | 795 | (?P GV) | 796 | (?P GR) | 797 | (?P GTt) 798 | """, re.X) 799 | 800 | def _parse_special(cursor): 801 | match = cursor.match(_SPECIAL_RE) 802 | if match is None: 803 | return None 804 | elif match.group('rtti') is not None: 805 | name = _parse_type(cursor) 806 | if name is None: 807 | return None 808 | if match.group('kind') == 'V': 809 | return Node('vtable', name) 810 | elif match.group('kind') == 'T': 811 | return Node('vtt', name) 812 | elif match.group('kind') == 'I': 813 | return Node('typeinfo', name) 814 | elif match.group('kind') == 'S': 815 | return Node('typeinfo_name', name) 816 | elif match.group('nonvirtual_thunk') is not None: 817 | func = _parse_encoding(cursor) 818 | if func is None: 819 | return None 820 | return Node('nonvirt_thunk', func) 821 | elif match.group('virtual_thunk') is not None: 822 | func = _parse_encoding(cursor) 823 | if func is None: 824 | return None 825 | return Node('virt_thunk', func) 826 | elif match.group('covariant_thunk') is not None: 827 | raise NotImplementedError("covariant thunks are not supported") 828 | elif match.group('guard_variable'): 829 | name = _parse_type(cursor) 830 | if name is None: 831 | return None 832 | return Node('guard_variable', name) 833 | elif match.group('extended_temporary'): 834 | raise NotImplementedError("extended temporaries are not supported") 835 | elif match.group('transaction_clone'): 836 | func = _parse_encoding(cursor) 837 | if func is None: 838 | return None 839 | return Node('transaction_clone', func) 840 | 841 | 842 | _MANGLED_NAME_RE = re.compile(r""" 843 | (?P _?_Z) 844 | """, re.X) 845 | 846 | def _parse_mangled_name(cursor): 847 | match = cursor.match(_MANGLED_NAME_RE) 848 | if match is None: 849 | return None 850 | else: 851 | cursor_position = cursor._pos 852 | special = _parse_special(cursor) 853 | if special is not None: 854 | return special 855 | 856 | # Return the cursor position to it's previous state before continuing 857 | cursor._pos = cursor_position 858 | return _parse_encoding(cursor) 859 | 860 | 861 | def _expand_arg_packs(ast): 862 | def mapper(node): 863 | if node is None: 864 | return None 865 | elif node.kind == 'tpl_args': 866 | exp_args = [] 867 | for arg in node.value: 868 | if arg.kind in ['tpl_arg_pack', 'tpl_args']: 869 | exp_args += arg.value 870 | else: 871 | exp_args.append(arg) 872 | return Node('tpl_args', tuple(map(mapper, exp_args))) 873 | elif node.kind == 'func': 874 | node = node.map(mapper) 875 | exp_arg_tys = [] 876 | for arg_ty in node.arg_tys: 877 | if arg_ty.kind == 'expand_arg_pack' and \ 878 | arg_ty.value is not None and \ 879 | arg_ty.value.kind == 'rvalue' and \ 880 | arg_ty.value.value.kind in ['tpl_arg_pack', 'tpl_args']: 881 | exp_arg_tys += arg_ty.value.value.value 882 | else: 883 | exp_arg_tys.append(arg_ty) 884 | return node._replace(arg_tys=tuple(exp_arg_tys)) 885 | else: 886 | return node.map(mapper) 887 | return mapper(ast) 888 | 889 | def parse(raw): 890 | ast = _parse_mangled_name(_Cursor(raw)) 891 | if ast is not None: 892 | ast = _expand_arg_packs(ast) 893 | return ast 894 | 895 | def is_ctor_or_dtor(ast) -> bool: 896 | if ast.kind == 'func': 897 | return is_ctor_or_dtor(ast.name) 898 | elif ast.kind == 'qual_name': 899 | kind = ast.value[-1].kind 900 | return kind == 'ctor' or kind == 'dtor' 901 | else: 902 | return False 903 | 904 | # ================================================================================================ 905 | 906 | 907 | if __name__ == '__main__': 908 | import sys 909 | if len(sys.argv) == 1: 910 | while True: 911 | name = sys.stdin.readline() 912 | if not name: 913 | break 914 | print(parse(name.strip())) 915 | else: 916 | for name in sys.argv[1:]: 917 | ast = parse(name) 918 | print(repr(ast)) 919 | print(ast) 920 | --------------------------------------------------------------------------------