├── .gitignore
├── itanium_demangler
├── MANIFEST.in
├── .gitignore
├── setup.py
├── LICENSE-0BSD.txt
├── README.md
├── tests
│ └── test.py
└── itanium_demangler
│ └── __init__.py
├── doc
├── vtable-after.png
├── vtable-before.png
├── signature-after.png
└── signature-before.png
├── LICENSE-0BSD.txt
├── plugin.json
├── README.md
└── __init__.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 |
--------------------------------------------------------------------------------
/itanium_demangler/MANIFEST.in:
--------------------------------------------------------------------------------
1 | include LICENSE-0BSD.txt
2 |
--------------------------------------------------------------------------------
/doc/vtable-after.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/vtable-after.png
--------------------------------------------------------------------------------
/doc/vtable-before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/vtable-before.png
--------------------------------------------------------------------------------
/doc/signature-after.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/signature-after.png
--------------------------------------------------------------------------------
/doc/signature-before.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/whitequark/binja_itanium_cxx_abi/HEAD/doc/signature-before.png
--------------------------------------------------------------------------------
/itanium_demangler/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | __pycache__/
3 | _build/
4 | *.egg-info/
5 | /.eggs/
6 | /build/
7 | /dist/
8 |
--------------------------------------------------------------------------------
/LICENSE-0BSD.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2018 whitequark@whitequark.org
2 |
3 | Permission to use, copy, modify, and/or distribute this software for
4 | any purpose with or without fee is hereby granted.
5 |
6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
7 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
8 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
9 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
10 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
11 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
12 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
13 |
--------------------------------------------------------------------------------
/itanium_demangler/setup.py:
--------------------------------------------------------------------------------
import io
import os

import setuptools


# Read the long description relative to this file (not the current working
# directory), decode it explicitly as UTF-8, and close the handle afterwards.
# io.open is used because the classifiers below still advertise Python 2.
_HERE = os.path.dirname(os.path.abspath(__file__))
with io.open(os.path.join(_HERE, "README.md"), encoding="utf-8") as _readme:
    _LONG_DESCRIPTION = _readme.read()


setuptools.setup(
    name="itanium_demangler",
    version="1.0",
    author="whitequark",
    author_email="whitequark@whitequark.org",
    description="Pure Python parser for mangled itanium symbols",
    long_description=_LONG_DESCRIPTION,
    long_description_content_type="text/markdown",
    license="BSD",
    url="https://github.com/whitequark/python-itanium_demangler",
    packages=setuptools.find_packages(),
    classifiers=[
        "Programming Language :: Python :: 3",
        "Programming Language :: Python :: 2",
        "Operating System :: OS Independent",
    ],
)
21 |
--------------------------------------------------------------------------------
/itanium_demangler/LICENSE-0BSD.txt:
--------------------------------------------------------------------------------
1 | Copyright (C) 2018 whitequark@whitequark.org
2 |
3 | Permission to use, copy, modify, and/or distribute this software for
4 | any purpose with or without fee is hereby granted.
5 |
6 | THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
7 | WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
8 | MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
9 | ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
10 | WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
11 | AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT
12 | OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
13 |
--------------------------------------------------------------------------------
/plugin.json:
--------------------------------------------------------------------------------
1 | {
2 | "pluginmetadataversion" : 2,
3 | "name": "Itanium C++ ABI",
4 | "type": ["analysis"],
5 | "api": ["python2", "python3"],
6 | "description": "A plugin providing an analysis for Itanium C++ ABI.",
7 | "longdescription": "This plugin provides a custom demangler, an analysis that decodes mangled names and updates function signatures, and an analysis that decodes RTTI and vtables and discovers new procedures based on virtual function pointers.",
8 | "version": "1.0",
9 | "author": "whitequark",
10 | "minimumbinaryninjaversion": 555,
11 | "platforms": ["Darwin", "Linux", "Windows"],
12 | "license": {
13 | "name": "BSD-0-clause",
14 | "text": "Copyright (C) 2018 by whitequark\n\nPermission to use, copy, modify, and/or distribute this software for any purpose with or without fee is hereby granted.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE."
15 | }
16 | }
17 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Binary Ninja Itanium C++ ABI Plugin
2 |
3 | The Itanium C++ ABI plugin provides a custom demangler, an analysis that decodes mangled names and updates function signatures, and an analysis that decodes RTTI and vtables and discovers new procedures based on virtual function pointers.
4 |
5 | ## Custom demangler
6 |
7 | The custom demangler converts mangled names into abstract syntax trees, which makes it possible to extract more type information than with the built-in one. For example, it differentiates between complete and base class constructors and destructors.
8 |
9 | ## Function signature decoding
10 |
11 | Before / after:
12 |
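13 | ![Function signature before](doc/signature-before.png) ![Function signature after](doc/signature-after.png)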
14 |
15 | Note that the decoding is necessarily heuristic as some information is lost, e.g. class members and standalone functions in a namespace have an exactly identical mangling. It is possible that the `this` argument (or worse) would be missing--consult the full decoded name of the function (`current_function.symbol.full_name`) to see the result of decoding, and apply your expertise.
16 |
17 | ## RTTI and vtable decoding
18 |
19 | Before / after:
20 |
21 | ![Vtable before](doc/vtable-before.png) ![Vtable after](doc/vtable-after.png)
22 |
23 | ## License
24 |
25 | [0-clause BSD](LICENSE-0BSD.txt)
26 |
--------------------------------------------------------------------------------
/itanium_demangler/README.md:
--------------------------------------------------------------------------------
1 | # Itanium Demangler
2 |
3 | The *Python Itanium Demangler* is a pure Python parser for the [Itanium C++ ABI symbol mangling language][manglang]. Note that MSVC mangling language is not supported.
4 |
5 | This demangler generates an abstract syntax tree from mangled symbols, which can be used for directly extracting type information, as opposed to having to interpret the C++ source code corresponding to the demangled symbol.
6 |
7 | There is also a built-in AST stringifier, so the demangler can be used as a replacement for `c++filt` or for formatting backtraces.
8 |
9 | [manglang]: https://itanium-cxx-abi.github.io/cxx-abi/abi.html#mangling
10 |
11 | ## Requirements
12 |
13 | The demangler runs on Python 2.7 and 3.3+ and has no dependencies.
14 |
15 | ## Installation
16 |
17 | Installing via PyPI:
18 |
19 | pip install itanium_demangler
20 |
21 | Using a local repository for development:
22 |
23 | git clone https://github.com/whitequark/python-itanium_demangler
24 | cd python-itanium_demangler
25 | python setup.py develop --user
26 |
27 | ## Usage
28 |
29 | ```python
30 | from itanium_demangler import parse as demangle
31 |
32 | ast = demangle("_ZN5boost6chrono24process_system_cpu_clock3nowEv")
33 |
34 | print(repr(ast))
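35 | # <FuncNode func <Node qual_name (<Node name 'boost'>, <Node name 'chrono'>, <Node name 'process_system_cpu_clock'>, <Node name 'now'>)> (<Node builtin 'void'>,) None>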
36 |
37 | print(ast)
38 | # boost::chrono::process_system_cpu_clock::now()
39 | ```
40 |
41 | ## Future considerations
42 |
43 | A similar (i.e. also parsing to an AST) implementation of a demangler for the MSVC mangling language would be useful to have.
44 |
45 | ## License
46 |
47 | [0-clause BSD](LICENSE-0BSD.txt)
48 |
--------------------------------------------------------------------------------
/itanium_demangler/tests/test.py:
--------------------------------------------------------------------------------
1 | import unittest
2 |
3 | from itanium_demangler import parse, _operators, _builtin_types
4 |
5 |
class TestDemangler(unittest.TestCase):
    """Round-trip tests for the Itanium demangler.

    Each case feeds a mangled symbol to `parse` and checks either the
    resulting AST (`assertParses`) or its string form (`assertDemangles`).
    Template argument lists are rendered as `<...>` in the expected strings,
    as `test_builtin_types` shows.
    """

    def assertParses(self, mangled, ast):
        """Assert that parsing `mangled` yields exactly `ast` (may be None)."""
        result = parse(mangled)
        self.assertEqual(result, ast)

    def assertDemangles(self, mangled, demangled):
        """Assert that `mangled` stringifies to `demangled`.

        A failed parse (None) is compared as None, so passing
        `demangled=None` asserts that the symbol is rejected.
        """
        result = parse(mangled)
        if result is not None:
            result = str(result)
        self.assertEqual(result, demangled)

    def test_name(self):
        self.assertDemangles('_Z3foo', 'foo')
        self.assertDemangles('_Z3x', None)

    def test_ctor_dtor(self):
        self.assertDemangles('_ZN3fooC1E', 'foo::{ctor}')
        self.assertDemangles('_ZN3fooC2E', 'foo::{base ctor}')
        self.assertDemangles('_ZN3fooC3E', 'foo::{allocating ctor}')
        self.assertDemangles('_ZN3fooD0E', 'foo::{deleting dtor}')
        self.assertDemangles('_ZN3fooD1E', 'foo::{dtor}')
        self.assertDemangles('_ZN3fooD2E', 'foo::{base dtor}')
        self.assertDemangles('_ZN3fooC1IcEEc', 'foo::{ctor}<char>(char)')
        self.assertDemangles('_ZN3fooD1IcEEc', 'foo::{dtor}<char>(char)')

    def test_operator(self):
        for op in _operators:
            # new/delete are spelled with a space after "operator"; they are
            # checked explicitly below.
            if _operators[op] in ['new', 'new[]', 'delete', 'delete[]']:
                continue
            self.assertDemangles('_Z' + op, 'operator' + _operators[op])
        self.assertDemangles('_Znw', 'operator new')
        self.assertDemangles('_Zna', 'operator new[]')
        self.assertDemangles('_Zdl', 'operator delete')
        self.assertDemangles('_Zda', 'operator delete[]')
        self.assertDemangles('_Zcvi', 'operator int')

    def test_std_substs(self):
        self.assertDemangles('_ZSt3foo', 'std::foo')
        self.assertDemangles('_ZStN3fooE', 'std::foo')
        self.assertDemangles('_ZSs', 'std::string')
        self.assertParses('_ZSt', None)
        self.assertDemangles('_Z3fooISt6vectorE', 'foo<std::vector>')
        self.assertDemangles('_ZSaIhE', 'std::allocator<unsigned char>')

    def test_nested_name(self):
        self.assertDemangles('_ZN3fooE', 'foo')
        self.assertDemangles('_ZN3foo5bargeE', 'foo::barge')
        self.assertDemangles('_ZN3fooIcE5bargeE', 'foo<char>::barge')
        self.assertDemangles('_ZNK3fooE', 'foo const')
        self.assertDemangles('_ZNV3fooE', 'foo volatile')
        self.assertDemangles('_ZNKR3fooE', 'foo const&')
        self.assertDemangles('_ZNKO3fooE', 'foo const&&')
        self.assertParses('_ZNKO3foo', None)

    def test_template_args(self):
        self.assertDemangles('_Z3fooIcE', 'foo<char>')
        self.assertDemangles('_ZN3fooIcEE', 'foo<char>')
        self.assertParses('_Z3fooI', None)

    def test_builtin_types(self):
        for ty in _builtin_types:
            self.assertDemangles('_Z1fI' + ty + 'E', 'f<' + str(_builtin_types[ty]) + '>')

    def test_qualified_type(self):
        self.assertDemangles('_Z1fIriE', 'f<int restrict>')
        self.assertDemangles('_Z1fIKiE', 'f<int const>')
        self.assertDemangles('_Z1fIViE', 'f<int volatile>')
        self.assertDemangles('_Z1fIVVViE', 'f<int volatile>')

    def test_function_type(self):
        self.assertDemangles('_Z1fv', 'f()')
        self.assertDemangles('_Z1fi', 'f(int)')
        self.assertDemangles('_Z1fic', 'f(int, char)')
        self.assertDemangles('_ZN1fEic', 'f(int, char)')
        self.assertDemangles('_ZN1fIEEic', 'int f<>(char)')
        self.assertDemangles('_ZN1fIEC1Eic', 'f<>::{ctor}(int, char)')

    def test_indirect_type(self):
        self.assertDemangles('_Z1fIPiE', 'f<int*>')
        self.assertDemangles('_Z1fIPPiE', 'f<int**>')
        self.assertDemangles('_Z1fIRiE', 'f<int&>')
        self.assertDemangles('_Z1fIOiE', 'f<int&&>')
        self.assertDemangles('_Z1fIKRiE', 'f<int& const>')
        self.assertDemangles('_Z1fIRKiE', 'f<int const&>')

    def test_literal(self):
        self.assertDemangles('_Z1fILi1EE', 'f<(int)1>')
        self.assertDemangles('_Z1fIL_Z1gEE', 'f<g>')

    def test_argpack(self):
        self.assertDemangles('_Z1fILb0EJciEE', 'f<(bool)0, char, int>')
        self.assertDemangles('_Z1fILb0EIciEE', 'f<(bool)0, char, int>')
        self.assertDemangles('_Z1fIJciEEvDpOT_', 'void f<char, int>(char, int)')
        self.assertDemangles('_Z1fIIciEEvDpOT_', 'void f<char, int>(char, int)')

    def test_special(self):
        self.assertDemangles('_ZTV1f', 'vtable for f')
        self.assertDemangles('_ZTT1f', 'vtt for f')
        self.assertDemangles('_ZTI1f', 'typeinfo for f')
        self.assertDemangles('_ZTS1f', 'typeinfo name for f')
        self.assertDemangles('_ZThn16_1fv', 'non-virtual thunk for f()')
        self.assertDemangles('_ZTv16_8_1fv', 'virtual thunk for f()')
        self.assertDemangles('_ZGV1f', 'guard variable for f')
        self.assertDemangles('_ZGTt1fv', 'transaction clone for f()')

    def test_template_param(self):
        self.assertDemangles('_ZN1fIciEEvT_PT0_', 'void f<char, int>(char, int*)')
        self.assertParses('_ZN1fIciEEvT_PT0', None)

    def test_substitution(self):
        self.assertDemangles('_Z3fooIEvS_', 'void foo<>(foo)')
        self.assertDemangles('_ZN3foo3barIES_E', 'foo::bar<>::foo')
        self.assertDemangles('_ZN3foo3barIES0_E', 'foo::bar<>::foo::bar')
        self.assertDemangles('_ZN3foo3barIES1_E', 'foo::bar<>::foo::bar<>')
        self.assertParses('_ZN3foo3barIES_ES2_', None)
        self.assertDemangles('_Z3fooIS_E', 'foo<foo>')
        self.assertDemangles('_ZSt3fooIS_E', 'std::foo<std::foo>')
        self.assertDemangles('_Z3fooIPiEvS0_', 'void foo<int*>(int*)')
        self.assertDemangles('_Z3fooISaIcEEvS0_',
                             'void foo<std::allocator<char>>(std::allocator<char>)')
        self.assertDemangles('_Z3fooI3barS0_E', 'foo<bar, bar>')
        self.assertDemangles('_ZN2n11fEPNS_1bEPNS_2n21cEPNS2_2n31dE',
                             'n1::f(n1::b*, n1::n2::c*, n1::n2::n3::d*)')
        self.assertDemangles('_ZN1f1gES_IFvvEE', 'f::g(f<void ()>)')
        self.assertDemangles('_ZplIcET_S0_', 'char operator+<char>(char)')
        self.assertParses('_ZplIcET_S1_', None)
        # Operator template results don't get added to substitutions
        self.assertParses('_ZStplIcEvS0_', None)

    def test_abi_tag(self):
        self.assertDemangles('_Z3fooB5cxx11v', 'foo[abi:cxx11]()')

    def test_const(self):
        self.assertDemangles('_ZL3foo', 'foo')

    def test_operator_template(self):
        self.assertDemangles('_ZmiIiE', 'operator-<int>')
        self.assertDemangles('_ZmiIiEvv', 'void operator-<int>()')
        self.assertDemangles('_ZmiIiEvKT_RT_', 'void operator-<int>(int const, int&)')
        self.assertDemangles('_ZcviIiE', 'operator int<int>')
        self.assertDemangles('_ZcviIiEv', 'operator int<int>()')
        self.assertDemangles('_ZcviIiET_T_', 'operator int<int>(int, int)')

    def test_array(self):
        self.assertDemangles('_Z1fA1_c', 'f(char[(int)1])')
        self.assertDemangles('_Z1fRA1_c', 'f(char(&)[(int)1])')
        self.assertDemangles('_Z1fIA1_cS0_E', 'f<char[(int)1], char[(int)1]>')
        self.assertParses('_Z1fA1c', None)

    def test_function(self):
        self.assertDemangles('_Z1fFvvE', 'f(void ())')
        self.assertDemangles('_Z1fPFvvE', 'f(void (*)())')
        self.assertDemangles('_Z1fPPFvvE', 'f(void (**)())')
        self.assertDemangles('_Z1fRPFvvE', 'f(void (*&)())')
        self.assertDemangles('_Z1fKFvvE', 'f(void () const)')

    def test_member_data(self):
        self.assertDemangles('_Z1fM3fooi', 'f(int foo::*)')
        self.assertDemangles('_Z1fMN3foo3barEi', 'f(int foo::bar::*)')
        self.assertDemangles('_Z1fM3fooN3bar1XE', 'f(bar::X foo::*)')
        self.assertDemangles('_Z1fM3fooIcE3bar', 'f(bar foo<char>::*)')
        self.assertDemangles('_Z1fM3foo3barIlE', 'f(bar<long> foo::*)')
        self.assertDemangles('_Z3fooPM2ABi', 'foo(int AB::**)')

    def test_member_function(self):
        self.assertDemangles('_Z1fM3fooFvvE', 'f(void (foo::*)())')
        self.assertDemangles('_Z1fMN3foo3barEFvvE', 'f(void (foo::bar::*)())')
        self.assertDemangles('_Z3fooRM3barFviE', 'foo(void (bar::*&)(int))')
174 |
--------------------------------------------------------------------------------
/__init__.py:
--------------------------------------------------------------------------------
1 | import re
2 | from binaryninja import log
3 | from binaryninja.plugin import PluginCommand, BackgroundTaskThread
4 | from binaryninja.binaryview import BinaryReader
5 | from binaryninja.types import Symbol, Type, NamedTypeReferenceBuilder
6 | # Structure has been deprecated in favor of the StructureBuilder API.
7 | try:
8 | from binaryninja.types import StructureBuilder
9 | except ImportError:
10 | from binaryninja.types import Structure
11 | from binaryninja.enums import SymbolType, ReferenceType
12 |
13 | import sys
14 | import os.path
15 | # Prepend so if the itanium-demangler package is installed elsewhere it doesn't
16 | # interfere
17 | sys.path.insert(0, os.path.join(os.path.dirname(__file__), "itanium_demangler"))
18 | from itanium_demangler import Node, parse as parse_mangled, is_ctor_or_dtor
19 |
20 |
def analyze_cxx_abi(view, start=None, length=None, task=None):
    """Demangle Itanium C++ ABI symbols in `view` and apply what they reveal.

    For every symbol whose raw name looks mangled (`_Z` / `__Z` prefix) this:

    * re-defines the symbol with its demangled short and full names;
    * types `typeinfo name` data as a char array, `typeinfo` data as a
      type_info structure and `vtable` data as a vtable structure, creating
      functions for vtable slots that point into executable segments;
    * applies the function type recovered from the mangled name to code
      symbols and, for constructors/destructors, retypes the first call
      argument at each call site found in HLIL.

    Parameters:
        view: the Binary Ninja BinaryView to analyze and modify.
        start, length: forwarded unchanged to `view.get_symbols`.
        task: optional progress object offering `set_total(n)` and
            `advance()`; a falsy return from `advance()` stops the loop.

    Returns None. Demangler failures are logged and counted rather than
    raised; the total is logged at the end.
    """
    platform = view.platform
    arch = platform.arch

    void_p_ty = Type.pointer(arch, Type.void())
    char_p_ty = Type.pointer(arch, Type.int(1))
    signed_int_ty = Type.int(arch.default_int_size, True)

    base_type_info_ty = Type.named_type(NamedTypeReferenceBuilder.create(
        name='std::type_info'))
    base_type_info_ptr_ty = Type.pointer(arch, base_type_info_ty)

    def new_structure():
        # StructureBuilder is only bound if the module-level import succeeded;
        # otherwise fall back to the older Structure API.
        try:
            return StructureBuilder.create()
        except NameError:
            return Structure()

    def char_array_ty(length):
        # Array of `length` one-byte integers.
        return Type.array(Type.int(1), length)

    def type_info_ty(kind=None):
        # vtable pointer + name pointer, plus a base pointer for 'si_class'.
        type_info_struct = new_structure()
        type_info_struct.append(void_p_ty, 'vtable')
        type_info_struct.append(char_p_ty, 'name')
        if kind == 'si_class':
            type_info_struct.append(base_type_info_ptr_ty, 'base_type')
        return Type.structure_type(type_info_struct)

    def vtable_ty(vfunc_count):
        vtable_struct = new_structure()
        # NOTE(review): top_offset is declared with the default int size while
        # the scanner below skips two address-sized slots -- confirm the
        # resulting layout on targets where those sizes differ.
        vtable_struct.append(signed_int_ty, 'top_offset')
        vtable_struct.append(base_type_info_ptr_ty, 'typeinfo')
        vtable_struct.append(Type.array(void_p_ty, vfunc_count), 'functions')
        return Type.structure_type(vtable_struct)

    if platform.name.startswith("windows-"):
        long_size = arch.default_int_size
    else:
        long_size = arch.address_size

    if arch.name.startswith('x86'):
        char_signed = True
    else:
        char_signed = False  # not always true

    short_size = 2  # not always true
    long_long_size = 8  # not always true

    ty_for_cxx_builtin = {
        'void': Type.void(),
        'wchar_t': Type.int(2, sign=char_signed, alternate_name='wchar_t'),
        'bool': Type.bool(),
        'char': Type.int(1, sign=char_signed),
        'signed char': Type.int(1, sign=True),
        'unsigned char': Type.int(1, sign=False),
        'short': Type.int(short_size, sign=True),
        'unsigned short': Type.int(short_size, sign=False),
        'int': Type.int(arch.default_int_size, sign=True),
        'unsigned int': Type.int(arch.default_int_size, sign=False),
        'long': Type.int(long_size, sign=True),
        'unsigned long': Type.int(long_size, sign=False),
        'long long': Type.int(long_long_size, sign=True),
        'unsigned long long': Type.int(long_long_size, sign=False),
        '__int128': Type.int(16, sign=True),
        'unsigned __int128': Type.int(16, sign=False),
        'float': Type.float(4),
        'double': Type.float(8),
        '__float80': Type.float(10),
        '__float128': Type.float(16),
        'char32_t': Type.int(4, sign=char_signed, alternate_name='char32_t'),
        'char16_t': Type.int(2, sign=char_signed, alternate_name='char16_t'),
    }

    def ty_from_demangler_node(node, cv_qual=frozenset(), arg_count_hint=None):
        """Convert a demangler AST node into a Binary Ninja type.

        Returns None when the node cannot be converted. For `func` nodes
        called with `arg_count_hint` (the top-level call) the result is a
        `(this_arg, type, is_ctor_dtor)` tuple instead of a bare type.
        """
        if node.kind == 'builtin':
            return ty_for_cxx_builtin.get(node.value)
        elif node.kind in ['name', 'qual_name']:
            named_ty_ref = NamedTypeReferenceBuilder.create(name=str(node))
            return Type.named_type(named_ty_ref)
        elif node.kind in ['pointer', 'lvalue', 'rvalue']:
            pointee_ty = ty_from_demangler_node(node.value)
            if pointee_ty is None:
                return None
            is_const = ('const' in cv_qual)
            is_volatile = ('volatile' in cv_qual)
            if node.kind == 'pointer':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile)
            elif node.kind == 'lvalue':
                return Type.pointer(arch, pointee_ty, is_const, is_volatile,
                                    ref_type=ReferenceType.ReferenceReferenceType)
            else:
                return Type.pointer(arch, pointee_ty, is_const, is_volatile,
                                    ref_type=ReferenceType.RValueReferenceType)
        elif node.kind == 'cv_qual':
            return ty_from_demangler_node(node.value, cv_qual=node.qual)
        elif node.kind == 'func':
            qual_name = None
            if node.name and node.name.kind == 'qual_name':
                qual_name = node.name.value
            is_ctor_dtor = (qual_name is not None and
                            qual_name[-1].kind in ['ctor', 'dtor'])

            if is_ctor_dtor:
                ret_ty = Type.void()
            elif node.ret_ty is not None:
                ret_ty = ty_from_demangler_node(node.ret_ty)
                if ret_ty is None:
                    return None
            else:
                # The mangling carries no return type here; guess int with
                # zero confidence.
                ret_ty = Type.int(arch.default_int_size).with_confidence(0)

            arg_nodes = list(node.arg_tys)
            arg_tys = []

            var_arg = False
            # Guarded so that an empty argument tuple cannot raise IndexError.
            if arg_nodes:
                if arg_nodes[-1].kind == 'builtin' and arg_nodes[-1].value == '...':
                    arg_nodes.pop()
                    var_arg = True
                elif arg_nodes[0].kind == 'builtin' and arg_nodes[0].value == 'void':
                    arg_nodes = arg_nodes[1:]

            this_arg = False
            if qual_name is not None:
                # Heuristic: ctors/dtors always take `this`; otherwise assume
                # a member function when the analysis found exactly one more
                # parameter than the mangled name lists.
                if is_ctor_dtor or (arg_count_hint is not None and
                                    len(arg_nodes) == arg_count_hint - 1):
                    this_arg = True
                    this_node = Node('qual_name', qual_name[:-1])
                    this_ty = ty_from_demangler_node(this_node)
                    if this_ty is None:
                        return None
                    arg_tys.append(Type.pointer(arch, this_ty))
                    if is_ctor_dtor:
                        name = '::'.join(str(n) for n in qual_name[:-1])
                        if not name.startswith('std') and not view.get_type_by_name(name):
                            log.log_info(f'Registering new type {name}')
                            with StructureBuilder.builder(view, name) as s:
                                s.append(Type.pointer(arch, void_p_ty), 'vtable')

            for arg_node in arg_nodes:
                arg_ty = ty_from_demangler_node(arg_node)
                if arg_ty is None:
                    return None
                arg_tys.append(arg_ty)

            ty = Type.function(ret_ty, arg_tys, variable_arguments=var_arg)
            if arg_count_hint is not None:
                # toplevel invocation, so return whether we inferred a this argument
                return this_arg, ty, is_ctor_dtor
            else:
                return ty
        else:
            log.log_warn("Cannot convert demangled AST {} to a type"
                         .format(repr(node)))
            return None

    reader = BinaryReader(view)

    def read(size):
        # Read one `size`-byte integer at the reader's current offset.
        if size == 4:
            return reader.read32()
        elif size == 8:
            return reader.read64()
        else:
            # Raised explicitly: an assert would be stripped under `python -O`.
            raise ValueError('unsupported read size {}'.format(size))

    symbols = view.get_symbols(start, length)
    if task:
        task.set_total(len(symbols))

    mangled_re = re.compile('_?_Z')
    suffix_re = re.compile(r'^(.+?)(@.+)$')

    demangler_failures = 0
    for symbol in symbols:
        if task and not task.advance():
            break

        if not mangled_re.match(symbol.raw_name):
            continue

        is_data = (symbol.type == SymbolType.DataSymbol)
        is_code = (symbol.type in [SymbolType.FunctionSymbol,
                                   SymbolType.ImportedFunctionSymbol])

        # Split off an `@...` suffix before demangling; it is re-appended to
        # the demangled names below.
        raw_name, suffix = symbol.raw_name, ''
        match = suffix_re.match(raw_name)
        if match:
            raw_name, suffix = match.group(1), match.group(2)

        # Reset per symbol so a failure never reuses the previous symbol's AST.
        name_ast = None
        try:
            name_ast = parse_mangled(raw_name)
            if name_ast is None:
                log.log_warn("Demangler failed to recognize {}".format(raw_name))
                demangler_failures += 1
        except NotImplementedError as e:
            log.log_warn("Demangler feature missing on {}: {}".format(raw_name, str(e)))
            demangler_failures += 1

        if name_ast:
            if name_ast.kind == 'func':
                short_name = str(name_ast.name)
            else:
                short_name = str(name_ast)
            symbol = Symbol(symbol.type, symbol.address,
                            short_name=short_name + suffix,
                            full_name=str(name_ast) + suffix,
                            raw_name=symbol.raw_name)
        else:
            symbol = Symbol(symbol.type, symbol.address,
                            short_name=symbol.raw_name, full_name=None,
                            raw_name=symbol.raw_name)
        view.define_auto_symbol(symbol)

        if name_ast is None:
            continue

        elif is_data and name_ast.kind == 'typeinfo_name':
            strings = view.get_strings(symbol.address, 1)
            if not strings:
                continue

            view.define_data_var(symbol.address, char_array_ty(strings[0].length))

        elif is_data and name_ast.kind == 'typeinfo':
            # Skip the vtable and name pointers of the type_info object.
            reader.offset = symbol.address + arch.address_size * 2

            kind = None

            # heuristic: is this an abi::__si_class_type_info? Its third field
            # is a pointer to the base class typeinfo, so read a full
            # address-sized word rather than a default-sized int.
            base_or_flags = read(arch.address_size)
            base_symbol = view.get_symbol_at(base_or_flags)
            if base_symbol and base_symbol.raw_name.startswith('_ZTI'):
                kind = 'si_class'

            view.define_data_var(symbol.address, type_info_ty(kind))

        elif is_data and name_ast.kind == 'vtable':
            vtable_addr = symbol.address

            # Start scanning after the top_offset and typeinfo slots.
            reader.offset = vtable_addr + arch.address_size * 2
            while True:
                vfunc_count = 0
                check_next = True
                while True:
                    vfunc_ptr_symbol = view.get_symbol_at(reader.offset)
                    if vfunc_ptr_symbol and vfunc_ptr_symbol.raw_name.startswith('_Z'):
                        # any C++ symbol definitely terminates the vtable
                        check_next = False
                        break

                    # heuristic: existing function
                    vfunc_addr = read(arch.address_size)
                    if view.get_function_at(vfunc_addr):
                        vfunc_count += 1
                        continue

                    # explicitly reject null pointers; in position-independent code
                    # address zero can belong to the executable segment
                    if vfunc_addr == 0:
                        check_next = False
                        break

                    # heuristic: pointer to executable memory
                    vfunc_segment = view.get_segment_at(vfunc_addr)
                    if vfunc_segment and vfunc_segment.executable:
                        view.add_function(vfunc_addr)
                        vfunc_count += 1

                        log.log_info('Discovered function at {:#x} via {}'
                                     .format(vfunc_addr, symbol.full_name or symbol.short_name))
                        continue

                    # we've fallen off the end of the vtable
                    break

                view.define_data_var(vtable_addr, vtable_ty(vfunc_count))

                if check_next:
                    # heuristic: can another vtable follow this one? let's see if it has typeinfo,
                    # since that should be always true for when we have a virtual base
                    typeinfo_ptr = read(arch.address_size)
                    typeinfo_ptr_symbol = view.get_symbol_at(typeinfo_ptr)
                    if typeinfo_ptr_symbol and typeinfo_ptr_symbol.raw_name.startswith('_ZTI'):
                        vtable_addr = reader.offset - 2 * arch.address_size

                        # document it with a symbol
                        secondary_symbol_name = '{}_secondary_{:x}'.format(
                            symbol.short_name, vtable_addr - symbol.address)
                        secondary_symbol = Symbol(SymbolType.DataSymbol, vtable_addr,
                                                  short_name=secondary_symbol_name)
                        view.define_auto_symbol(secondary_symbol)
                        continue

                break

        elif is_code and name_ast.kind == 'func':
            func = view.get_function_at(symbol.address)
            if func is None:
                # No function is defined at this address; nothing to retype.
                continue

            # Prefer `type`; fall back to `function_type` when it is absent.
            ftype = getattr(func, 'type', None)
            if ftype is None:
                ftype = func.function_type

            demangled = ty_from_demangler_node(name_ast, arg_count_hint=len(ftype.parameters))
            if demangled is not None:
                this_arg, ty, dtor_ctor = demangled
                func.apply_auto_discovered_type(ty)
                if dtor_ctor and this_arg:
                    func_start = func.address_ranges[0].start
                    for caller in list(view.get_callers(func_start)):
                        il_call = next((ins for ins in view.hlil_instructions
                                        if ins.address == caller.address), None)
                        if il_call is None:
                            continue

                        caller_name = il_call.function.source_function.name
                        caller_ast = None
                        try:
                            # If the calling function is a ctor/dtor, it's
                            # probably running inherited constructors
                            # so we shouldn't override the type
                            caller_ast = parse_mangled(caller_name)
                        except NotImplementedError as e:
                            log.log_warn("Demangler feature missing on {}: {}".format(caller_name, str(e)))
                            demangler_failures += 1

                        if caller_ast and is_ctor_or_dtor(caller_ast):
                            continue
                        if not hasattr(il_call, 'params') or not il_call.params:
                            continue
                        this = il_call.params[0]
                        class_type = func.parameter_vars[0].type
                        if hasattr(this, 'var'):
                            this.var.type = class_type

    view.update_analysis()

    if demangler_failures:
        log.log_warn('{} demangler failures'.format(demangler_failures))
362 |
363 |
class CxxAbiAnalysis(BackgroundTaskThread):
    """Cancellable background task that runs `analyze_cxx_abi` on one view.

    The task doubles as the progress object handed to `analyze_cxx_abi`,
    which reports through `set_total` and `advance`.
    """

    _PROGRESS_TEXT = 'Analyzing Itanium C++ ABI'

    def __init__(self, view):
        BackgroundTaskThread.__init__(
            self,
            initial_progress_text=self._PROGRESS_TEXT + "...",
            can_cancel=True)
        self._view = view
        self._total = 0
        self._current = 0

    def set_total(self, total):
        """Record how many items the analysis is going to process."""
        self._total = total

    def advance(self):
        """Count one processed item and refresh the progress text.

        Returns False once the task has been cancelled, True otherwise.
        """
        self._current += 1
        self.progress = "{} ({}/{})...".format(
            self._PROGRESS_TEXT, self._current, self._total)
        if self.cancelled:
            return False
        return True

    def run(self):
        """Run the analysis inside one undo action and always finish the task."""
        target = self._view
        try:
            undo_state = target.begin_undo_actions()
            analyze_cxx_abi(target, task=self)
            target.commit_undo_actions(undo_state)
        finally:
            # finish() must run even if the analysis raises.
            self.finish()
389 |
390 |
# Register the analysis as a plugin command (name, description, action). The
# action builds a CxxAbiAnalysis for the given view and starts it.
PluginCommand.register(
    'Analyze Itanium C++ ABI...',
    'Infer data types from C++ symbol names conforming to Itanium ABI.',
    lambda view: CxxAbiAnalysis(view).start()
)
396 |
--------------------------------------------------------------------------------
/itanium_demangler/itanium_demangler/__init__.py:
--------------------------------------------------------------------------------
1 | # encoding:utf-8
2 | name = "itanium_demangler"
3 |
4 | """
5 | This module implements a C++ Itanium ABI demangler.
6 |
7 | The demangler provides a single entry point, `parse`, and returns either `None`
8 | or an abstract syntax tree. All nodes have, at least, a `kind` field.
9 |
10 | Name nodes:
11 | * `name`: `node.value` (`str`) holds an unqualified name
12 | * `ctor`: `node.value` is one of `"complete"`, `"base"`, or `"allocating"`, specifying
13 | the type of constructor
14 | * `dtor`: `node.value` is one of `"deleting"`, `"complete"`, or `"base"`, specifying
15 | the type of destructor
16 | * `oper`: `node.value` (`str`) holds a symbolic operator name, without the keyword
17 | "operator"
18 | * `oper_cast`: `node.value` holds a type node
19 | * `tpl_args`: `node.value` (`tuple`) holds a sequence of type nodes
20 | * `qual_name`: `node.value` (`tuple`) holds a sequence of `name` and `tpl_args` nodes,
21 | possibly ending in a `ctor`, `dtor`, `oper` or `oper_cast` node
22 | * `abi`: `node.value` holds a name node, `node.qual` (`frozenset`) holds a set of ABI tags
23 |
24 | Type nodes:
25 | * `name` and `qual_name` specify a type by its name
26 | * `builtin`: `node.value` (`str`) specifies a builtin type by its name
27 | * `pointer`, `lvalue` and `rvalue`: `node.value` holds a pointee type node
28 | * `cv_qual`: `node.value` holds a type node, `node.qual` (`frozenset`) is any of
29 | `"const"`, `"volatile"`, or `"restrict"`
30 | * `literal`: `node.value` (`str`) holds the literal representation as-is,
31 | `node.ty` holds a type node specifying the type of the literal
32 | * `func`: `node.name` holds a name node specifying the function name,
33 | `node.ret_ty` holds a type node specifying the return type of a template function,
34 | if any, or `None`; `node.arg_tys` (`tuple`) holds a sequence of type nodes
35 | specifying the function arguments
36 |
37 | Special nodes:
38 | * `vtable`, `vtt`, `typeinfo`, and `typeinfo_name`: `node.value` holds a type node
39 | specifying the type described by this RTTI data structure
40 | * `nonvirt_thunk`, `virt_thunk`: `node.value` holds a function node specifying
41 | the function to which the thunk dispatches
42 | """
43 |
44 | import re
45 | from collections import namedtuple
46 |
47 |
class _Cursor:
    """Read position over a mangled string plus the substitution table.

    `raw` is the text being parsed, `pos` the index of the next unread
    character.  Substitution candidates are stored in insertion order and
    are looked up by their zero-based index.
    """

    def __init__(self, raw, pos=0):
        self._raw = raw
        self._pos = pos
        self._substs = {}

    def at_end(self):
        """Return True when every character has been consumed."""
        return self._pos == len(self._raw)

    def accept(self, delim):
        """Consume `delim` if the unread text starts with it.

        Returns True when `delim` was consumed and False otherwise.
        """
        if self._raw.startswith(delim, self._pos):
            self._pos += len(delim)
            return True
        return False

    def advance(self, amount):
        """Consume and return the next `amount` characters.

        Returns None, without moving, when fewer characters are left.
        """
        end = self._pos + amount
        if end > len(self._raw):
            return None
        result = self._raw[self._pos:end]
        self._pos = end
        return result

    def advance_until(self, delim):
        """Consume up to and including the next `delim`.

        Returns the text before `delim`, or None (without moving) when
        `delim` does not occur in the unread text.
        """
        new_pos = self._raw.find(delim, self._pos)
        if new_pos == -1:
            return None
        result = self._raw[self._pos:new_pos]
        self._pos = new_pos + len(delim)
        return result

    def match(self, pattern):
        """Match the compiled `pattern` at the current position.

        On success the cursor moves past the match.  Returns the match
        object, or None when the pattern does not match.
        """
        match = pattern.match(self._raw, self._pos)
        if match:
            self._pos = match.end(0)
        return match

    def add_subst(self, node):
        """Append `node` to the substitution table unless an equal node is present."""
        # A linear scan is kept on purpose: nodes may carry unhashable payloads.
        if node not in self._substs.values():
            self._substs[len(self._substs)] = node

    def resolve_subst(self, seq_id):
        """Return the substitution stored under `seq_id`, or None if there is none."""
        return self._substs.get(seq_id)

    def __repr__(self):
        return "_Cursor({}, {})".format(self._raw[:self._pos] + '→' + self._raw[self._pos:],
                                        self._pos)
95 |
96 |
class Node(namedtuple('Node', 'kind value')):
    """AST node consisting of a `kind` tag and a single `value` payload.

    `str(node)` renders the node as C++-like text; kinds without a dedicated
    rendering fall back to `repr(node)`.
    """

    # Sigil appended to the pointee for each indirection kind.
    _INDIRECTION = {'pointer': '*', 'lvalue': '&', 'rvalue': '&&'}

    # Kinds rendered as a fixed prefix followed by str(value).
    _PREFIXED = {
        'vtable': 'vtable for ',
        'vtt': 'vtt for ',
        'typeinfo': 'typeinfo for ',
        'typeinfo_name': 'typeinfo name for ',
        'nonvirt_thunk': 'non-virtual thunk for ',
        'virt_thunk': 'virtual thunk for ',
        'guard_variable': 'guard variable for ',
        'transaction_clone': 'transaction clone for ',
    }

    def __repr__(self):
        # The format template was empty, which made every repr (and therefore
        # every str() fallback) an empty string.
        return "<Node {} {}>".format(self.kind, repr(self.value))

    def __str__(self):
        kind = self.kind
        if kind in ('name', 'builtin'):
            return self.value
        elif kind == 'qual_name':
            result = ''
            for node in self.value:
                # Template arguments attach directly to the preceding name.
                if result != '' and node.kind != 'tpl_args':
                    result += '::'
                result += str(node)
            return result
        elif kind == 'tpl_args':
            return '<' + ', '.join(map(str, self.value)) + '>'
        elif kind == 'ctor':
            if self.value == 'complete':
                return '{ctor}'
            elif self.value == 'base':
                return '{base ctor}'
            elif self.value == 'allocating':
                return '{allocating ctor}'
            else:
                assert False
        elif kind == 'dtor':
            if self.value == 'deleting':
                return '{deleting dtor}'
            elif self.value == 'complete':
                return '{dtor}'
            elif self.value == 'base':
                return '{base dtor}'
            else:
                assert False
        elif kind == 'oper':
            # `new`/`delete` (and their array forms) need a separating space.
            if self.value.startswith(('new', 'delete')):
                return 'operator ' + self.value
            return 'operator' + self.value
        elif kind == 'oper_cast':
            return 'operator ' + str(self.value)
        elif kind in self._INDIRECTION:
            return self.left() + self.right()
        elif kind == 'tpl_param':
            return '{T' + str(self.value) + '}'
        elif kind == 'subst':
            return '{S' + str(self.value) + '}'
        elif kind in self._PREFIXED:
            return self._PREFIXED[kind] + str(self.value)
        else:
            return repr(self)

    def left(self):
        """Return the text that goes before a declarator name."""
        sigil = self._INDIRECTION.get(self.kind)
        if sigil is None:
            return str(self)
        return self.value.left() + sigil

    def right(self):
        """Return the text that goes after a declarator name."""
        if self.kind in self._INDIRECTION:
            return self.value.right()
        return ""

    def map(self, f):
        """Return a copy with `f` applied to the child node(s).

        Kinds that are not listed here are returned unchanged.
        """
        if self.kind in ('oper_cast', 'pointer', 'lvalue', 'rvalue', 'expand_arg_pack',
                         'vtable', 'vtt', 'typeinfo', 'typeinfo_name'):
            return self._replace(value=f(self.value))
        elif self.kind in ('qual_name', 'tpl_args', 'tpl_arg_pack'):
            return self._replace(value=tuple(map(f, self.value)))
        else:
            return self
191 |
192 |
class QualNode(namedtuple('QualNode', 'kind value qual')):
    """Node `value` decorated with a set of qualifiers `qual`.

    Kind `abi` carries ABI tags, kind `cv_qual` carries any of `const`,
    `volatile` and `restrict`.
    """

    # Fixed rendering order; iterating the frozenset directly gave an order
    # that could change from one interpreter run to the next.
    _CV_ORDER = ('const', 'volatile', 'restrict')

    def __repr__(self):
        # The format template was empty, which made every repr an empty string.
        return "<QualNode {} {} {}>".format(self.kind, repr(self.qual), repr(self.value))

    def __str__(self):
        if self.kind == 'abi':
            tags = ''.join('[abi:' + tag + ']' for tag in sorted(self.qual))
            return str(self.value) + tags
        elif self.kind == 'cv_qual':
            known = [q for q in self._CV_ORDER if q in self.qual]
            other = sorted(q for q in self.qual if q not in self._CV_ORDER)
            return ' '.join([str(self.value)] + known + other)
        else:
            return repr(self)

    def left(self):
        """Return the text that goes before a declarator name."""
        return str(self)

    def right(self):
        """Return the text that goes after a declarator name (always empty)."""
        return ""

    def map(self, f):
        """Return a copy with `f` applied to the qualified node (`cv_qual` only)."""
        if self.kind == 'cv_qual':
            return self._replace(value=f(self.value))
        return self
216 |
217 |
class CastNode(namedtuple('CastNode', 'kind value ty')):
    """Value `value` annotated with a type node `ty` (kind `literal`)."""

    def __repr__(self):
        # The format template was empty, which made every repr an empty string.
        return "<CastNode {} {} {}>".format(self.kind, repr(self.ty), repr(self.value))

    def __str__(self):
        if self.kind == 'literal':
            return '(' + str(self.ty) + ')' + str(self.value)
        return repr(self)

    def left(self):
        """Return the text that goes before a declarator name."""
        return str(self)

    def right(self):
        """Return the text that goes after a declarator name (always empty)."""
        return ""

    def map(self, f):
        """Return a copy with `f` applied to the type node (`literal` only)."""
        if self.kind == 'literal':
            return self._replace(ty=f(self.ty))
        return self
239 |
240 |
class FuncNode(namedtuple('FuncNode', 'kind name arg_tys ret_ty')):
    """Function (kind `func`): optional name, argument types, optional return type."""

    def __repr__(self):
        # The format template was empty, which made every repr an empty string.
        return "<FuncNode {} {} {} {}>".format(self.kind, repr(self.name),
                                               repr(self.arg_tys), repr(self.ret_ty))

    def _format_args(self):
        """Render the parenthesised argument list; a lone `void` becomes `()`."""
        if self.arg_tys == (Node('builtin', 'void'),):
            return '()'
        return '(' + ', '.join(map(str, self.arg_tys)) + ')'

    def __str__(self):
        if self.kind != 'func':
            return repr(self)
        result = ""
        if self.ret_ty is not None:
            result += str(self.ret_ty) + ' '
        if self.name is not None:
            result += str(self.name)
        return result + self._format_args()

    def left(self):
        """Return the text before a declarator: return type, `(` and the name."""
        if self.kind != 'func':
            return str(self)
        result = ""
        if self.ret_ty is not None:
            result += str(self.ret_ty) + ' '
        result += "("
        if self.name is not None:
            result += str(self.name)
        return result

    def right(self):
        """Return the text after a declarator: `)` and the argument list."""
        if self.kind != 'func':
            return ""
        return ")" + self._format_args()

    def map(self, f):
        """Return a copy with `f` applied to the name, each argument and the return type."""
        if self.kind != 'func':
            return self
        return self._replace(name=f(self.name) if self.name else None,
                             arg_tys=tuple(map(f, self.arg_tys)),
                             ret_ty=f(self.ret_ty) if self.ret_ty else None)
291 |
292 |
class ArrayNode(namedtuple('ArrayNode', 'kind dimension ty')):
    """Array type (kind `array`) with element type `ty` and size `dimension`."""

    def __repr__(self):
        # The format template was empty, which made every repr an empty string.
        return "<ArrayNode {} {} {}>".format(self.kind, repr(self.dimension), repr(self.ty))

    def __str__(self):
        if self.kind == 'array':
            return str(self.ty) + "[" + str(self.dimension) + "]"
        return repr(self)

    def left(self):
        """Return the text before a declarator: element type and `(`."""
        if self.kind == 'array':
            return str(self.ty) + "("
        return str(self)

    def right(self):
        """Return the text after a declarator: `)` and the bracketed dimension."""
        if self.kind == 'array':
            return ")[" + str(self.dimension) + "]"
        return ""

    def map(self, f):
        """Return a copy with `f` applied to the dimension and the element type."""
        if self.kind == 'array':
            return self._replace(dimension=f(self.dimension) if self.dimension else None,
                                 ty=f(self.ty) if self.ty else None)
        return self
326 |
327 |
class MemberNode(namedtuple('MemberNode', 'kind cls_ty member_ty')):
    """Pointer to member of class `cls_ty`; kind is `data` or `method`."""

    def __repr__(self):
        # The format template was empty, which made every repr an empty string.
        return "<MemberNode {} {} {}>".format(self.kind, repr(self.cls_ty),
                                              repr(self.member_ty))

    def __str__(self):
        if self.kind == 'data':
            return str(self.member_ty) + " " + str(self.cls_ty) + "::*"
        elif self.kind == 'method':
            return self.left() + self.right()
        return repr(self)

    def left(self):
        """Return the text that goes before a declarator name."""
        if self.kind == 'method':
            return self.member_ty.left() + str(self.cls_ty) + "::*"
        return str(self)

    def right(self):
        """Return the text that goes after a declarator name."""
        if self.kind == 'method':
            return self.member_ty.right()
        return ""

    def map(self, f):
        """Return a copy with `f` applied to the class type and the member type."""
        # The kinds in use are 'data' and 'method'; the previous test for
        # 'func' never matched, so method pointers were silently skipped.
        if self.kind in ('data', 'method'):
            return self._replace(cls_ty=f(self.cls_ty) if self.cls_ty else None,
                                 member_ty=f(self.member_ty) if self.member_ty else None)
        return self
360 |
361 |
# Maps the two-character constructor/destructor codes accepted by `_NAME_RE`
# to the `value` stored in `ctor` / `dtor` nodes.
_ctor_dtor_map = {
    'C1': 'complete',
    'C2': 'base',
    'C3': 'allocating',
    'D0': 'deleting',
    'D1': 'complete',
    'D2': 'base'
}
370 |
# Maps the standard abbreviation codes to the name components they stand for.
# The values are tuples because they become the `value` of `qual_name` nodes,
# which is documented as a tuple and is concatenated with tuples elsewhere.
_std_names = {
    'St': (Node('name', 'std'),),
    'Sa': (Node('name', 'std'), Node('name', 'allocator')),
    'Sb': (Node('name', 'std'), Node('name', 'basic_string')),
    'Ss': (Node('name', 'std'), Node('name', 'string')),
    'Si': (Node('name', 'std'), Node('name', 'istream')),
    'So': (Node('name', 'std'), Node('name', 'ostream')),
    'Sd': (Node('name', 'std'), Node('name', 'iostream')),
}
380 |
# Maps the two-character operator codes accepted by `_NAME_RE` to the symbolic
# operator text stored in `oper` nodes (without the `operator` keyword).
_operators = {
    'nw': 'new',
    'na': 'new[]',
    'dl': 'delete',
    'da': 'delete[]',
    'ps': '+', # (unary)
    'ng': '-', # (unary)
    'ad': '&', # (unary)
    'de': '*', # (unary)
    'co': '~',
    'pl': '+',
    'mi': '-',
    'ml': '*',
    'dv': '/',
    'rm': '%',
    'an': '&',
    'or': '|',
    'eo': '^',
    'aS': '=',
    'pL': '+=',
    'mI': '-=',
    'mL': '*=',
    'dV': '/=',
    'rM': '%=',
    'aN': '&=',
    'oR': '|=',
    'eO': '^=',
    'ls': '<<',
    'rs': '>>',
    'lS': '<<=',
    'rS': '>>=',
    'eq': '==',
    'ne': '!=',
    'lt': '<',
    'gt': '>',
    'le': '<=',
    'ge': '>=',
    'nt': '!',
    'aa': '&&',
    'oo': '||',
    'pp': '++', # (postfix in context)
    'mm': '--', # (postfix in context)
    'cm': ',',
    'pm': '->*',
    'pt': '->',
    'cl': '()',
    'ix': '[]',
    'qu': '?',
}
430 |
# Maps the builtin type codes accepted by `_TYPE_RE` to their type nodes.
# `Dc` (decltype(auto)) is accepted by `_TYPE_RE` but had no entry here, so
# looking it up raised KeyError.
# NOTE(review): `DF` is also accepted by `_TYPE_RE`; it is followed by a bit
# count in the mangling and is deliberately not given a fixed entry here.
_builtin_types = {
    'v': Node('builtin', 'void'),
    'w': Node('builtin', 'wchar_t'),
    'b': Node('builtin', 'bool'),
    'c': Node('builtin', 'char'),
    'a': Node('builtin', 'signed char'),
    'h': Node('builtin', 'unsigned char'),
    's': Node('builtin', 'short'),
    't': Node('builtin', 'unsigned short'),
    'i': Node('builtin', 'int'),
    'j': Node('builtin', 'unsigned int'),
    'l': Node('builtin', 'long'),
    'm': Node('builtin', 'unsigned long'),
    'x': Node('builtin', 'long long'),
    'y': Node('builtin', 'unsigned long long'),
    'n': Node('builtin', '__int128'),
    'o': Node('builtin', 'unsigned __int128'),
    'f': Node('builtin', 'float'),
    'd': Node('builtin', 'double'),
    'e': Node('builtin', '__float80'),
    'g': Node('builtin', '__float128'),
    'z': Node('builtin', '...'),
    'Dd': Node('builtin', '_Decimal64'),
    'De': Node('builtin', '_Decimal128'),
    'Df': Node('builtin', '_Decimal32'),
    'Dh': Node('builtin', '_Float16'),
    'Di': Node('builtin', 'char32_t'),
    'Ds': Node('builtin', 'char16_t'),
    'Da': Node('builtin', 'auto'),
    'Dc': Node('builtin', 'decltype(auto)'),
    'Dn': Node('qual_name', (Node('name', 'std'), Node('builtin', 'nullptr_t')))
}
462 |
463 |
def _handle_cv(qualifiers, node):
    """Wrap `node` in a `cv_qual` node for the qualifier letters in `qualifiers`.

    `r`, `V` and `K` stand for `restrict`, `volatile` and `const`.  When none
    of them is present `node` is returned unchanged.
    """
    letter_to_word = (('r', 'restrict'), ('V', 'volatile'), ('K', 'const'))
    words = frozenset(word for letter, word in letter_to_word if letter in qualifiers)
    if not words:
        return node
    return QualNode('cv_qual', node, words)
475 |
def _handle_indirect(qualifier, node):
    """Wrap `node` in a `pointer` (`P`), `lvalue` (`R`) or `rvalue` (`O`) node.

    Any other `qualifier` leaves `node` unchanged.
    """
    kind = {'P': 'pointer', 'R': 'lvalue', 'O': 'rvalue'}.get(qualifier)
    if kind is None:
        return node
    return Node(kind, node)
484 |
485 |
_NUMBER_RE = re.compile(r"\d+")

def _parse_number(cursor):
    """Consume a run of decimal digits and return it as an int, or None if absent."""
    digits = cursor.match(_NUMBER_RE)
    return int(digits.group(0)) if digits is not None else None
493 |
def _parse_seq_id(cursor):
    """Consume a `<seq-id> _` and return the zero-based index it denotes.

    An empty id (just `_`) is index 0; otherwise the id is a base-36 number
    written with digits and upper-case letters, and the index is that number
    plus one.  Returns None when no `_` follows or when the id contains any
    other character; previously such input raised ValueError, or was accepted
    by `int()` despite signs, whitespace or lower-case letters.
    """
    seq_id = cursor.advance_until('_')
    if seq_id is None:
        return None
    if seq_id == '':
        return 0
    allowed = '0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ'
    if any(ch not in allowed for ch in seq_id):
        return None
    return 1 + int(seq_id, 36)
502 |
def _parse_until_end(cursor, kind, fn):
    """Collect `fn(cursor)` results until an `E` is consumed.

    Returns a `kind` node holding the results as a tuple, or None when `fn`
    fails or the input ends before the closing `E`.
    """
    items = []
    while True:
        if cursor.accept('E'):
            return Node(kind, tuple(items))
        item = fn(cursor)
        if item is None or cursor.at_end():
            return None
        items.append(item)
511 |
512 |
_SOURCE_NAME_RE = re.compile(r"\d+")

def _parse_source_name(cursor):
    """Consume a length-prefixed identifier and return it as a string.

    Returns None when no decimal length is present (this used to raise
    AttributeError on the failed match) or when fewer characters than the
    announced length remain.
    """
    match = cursor.match(_SOURCE_NAME_RE)
    if match is None:
        return None
    return cursor.advance(int(match.group(0)))
522 |
523 |
# Each alternative is a named group; `_parse_name` dispatches on whichever
# group matched.  `source_name` is a lookahead only, so the digits stay in the
# input for `_parse_source_name`.
_NAME_RE = re.compile(r"""
(?P<source_name>        (?= \d)) |
(?P<ctor_name>          C[123]) |
(?P<dtor_name>          D[012]) |
(?P<std_name>           S[absiod]) |
(?P<operator_name>      nw|na|dl|da|ps|ng|ad|de|co|pl|mi|ml|dv|rm|an|or|
                        eo|aS|pL|mI|mL|dV|rM|aN|oR|eO|ls|rs|lS|rS|eq|ne|
                        lt|gt|le|ge|nt|aa|oo|pp|mm|cm|pm|pt|cl|ix|qu) |
(?P<operator_cv>        cv) |
(?P<std_prefix>         St) |
(?P<substitution>       S) |
(?P<nested_name>        N (?P<cv_qual> [rVK]*) (?P<ref_qual> [RO]?)) |
(?P<template_param>     T) |
(?P<template_args>      I) |
(?P<constant>           L) |
(?P<local_name>         Z) |
(?P<unnamed_type>       Ut) |
(?P<closure_type>       Ul)
""", re.X)

# Index of a template parameter: optional decimal digits followed by `_`.
_TEMPLATE_PARAM_INDEX_RE = re.compile(r"(\d*)_")

def _parse_name(cursor, is_nested=False):
    """Parse one name at the cursor and return its node, or None on failure.

    `is_nested` is True while parsing the components of a nested or
    `std`-prefixed name; trailing template arguments are then left for the
    caller instead of being attached here.

    Raises NotImplementedError for local names, unnamed types and closure
    types.
    """
    match = cursor.match(_NAME_RE)
    if match is None:
        return None
    elif match.group('source_name') is not None:
        name = _parse_source_name(cursor)
        if name is None:
            return None
        node = Node('name', name)
    elif match.group('ctor_name') is not None:
        node = Node('ctor', _ctor_dtor_map[match.group('ctor_name')])
    elif match.group('dtor_name') is not None:
        node = Node('dtor', _ctor_dtor_map[match.group('dtor_name')])
    elif match.group('std_name') is not None:
        node = Node('qual_name', _std_names[match.group('std_name')])
    elif match.group('operator_name') is not None:
        node = Node('oper', _operators[match.group('operator_name')])
    elif match.group('operator_cv') is not None:
        ty = _parse_type(cursor)
        if ty is None:
            return None
        node = Node('oper_cast', ty)
    elif match.group('std_prefix') is not None:
        name = _parse_name(cursor, is_nested=True)
        if name is None:
            return None
        if name.kind == 'qual_name':
            node = Node('qual_name', (Node('name', 'std'),) + tuple(name.value))
        else:
            node = Node('qual_name', (Node('name', 'std'), name))
    elif match.group('substitution') is not None:
        seq_id = _parse_seq_id(cursor)
        if seq_id is None:
            return None
        node = cursor.resolve_subst(seq_id)
        if node is None:
            return None
    elif match.group('nested_name') is not None:
        nodes = []
        while True:
            name = _parse_name(cursor, is_nested=True)
            if name is None or cursor.at_end():
                return None
            if name.kind == 'qual_name':
                nodes += name.value
            else:
                nodes.append(name)
            if cursor.accept('E'):
                break
            else:
                # Every proper prefix of a nested name is a substitution candidate.
                cursor.add_subst(Node('qual_name', tuple(nodes)))
        node = Node('qual_name', tuple(nodes))
        node = _handle_cv(match.group('cv_qual'), node)
        node = _handle_indirect(match.group('ref_qual'), node)
    elif match.group('template_param') is not None:
        # The index is a decimal number: `T_` is parameter 0 and `T<n>_` is
        # parameter n + 1.  It used to be read as a base-36 seq-id, which
        # gave wrong indices from `T10_` upwards.
        index_match = cursor.match(_TEMPLATE_PARAM_INDEX_RE)
        if index_match is None:
            return None
        digits = index_match.group(1)
        seq_id = int(digits) + 1 if digits else 0
        node = Node('tpl_param', seq_id)
        cursor.add_subst(node)
    elif match.group('template_args') is not None:
        node = _parse_until_end(cursor, 'tpl_args', _parse_type)
    elif match.group('constant') is not None:
        # not in the ABI doc, but probably means `const`
        return _parse_name(cursor, is_nested)
    elif match.group('local_name') is not None:
        raise NotImplementedError("local names are not supported")
    elif match.group('unnamed_type') is not None:
        raise NotImplementedError("unnamed types are not supported")
    elif match.group('closure_type') is not None:
        raise NotImplementedError("closure (lambda) types are not supported")
    if node is None:
        return None

    abi_tags = []
    while cursor.accept('B'):
        tag = _parse_source_name(cursor)
        if tag is None:
            # A truncated tag is a parse failure, not a tag named None.
            return None
        abi_tags.append(tag)
    if abi_tags:
        node = QualNode('abi', node, frozenset(abi_tags))

    # Decide whether template arguments may follow *before* consuming the
    # `I`; previously the `I` was swallowed even when they were not handled.
    is_std = (match.group('std_prefix') is not None or
              match.group('std_name') is not None)
    takes_tpl_args = (node.kind in ('name', 'oper', 'oper_cast') or
                      is_std or
                      match.group('substitution') is not None)
    if not is_nested and takes_tpl_args and cursor.accept('I'):
        if node.kind in ('name', 'oper', 'oper_cast') or match.group('std_prefix') is not None:
            # The bare template name is itself a substitution candidate.
            cursor.add_subst(node)
        templ_args = _parse_until_end(cursor, 'tpl_args', _parse_type)
        if templ_args is None:
            return None
        node = Node('qual_name', (node, templ_args))
        if is_std and node.value[0].value[1].kind not in ('oper', 'oper_cast'):
            cursor.add_subst(node)

    return node
641 |
642 |
# Each alternative is a named group; `_parse_type` dispatches on whichever
# group matched.  `expr_primary` is a lookahead only, so the `L` stays in the
# input for `_parse_expr_primary`.
_TYPE_RE = re.compile(r"""
(?P<builtin_type>       v|w|b|c|a|h|s|t|i|j|l|m|x|y|n|o|f|d|e|g|z|
                        Dd|De|Df|Dh|DF|Di|Ds|Da|Dc|Dn) |
(?P<qualified_type>     [rVK]+) |
(?P<indirect_type>      [PRO]) |
(?P<function_type>      F) |
(?P<expression>         X) |
(?P<expr_primary>       (?= L)) |
(?P<template_arg_pack>  J) |
(?P<arg_pack_expansion> Dp) |
(?P<decltype>           D[tT]) |
(?P<array_type>         A) |
(?P<member_type>        M)
""", re.X)

def _parse_type(cursor):
    """Parse one type at the cursor and return its node, or None on failure.

    Anything `_TYPE_RE` does not recognise is parsed as a name.  Compound
    types are registered as substitution candidates once they are complete.

    Raises NotImplementedError for expressions and decltype.
    """
    match = cursor.match(_TYPE_RE)
    if match is None:
        node = _parse_name(cursor)
        if node is None:
            # Do not record a failed parse in the substitution table.
            return None
        cursor.add_subst(node)
    elif match.group('builtin_type') is not None:
        # The pattern accepts codes without a table entry (e.g. `DF`); treat
        # those as a parse failure instead of raising KeyError.
        node = _builtin_types.get(match.group('builtin_type'))
        if node is None:
            return None
    elif match.group('qualified_type') is not None:
        ty = _parse_type(cursor)
        if ty is None:
            return None
        node = _handle_cv(match.group('qualified_type'), ty)
        cursor.add_subst(node)
    elif match.group('indirect_type') is not None:
        ty = _parse_type(cursor)
        if ty is None:
            return None
        node = _handle_indirect(match.group('indirect_type'), ty)
        cursor.add_subst(node)
    elif match.group('function_type') is not None:
        ret_ty = _parse_type(cursor)
        if ret_ty is None:
            return None
        arg_tys = []
        while not cursor.accept('E'):
            arg_ty = _parse_type(cursor)
            if arg_ty is None:
                return None
            arg_tys.append(arg_ty)
        node = FuncNode('func', None, tuple(arg_tys), ret_ty)
        cursor.add_subst(node)
    elif match.group('expression') is not None:
        raise NotImplementedError("expressions are not supported")
    elif match.group('expr_primary') is not None:
        node = _parse_expr_primary(cursor)
    elif match.group('template_arg_pack') is not None:
        node = _parse_until_end(cursor, 'tpl_arg_pack', _parse_type)
    elif match.group('arg_pack_expansion') is not None:
        expanded = _parse_type(cursor)
        if expanded is None:
            return None
        node = Node('expand_arg_pack', expanded)
    elif match.group('decltype') is not None:
        raise NotImplementedError("decltype is not supported")
    elif match.group('array_type') is not None:
        dimension = _parse_number(cursor)
        if dimension is None:
            return None
        dimension = CastNode('literal', dimension, Node('builtin', 'int'))
        if not cursor.accept('_'):
            return None
        elem_ty = _parse_type(cursor)
        if elem_ty is None:
            return None
        node = ArrayNode('array', dimension, elem_ty)
        cursor.add_subst(node)
    elif match.group('member_type') is not None:
        cls_ty = _parse_type(cursor)
        if cls_ty is None:
            return None
        member_ty = _parse_type(cursor)
        if member_ty is None:
            return None
        kind = "method" if member_ty.kind == 'func' else "data"
        # NOTE(review): unlike the other compound types, this node is not
        # added to the substitution table -- confirm against the ABI.
        node = MemberNode(kind, cls_ty, member_ty)
    else:
        return None
    return node
722 |
723 |
# `mangled_name` only matches when `_Z` follows the `L`; any other `L`
# introduces a typed literal.
_EXPR_PRIMARY_RE = re.compile(r"""
(?P<mangled_name>       L (?= _Z)) |
(?P<literal>            L)
""", re.X)

def _parse_expr_primary(cursor):
    """Parse a primary expression: an embedded mangled name or a typed literal.

    Both forms end at the next `E`.  Returns the parsed node, or None when
    the input does not start with `L`, the terminating `E` is missing, or
    the literal's type cannot be parsed.
    """
    match = cursor.match(_EXPR_PRIMARY_RE)
    if match is None:
        return None
    elif match.group('mangled_name') is not None:
        mangled_name = cursor.advance_until('E')
        if mangled_name is None:
            # No terminating `E`: fail instead of building a cursor over None.
            return None
        return _parse_mangled_name(_Cursor(mangled_name))
    elif match.group('literal') is not None:
        ty = _parse_type(cursor)
        if ty is None:
            return None
        value = cursor.advance_until('E')
        if value is None:
            return None
        return CastNode('literal', value, ty)
744 |
745 |
def _expand_template_args(func):
    """Replace `tpl_param` nodes in `func` with the function's own template arguments.

    Only applies when the function name is a `qual_name` ending in `tpl_args`;
    otherwise `func` is returned unchanged.  Parameters whose index is outside
    the argument list are left in place.
    """
    name = func.name
    if name.kind != 'qual_name':
        return func
    last = name.value[-1]
    if last.kind != 'tpl_args':
        return func
    bound_args = last.value

    def substitute(node):
        if node is None:
            return None
        if node.kind == 'tpl_param' and node.value < len(bound_args):
            return bound_args[node.value]
        return node.map(substitute)

    return substitute(func)
759 |
def _parse_encoding(cursor):
    """Parse a name optionally followed by a function signature.

    Returns the bare name node when nothing follows it, otherwise a `func`
    node with template parameters expanded.  Returns None on failure.
    """
    name = _parse_name(cursor)
    if name is None:
        return None
    if cursor.at_end():
        return name

    # A template function encodes its return type first, except for
    # constructors, destructors and conversion operators.  The length check
    # avoids an IndexError on a qualified name made of template arguments only.
    # NOTE(review): a cv- or ref-qualified nested name is wrapped in another
    # node kind and is therefore not recognised here -- confirm intent.
    has_return_type = (name.kind == 'qual_name'
                       and len(name.value) >= 2
                       and name.value[-1].kind == 'tpl_args'
                       and name.value[-2].kind not in ('ctor', 'dtor', 'oper_cast'))
    if has_return_type:
        ret_ty = _parse_type(cursor)
        if ret_ty is None:
            return None
    else:
        ret_ty = None

    arg_tys = []
    while not cursor.at_end():
        arg_ty = _parse_type(cursor)
        if arg_ty is None:
            return None
        arg_tys.append(arg_ty)

    if not arg_tys:
        return name
    func = FuncNode('func', name, tuple(arg_tys), ret_ty)
    return _expand_template_args(func)
788 |
789 |
# Each alternative is a named group; `_parse_special` dispatches on whichever
# group matched.  The thunk offsets are captured but not read by the parser;
# their group names are a reconstruction.
_SPECIAL_RE = re.compile(r"""
(?P<rtti>               T (?P<kind> [VTIS])) |
(?P<nonvirtual_thunk>   Th (?P<nv_offset> n? \d+) _) |
(?P<virtual_thunk>      Tv (?P<v_offset> n? \d+) _ (?P<vcall_offset> n? \d+) _) |
(?P<covariant_thunk>    Tc) |
(?P<guard_variable>     GV) |
(?P<extended_temporary> GR) |
(?P<transaction_clone>  GTt)
""", re.X)

# RTTI code letter -> node kind.
_RTTI_KINDS = {
    'V': 'vtable',
    'T': 'vtt',
    'I': 'typeinfo',
    'S': 'typeinfo_name',
}

def _parse_special(cursor):
    """Parse a special name (RTTI data, thunk, guard variable, transaction clone).

    Returns the node, or None when the input is not a special name or its
    operand fails to parse.

    Raises NotImplementedError for covariant thunks and extended temporaries.
    """
    match = cursor.match(_SPECIAL_RE)
    if match is None:
        return None
    elif match.group('rtti') is not None:
        name = _parse_type(cursor)
        if name is None:
            return None
        return Node(_RTTI_KINDS[match.group('kind')], name)
    elif match.group('nonvirtual_thunk') is not None:
        func = _parse_encoding(cursor)
        if func is None:
            return None
        return Node('nonvirt_thunk', func)
    elif match.group('virtual_thunk') is not None:
        func = _parse_encoding(cursor)
        if func is None:
            return None
        return Node('virt_thunk', func)
    elif match.group('covariant_thunk') is not None:
        raise NotImplementedError("covariant thunks are not supported")
    elif match.group('guard_variable'):
        name = _parse_type(cursor)
        if name is None:
            return None
        return Node('guard_variable', name)
    elif match.group('extended_temporary'):
        raise NotImplementedError("extended temporaries are not supported")
    elif match.group('transaction_clone'):
        func = _parse_encoding(cursor)
        if func is None:
            return None
        return Node('transaction_clone', func)
840 |
841 |
# Mangled names start with `_Z`; one extra leading underscore is tolerated.
_MANGLED_NAME_RE = re.compile(r"""
(?P<mangled_name>       _?_Z)
""", re.X)

def _parse_mangled_name(cursor):
    """Parse a complete mangled name and return its node, or None on failure.

    A special name is tried first; if that yields nothing the cursor is
    rewound and the input is parsed as an ordinary encoding.
    """
    if cursor.match(_MANGLED_NAME_RE) is None:
        return None

    start = cursor._pos
    special = _parse_special(cursor)
    if special is not None:
        return special

    # Return the cursor position to its previous state before continuing
    cursor._pos = start
    return _parse_encoding(cursor)
859 |
860 |
def _expand_arg_packs(ast):
    """Flatten template argument packs throughout `ast`.

    Packs nested inside `tpl_args` are spliced into the surrounding argument
    list.  In `func` nodes, an `expand_arg_pack` argument that wraps an
    rvalue reference to a pack is replaced by the pack's elements.
    """
    pack_kinds = ('tpl_arg_pack', 'tpl_args')

    def visit(node):
        if node is None:
            return None

        if node.kind == 'tpl_args':
            flattened = []
            for arg in node.value:
                if arg.kind in pack_kinds:
                    flattened.extend(arg.value)
                else:
                    flattened.append(arg)
            return Node('tpl_args', tuple(visit(arg) for arg in flattened))

        if node.kind != 'func':
            return node.map(visit)

        # Children are rewritten first so the packs below are already flat.
        node = node.map(visit)
        expanded = []
        for arg_ty in node.arg_tys:
            is_pack_expansion = (arg_ty.kind == 'expand_arg_pack'
                                 and arg_ty.value is not None
                                 and arg_ty.value.kind == 'rvalue'
                                 and arg_ty.value.value.kind in pack_kinds)
            if is_pack_expansion:
                expanded.extend(arg_ty.value.value.value)
            else:
                expanded.append(arg_ty)
        return node._replace(arg_tys=tuple(expanded))

    return visit(ast)
888 |
def parse(raw):
    """Demangle the string `raw` into an AST with argument packs expanded.

    Returns None when `raw` cannot be parsed as a mangled name.
    """
    tree = _parse_mangled_name(_Cursor(raw))
    return None if tree is None else _expand_arg_packs(tree)
894 |
def is_ctor_or_dtor(ast) -> bool:
    """Return True when `ast` names a constructor or destructor.

    `func` nodes are judged by their name; a `qual_name` qualifies when its
    last component is a `ctor` or `dtor` node.  Everything else is False.
    """
    while ast.kind == 'func':
        ast = ast.name
    if ast.kind != 'qual_name':
        return False
    return ast.value[-1].kind in ('ctor', 'dtor')
903 |
904 | # ================================================================================================
905 |
906 |
if __name__ == '__main__':
    # Command-line driver: with no arguments, demangle one name per line of
    # standard input; otherwise print the repr and the rendering of each argument.
    import sys
    if len(sys.argv) == 1:
        # readline() returns '' only at end of input, which ends the loop.
        for name in iter(sys.stdin.readline, ''):
            print(parse(name.strip()))
    else:
        for name in sys.argv[1:]:
            ast = parse(name)
            print(repr(ast))
            print(ast)
920 |
--------------------------------------------------------------------------------