├── .github ├── dependabot.yml └── workflows │ └── release.yml ├── .gitignore ├── LICENSE ├── README.md ├── pyproject.toml └── src └── binexport ├── __init__.py ├── __main__.py ├── basic_block.py ├── binexport2.proto ├── binexport2_pb2.py ├── expression.py ├── function.py ├── instruction.py ├── operand.py ├── program.py ├── types.py └── utils.py /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | # To get started with Dependabot version updates, you'll need to specify which 2 | # package ecosystems to update and where the package manifests are located. 3 | # Please see the documentation for all configuration options: 4 | # https://docs.github.com/code-security/dependabot/dependabot-version-updates/configuration-options-for-the-dependabot.yml-file 5 | 6 | version: 2 7 | updates: 8 | - package-ecosystem: "pip" # See documentation for possible values 9 | directory: "/" # Location of package manifests 10 | groups: 11 | python: 12 | patterns: 13 | - "*" 14 | schedule: 15 | interval: "daily" 16 | 17 | - package-ecosystem: "github-actions" 18 | directory: "/" 19 | groups: 20 | python: 21 | patterns: 22 | - "*" 23 | schedule: 24 | interval: "daily" 25 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | name: Release 2 | 3 | on: 4 | release: 5 | types: [published] 6 | 7 | jobs: 8 | build_wheel_and_sdist: 9 | name: Build wheel and tar.gz 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - uses: actions/checkout@v4 14 | - uses: actions/setup-python@v5 15 | with: 16 | python-version: '3.10' 17 | - name: Install dependencies 18 | run: pip install build 19 | - name: Build wheel and sdist 20 | run: python -m build 21 | - uses: actions/upload-artifact@v4 22 | with: 23 | name: artifact 24 | path: | 25 | ./dist/python_binexport*.whl 26 | ./dist/*.tar.gz 27 | 28 | upload_pypi: 29 | 
needs: build_wheel_and_sdist 30 | runs-on: ubuntu-latest 31 | steps: 32 | - uses: actions/download-artifact@v4 33 | with: 34 | # unpacks default artifact into dist/ 35 | # if `name: artifact` is omitted, the action will create extra parent dir 36 | name: artifact 37 | path: dist 38 | 39 | - name: Publish a Python distribution to PyPI 40 | uses: pypa/gh-action-pypi-publish@v1.12.4 41 | with: 42 | password: ${{ secrets.PYPI_DEPLOY_TOKEN }} 43 | 44 | - name: Upload Python packages for release notes 45 | uses: softprops/action-gh-release@v2.3.0 46 | with: 47 | files: | 48 | dist/* 49 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | ### Python template 2 | # Byte-compiled / optimized / DLL files 3 | __pycache__/ 4 | *.py[cod] 5 | *$py.class 6 | 7 | build/ 8 | dist/ 9 | *.egg-info/ 10 | /venv/ 11 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Apache License 2 | Version 2.0, January 2004 3 | http://www.apache.org/licenses/ 4 | 5 | TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION 6 | 7 | 1. Definitions. 8 | 9 | "License" shall mean the terms and conditions for use, reproduction, 10 | and distribution as defined by Sections 1 through 9 of this document. 11 | 12 | "Licensor" shall mean the copyright owner or entity authorized by 13 | the copyright owner that is granting the License. 14 | 15 | "Legal Entity" shall mean the union of the acting entity and all 16 | other entities that control, are controlled by, or are under common 17 | control with that entity. 
For the purposes of this definition, 18 | "control" means (i) the power, direct or indirect, to cause the 19 | direction or management of such entity, whether by contract or 20 | otherwise, or (ii) ownership of fifty percent (50%) or more of the 21 | outstanding shares, or (iii) beneficial ownership of such entity. 22 | 23 | "You" (or "Your") shall mean an individual or Legal Entity 24 | exercising permissions granted by this License. 25 | 26 | "Source" form shall mean the preferred form for making modifications, 27 | including but not limited to software source code, documentation 28 | source, and configuration files. 29 | 30 | "Object" form shall mean any form resulting from mechanical 31 | transformation or translation of a Source form, including but 32 | not limited to compiled object code, generated documentation, 33 | and conversions to other media types. 34 | 35 | "Work" shall mean the work of authorship, whether in Source or 36 | Object form, made available under the License, as indicated by a 37 | copyright notice that is included in or attached to the work 38 | (an example is provided in the Appendix below). 39 | 40 | "Derivative Works" shall mean any work, whether in Source or Object 41 | form, that is based on (or derived from) the Work and for which the 42 | editorial revisions, annotations, elaborations, or other modifications 43 | represent, as a whole, an original work of authorship. For the purposes 44 | of this License, Derivative Works shall not include works that remain 45 | separable from, or merely link (or bind by name) to the interfaces of, 46 | the Work and Derivative Works thereof. 
47 | 48 | "Contribution" shall mean any work of authorship, including 49 | the original version of the Work and any modifications or additions 50 | to that Work or Derivative Works thereof, that is intentionally 51 | submitted to Licensor for inclusion in the Work by the copyright owner 52 | or by an individual or Legal Entity authorized to submit on behalf of 53 | the copyright owner. For the purposes of this definition, "submitted" 54 | means any form of electronic, verbal, or written communication sent 55 | to the Licensor or its representatives, including but not limited to 56 | communication on electronic mailing lists, source code control systems, 57 | and issue tracking systems that are managed by, or on behalf of, the 58 | Licensor for the purpose of discussing and improving the Work, but 59 | excluding communication that is conspicuously marked or otherwise 60 | designated in writing by the copyright owner as "Not a Contribution." 61 | 62 | "Contributor" shall mean Licensor and any individual or Legal Entity 63 | on behalf of whom a Contribution has been received by Licensor and 64 | subsequently incorporated within the Work. 65 | 66 | 2. Grant of Copyright License. Subject to the terms and conditions of 67 | this License, each Contributor hereby grants to You a perpetual, 68 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 69 | copyright license to reproduce, prepare Derivative Works of, 70 | publicly display, publicly perform, sublicense, and distribute the 71 | Work and such Derivative Works in Source or Object form. 72 | 73 | 3. Grant of Patent License. 
Subject to the terms and conditions of 74 | this License, each Contributor hereby grants to You a perpetual, 75 | worldwide, non-exclusive, no-charge, royalty-free, irrevocable 76 | (except as stated in this section) patent license to make, have made, 77 | use, offer to sell, sell, import, and otherwise transfer the Work, 78 | where such license applies only to those patent claims licensable 79 | by such Contributor that are necessarily infringed by their 80 | Contribution(s) alone or by combination of their Contribution(s) 81 | with the Work to which such Contribution(s) was submitted. If You 82 | institute patent litigation against any entity (including a 83 | cross-claim or counterclaim in a lawsuit) alleging that the Work 84 | or a Contribution incorporated within the Work constitutes direct 85 | or contributory patent infringement, then any patent licenses 86 | granted to You under this License for that Work shall terminate 87 | as of the date such litigation is filed. 88 | 89 | 4. Redistribution. 
You may reproduce and distribute copies of the 90 | Work or Derivative Works thereof in any medium, with or without 91 | modifications, and in Source or Object form, provided that You 92 | meet the following conditions: 93 | 94 | (a) You must give any other recipients of the Work or 95 | Derivative Works a copy of this License; and 96 | 97 | (b) You must cause any modified files to carry prominent notices 98 | stating that You changed the files; and 99 | 100 | (c) You must retain, in the Source form of any Derivative Works 101 | that You distribute, all copyright, patent, trademark, and 102 | attribution notices from the Source form of the Work, 103 | excluding those notices that do not pertain to any part of 104 | the Derivative Works; and 105 | 106 | (d) If the Work includes a "NOTICE" text file as part of its 107 | distribution, then any Derivative Works that You distribute must 108 | include a readable copy of the attribution notices contained 109 | within such NOTICE file, excluding those notices that do not 110 | pertain to any part of the Derivative Works, in at least one 111 | of the following places: within a NOTICE text file distributed 112 | as part of the Derivative Works; within the Source form or 113 | documentation, if provided along with the Derivative Works; or, 114 | within a display generated by the Derivative Works, if and 115 | wherever such third-party notices normally appear. The contents 116 | of the NOTICE file are for informational purposes only and 117 | do not modify the License. You may add Your own attribution 118 | notices within Derivative Works that You distribute, alongside 119 | or as an addendum to the NOTICE text from the Work, provided 120 | that such additional attribution notices cannot be construed 121 | as modifying the License. 
122 | 123 | You may add Your own copyright statement to Your modifications and 124 | may provide additional or different license terms and conditions 125 | for use, reproduction, or distribution of Your modifications, or 126 | for any such Derivative Works as a whole, provided Your use, 127 | reproduction, and distribution of the Work otherwise complies with 128 | the conditions stated in this License. 129 | 130 | 5. Submission of Contributions. Unless You explicitly state otherwise, 131 | any Contribution intentionally submitted for inclusion in the Work 132 | by You to the Licensor shall be under the terms and conditions of 133 | this License, without any additional terms or conditions. 134 | Notwithstanding the above, nothing herein shall supersede or modify 135 | the terms of any separate license agreement you may have executed 136 | with Licensor regarding such Contributions. 137 | 138 | 6. Trademarks. This License does not grant permission to use the trade 139 | names, trademarks, service marks, or product names of the Licensor, 140 | except as required for reasonable and customary use in describing the 141 | origin of the Work and reproducing the content of the NOTICE file. 142 | 143 | 7. Disclaimer of Warranty. Unless required by applicable law or 144 | agreed to in writing, Licensor provides the Work (and each 145 | Contributor provides its Contributions) on an "AS IS" BASIS, 146 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or 147 | implied, including, without limitation, any warranties or conditions 148 | of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A 149 | PARTICULAR PURPOSE. You are solely responsible for determining the 150 | appropriateness of using or redistributing the Work and assume any 151 | risks associated with Your exercise of permissions under this License. 152 | 153 | 8. Limitation of Liability. 
In no event and under no legal theory, 154 | whether in tort (including negligence), contract, or otherwise, 155 | unless required by applicable law (such as deliberate and grossly 156 | negligent acts) or agreed to in writing, shall any Contributor be 157 | liable to You for damages, including any direct, indirect, special, 158 | incidental, or consequential damages of any character arising as a 159 | result of this License or out of the use or inability to use the 160 | Work (including but not limited to damages for loss of goodwill, 161 | work stoppage, computer failure or malfunction, or any and all 162 | other commercial damages or losses), even if such Contributor 163 | has been advised of the possibility of such damages. 164 | 165 | 9. Accepting Warranty or Additional Liability. While redistributing 166 | the Work or Derivative Works thereof, You may choose to offer, 167 | and charge a fee for, acceptance of support, warranty, indemnity, 168 | or other liability obligations and/or rights consistent with this 169 | License. However, in accepting such obligations, You may act only 170 | on Your own behalf and on Your sole responsibility, not on behalf 171 | of any other Contributor, and only if You agree to indemnify, 172 | defend, and hold each Contributor harmless for any liability 173 | incurred by, or claims asserted against, such Contributor by reason 174 | of your accepting any such warranty or additional liability. 175 | 176 | END OF TERMS AND CONDITIONS 177 | 178 | APPENDIX: How to apply the Apache License to your work. 179 | 180 | To apply the Apache License to your work, attach the following 181 | boilerplate notice, with the fields enclosed by brackets "[]" 182 | replaced with your own identifying information. (Don't include 183 | the brackets!) The text should be enclosed in the appropriate 184 | comment syntax for the file format. 
We also recommend that a 185 | file or class name and description of purpose be included on the 186 | same "printed page" as the copyright notice for easier 187 | identification within third-party archives. 188 | 189 | Copyright [yyyy] [name of copyright owner] 190 | 191 | Licensed under the Apache License, Version 2.0 (the "License"); 192 | you may not use this file except in compliance with the License. 193 | You may obtain a copy of the License at 194 | 195 | http://www.apache.org/licenses/LICENSE-2.0 196 | 197 | Unless required by applicable law or agreed to in writing, software 198 | distributed under the License is distributed on an "AS IS" BASIS, 199 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 200 | See the License for the specific language governing permissions and 201 | limitations under the License. 202 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Python-Binexport 2 | 3 | ``python-binexport`` is a Python module aiming to give a friendly interface to load 4 | and manipulate BinExport files. 5 | 6 | ## What is BinExport? 7 | 8 | BinExport is a ``protobuf`` format used by BinDiff to extract IDA databases and 9 | process them outside of IDA. It gives a very size-optimized representation 10 | of the program. 11 | 12 | ## Dependencies 13 | 14 | Python-binexport can load any .BinExport file generated by the supported disassemblers: 15 | IDA, Ghidra and Binary Ninja. 16 | 17 | However, to perform the export with ``binexporter`` or from the API ``ProgramBinExport.from_binary_file()``, 18 | the BinExport IDA plugin must be installed, as IDA is the only export backend supported at the moment. The plugin has to be [installed first from the GitHub page](https://github.com/google/binexport). 19 | This feature requires IDA >=7.2 (as it calls the ``BinExportBinary`` IDC function). 
20 | 21 | > [!WARNING] 22 | > If you export files from python-binexport, make sure the IDA Pro BinExport plugin is properly installed 23 | > and works when run manually before trying to use it from the Python library (it can hang if not properly installed). 24 | 25 | 26 | > [!NOTE] 27 | > The possibility to export files using Ghidra or Binary Ninja from python-binexport 28 | > might be supported in the future. 29 | 30 | 31 | ## Installation 32 | 33 | pip install python-binexport 34 | 35 | 36 | 37 | ## Python module usage 38 | 39 | The main intended usage of ``python-binexport`` is as a Python module. 40 | The main entry point is the class ``ProgramBinExport``, which triggers the 41 | loading of the whole file. Here is a snippet to iterate over every expression 42 | of every instruction in the program: 43 | 44 | ```python 45 | from binexport import ProgramBinExport 46 | 47 | p = ProgramBinExport("myprogram.BinExport") 48 | for fun_addr, fun in p.items(): 49 | with fun: # Preload all the basic blocks 50 | for bb_addr, bb in fun.items(): 51 | for inst_addr, inst in bb.instructions.items(): 52 | for operand in inst.operands: 53 | for exp in operand.expressions: 54 | pass # Do whatever at such deep level 55 | ``` 56 | 57 | ``ProgramBinExport``, ``FunctionBinExport``, ``InstructionBinExport`` and ``OperandBinExport`` 58 | all provide various attributes and methods to get their type and other information. 59 | 60 | > If the module ``idascript`` is installed you can directly generate a BinExport 61 | > file using the ``ProgramBinExport.from_binary_file`` static method. 62 | 63 | ## Command line usage 64 | 65 | The executable script ``binexporter`` provides a very basic utility 66 | to export a BinExport file straight from the command line *(without 67 | having to launch IDA, etc.)*. This is basically a wrapper for ``ProgramBinExport.from_binary_file``. 
68 | -------------------------------------------------------------------------------- /pyproject.toml: -------------------------------------------------------------------------------- 1 | [build-system] 2 | requires = ["setuptools"] 3 | build-backend = "setuptools.build_meta" 4 | 5 | [project] 6 | name = "python-binexport" 7 | version = "0.4.0" 8 | description = "Python wrapper to manipulate binexport files (protobuf)" 9 | readme = { file = "README.md", content-type = "text/markdown" } 10 | authors = [{ name = "Quarkslab", email = "diffing@quarkslab.com" }] 11 | license = {text = "AGPL-3.0"} 12 | requires-python = ">=3.9" 13 | dependencies = [ 14 | "python-magic; os_name!='nt'", 15 | "python-magic-bin; os_name=='nt'", 16 | "click", 17 | "protobuf", 18 | "networkx", 19 | "enum_tools", 20 | "idascript", 21 | ] 22 | classifiers = [ 23 | 'Topic :: Security', 24 | 'Environment :: Console', 25 | 'Operating System :: OS Independent', 26 | ] 27 | 28 | [project.urls] 29 | Homepage = "https://github.com/quarkslab/python-binexport" 30 | Repository = "https://github.com/quarkslab/python-binexport" 31 | Documentation = "https://quarkslab.github.io/diffing-portal/exporter/binexport.html#python-binexport" 32 | "Bug Tracker" = "https://github.com/quarkslab/python-binexport/issues" 33 | 34 | [project.scripts] 35 | binexporter = 'binexport.__main__:main' 36 | 37 | [tool.black] 38 | line-length = 100 39 | target-version = ['py310'] 40 | -------------------------------------------------------------------------------- /src/binexport/__init__.py: -------------------------------------------------------------------------------- 1 | from .program import ProgramBinExport 2 | from .function import FunctionBinExport 3 | from .basic_block import BasicBlockBinExport 4 | from .instruction import InstructionBinExport 5 | from .operand import OperandBinExport 6 | from .expression import ExpressionBinExport 7 | from .types import DisassemblerBackend 8 | 
-------------------------------------------------------------------------------- /src/binexport/__main__.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # coding: utf-8 3 | 4 | import logging 5 | import traceback 6 | from pathlib import Path 7 | from typing import Generator 8 | 9 | import magic 10 | import click 11 | import queue 12 | import os 13 | 14 | from multiprocessing import Pool, Queue, Manager 15 | from binexport import ProgramBinExport 16 | from binexport.utils import logger 17 | from binexport.types import DisassemblerBackend 18 | 19 | BINARY_FORMAT = { 20 | "application/x-dosexec", 21 | "application/x-sharedlib", 22 | "application/x-mach-binary", 23 | "application/x-executable", 24 | "application/x-pie-executable", 25 | } 26 | 27 | EXTENSIONS_WHITELIST = {"application/octet-stream": [".dex"]} 28 | 29 | CONTEXT_SETTINGS = dict(help_option_names=["-h", "--help"], max_content_width=300) 30 | 31 | class Bcolors: 32 | HEADER = "\033[95m" 33 | OKBLUE = "\033[94m" 34 | OKCYAN = "\033[96m" 35 | OKGREEN = "\033[92m" 36 | WARNING = "\033[93m" 37 | FAIL = "\033[91m" 38 | ENDC = "\033[0m" 39 | BOLD = "\033[1m" 40 | UNDERLINE = "\033[4m" 41 | 42 | 43 | def recursive_file_iter(p: Path) -> Generator[Path, None, None]: 44 | if p.is_file(): 45 | mime_type = magic.from_file(str(p), mime=True) 46 | if mime_type not in BINARY_FORMAT and p.suffix not in EXTENSIONS_WHITELIST.get( 47 | mime_type, [] 48 | ): 49 | pass 50 | else: 51 | yield p 52 | elif p.is_dir(): 53 | for f in p.iterdir(): 54 | yield from recursive_file_iter(f) 55 | 56 | 57 | def export_job(ingress, egress, backend: DisassemblerBackend) -> bool: 58 | while True: 59 | try: 60 | file = ingress.get(timeout=0.5) 61 | res = ProgramBinExport.from_binary_file( 62 | file.as_posix(), backend=backend, open_export=False 63 | ) 64 | egress.put((file, res)) 65 | except Exception as e: 66 | # Might not be printed as triggered withing a fork 67 | 
logger.error(traceback.format_exception(e).decode()) 68 | egress.put((file, e)) 69 | except queue.Empty: 70 | pass 71 | except KeyboardInterrupt: 72 | break 73 | 74 | 75 | def __check_path() -> bool: 76 | global IDA_BINARY 77 | if "PATH" in os.environ: 78 | for p in os.environ["PATH"].split(":"): 79 | for bin_name in __get_names(): 80 | if (Path(p) / bin_name).exists(): 81 | IDA_BINARY = (Path(p) / bin_name).resolve() 82 | return True 83 | return False 84 | 85 | def check_disassembler_availability(disass: DisassemblerBackend, disass_path: str) -> bool: 86 | """ 87 | Check if the disassembler is available in the system. 88 | :param disass: Disassembler backend to check 89 | :param disass_path: Path of the disassembler (if not in PATH) 90 | :return: True if the disassembler is available, False otherwise 91 | """ 92 | if disass == DisassemblerBackend.IDA: 93 | if disass_path: 94 | ida_path = Path(disass_path) 95 | os.environ["IDA_PATH_ENV"] = str(ida_path) if ida_path.is_dir() else str(ida_path.parent) 96 | try: 97 | from idascript import __check_path 98 | return __check_path() 99 | except ImportError: 100 | logger.error("Cannot import idascript python module") 101 | return False 102 | 103 | elif disass == DisassemblerBackend.GHIDRA: 104 | if disass_path: 105 | ghidra_path = Path(disass_path) 106 | os.environ["GHIDRA_PATH"] = disass_path 107 | return ghidra_path.exists() 108 | else: 109 | logger.error(f"Ghidra path {ghidra_path} does not exist") 110 | return False 111 | 112 | elif disass == DisassemblerBackend.BINARY_NINJA: 113 | try: 114 | import binaryninja 115 | except ImportError: 116 | logger.error("Cannot import binaryninja python module") 117 | return False 118 | else: 119 | logger.error(f"Unknown disassembler {disass}") 120 | return False 121 | return True 122 | 123 | 124 | @click.command(context_settings=CONTEXT_SETTINGS) 125 | @click.option( 126 | "-d", 127 | "--disassembler", 128 | type=click.Choice(["ida", "ghidra", "binja"], case_sensitive=False), 129 | 
default="ida", 130 | help="Disassembler to use", 131 | ) 132 | @click.option( 133 | "--disass-path", 134 | type=click.Path(exists=True), 135 | default=None, 136 | help="Ghidra installation directory", 137 | ) 138 | @click.option("-t", "--threads", type=int, default=1, help="Thread number to use") 139 | @click.option("-v", "--verbose", count=True, help="To activate or not the verbosity") 140 | @click.option("--stop-on-error", is_flag=True, default=False, help="Stop on error") 141 | @click.argument("input_file", type=click.Path(exists=True), metavar="") 142 | def main(disassembler: str, 143 | disass_path: str, 144 | input_file: str, 145 | threads: int, 146 | verbose: bool, 147 | stop_on_error: bool) -> None: 148 | """ 149 | binexporter is a very simple utility to generate a .BinExport file 150 | for a given binary or a directory. It opens all binary files and export 151 | the them seamlessly. 152 | 153 | :param disassembler: Disassembler engine to use 154 | :param disass_path: Path of the disassembler (if not in PATH) 155 | :param input_file: Path of the binary to export 156 | :param threads: number of threads to use 157 | :param verbose: To activate or not the verbosity 158 | :param stop_on_error: Stop if any of the worker raises an exception 159 | """ 160 | 161 | logging.basicConfig(format="%(message)s", level=logging.DEBUG if verbose else logging.INFO) 162 | 163 | # Get enum from string 164 | engine = DisassemblerBackend[disassembler.upper()] 165 | 166 | # Check disassembler availability 167 | if not check_disassembler_availability(engine, disass_path): 168 | logger.error(f"Error trying to find disassembler {engine.name.lower()}") 169 | return 170 | 171 | root_path = Path(input_file) 172 | 173 | manager = Manager() 174 | ingress = manager.Queue() 175 | egress = manager.Queue() 176 | pool = Pool(threads) 177 | 178 | # Launch all workers 179 | for _ in range(threads): 180 | pool.apply_async(export_job, (ingress, egress, engine)) 181 | 182 | # Pre-fill ingress queue 
183 | total = 0 184 | for file in recursive_file_iter(root_path): 185 | ingress.put(file) 186 | total += 1 187 | 188 | logger.info(f"Start exporting {total} binaries with {engine.name} backend") 189 | 190 | i = 0 191 | while True: 192 | item = egress.get() 193 | i += 1 194 | path, res = item 195 | 196 | # Check if the result is an exception 197 | if isinstance(res, Exception): 198 | logger.error(f"Error while processing {path}: {res}") 199 | if stop_on_error: 200 | logger.error(traceback.format_exception(res).decode()) 201 | pool.terminate() 202 | break 203 | else: 204 | res = False # set to false and just print KO 205 | 206 | # Print the result 207 | if res: 208 | pp_res = Bcolors.OKGREEN + "OK" + Bcolors.ENDC 209 | else: 210 | pp_res = Bcolors.FAIL + "KO" + Bcolors.ENDC 211 | logger.info(f"[{i}/{total}] {str(path) + '.BinExport'} [{pp_res}]") 212 | if i == total: 213 | break 214 | 215 | pool.terminate() 216 | 217 | 218 | if __name__ == "__main__": 219 | main() 220 | -------------------------------------------------------------------------------- /src/binexport/basic_block.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import weakref 3 | from functools import cached_property 4 | from typing import TYPE_CHECKING 5 | 6 | from binexport.utils import instruction_index_range, get_instruction_address 7 | from binexport.instruction import InstructionBinExport 8 | 9 | if TYPE_CHECKING: 10 | from binexport.program import ProgramBinExport 11 | from binexport.function import FunctionBinExport 12 | from binexport.binexport2_pb2 import BinExport2 13 | from binexport.types import Addr 14 | 15 | 16 | class BasicBlockBinExport: 17 | """ 18 | Basic block class. 
19 | """ 20 | 21 | def __init__( 22 | self, 23 | program: weakref.ref[ProgramBinExport], 24 | function: weakref.ref[FunctionBinExport], 25 | pb_bb: BinExport2.BasicBlock, 26 | ): 27 | """ 28 | :param program: Weak reference to the program 29 | :param function: Weak reference to the function 30 | :param pb_bb: protobuf definition of the basic block 31 | """ 32 | 33 | super(BasicBlockBinExport, self).__init__() 34 | 35 | self._program = program 36 | self._function = function 37 | self.pb_bb = pb_bb 38 | 39 | self.addr: Addr = None #: basic bloc address 40 | self.bytes = b"" #: bytes of the basic block 41 | self._len = 0 #: Length of the basic block (number of instructions) 42 | 43 | # Ranges are in fact the true basic blocks but BinExport 44 | # doesn't have the same basic block semantic and merge multiple basic blocks into one. 45 | # For example: BB_1 -- unconditional_jmp --> BB_2 46 | # might be merged into a single basic block so the edge gets lost. 47 | for rng in pb_bb.instruction_index: 48 | for idx in instruction_index_range(rng): 49 | self.bytes += self.program.proto.instruction[idx].raw_bytes 50 | self._len += 1 51 | 52 | # The first instruction determines the basic block address 53 | if self.addr is None: 54 | self.addr = get_instruction_address(self.program.proto, idx) 55 | 56 | def __hash__(self) -> int: 57 | """ 58 | Make function hashable to be able to store them in sets (for parents, children) 59 | 60 | :return: address of the basic block 61 | """ 62 | return hash(self.addr) 63 | 64 | def __str__(self) -> str: 65 | return "\n".join(str(i) for i in self.instructions.values()) 66 | 67 | def __repr__(self) -> str: 68 | return "<%s:0x%x>" % (type(self).__name__, self.addr) 69 | 70 | def __len__(self) -> int: 71 | return self._len 72 | 73 | @property 74 | def program(self) -> ProgramBinExport: 75 | """ 76 | Wrapper on weak reference on ProgramBinExport 77 | 78 | :return: object :py:class:`ProgramBinExport`, program associated to the basic block 79 | """ 80 
| return self._program() 81 | 82 | @property 83 | def function(self) -> FunctionBinExport: 84 | """ 85 | Wrapper on weak reference on FunctionBinExport 86 | 87 | :return: object :py:class:`FunctionBinExport`, function associated to the basic block 88 | """ 89 | return self._function() 90 | 91 | @cached_property 92 | def contiguous_ranges(self) -> list[tuple[Addr, bytes]]: 93 | """ 94 | The contiguous ranges of instructions contained in this basic block. That identifies 95 | the *real* basic blocks, as BinExport's basic blocks do not necessarily represent a 96 | contiguous block of instructions. 97 | 98 | :return: List of tuples (begin address, bytes), each of them representing a contiguous 99 | block of instructions. 100 | """ 101 | 102 | ranges = [] 103 | 104 | # Ranges are in fact the true basic blocks but BinExport 105 | # doesn't have the same basic block semantic and merge multiple basic blocks into one. 106 | # For example: BB_1 -- unconditional_jmp --> BB_2 107 | # might be merged into a single basic block so the edge gets lost. 108 | for rng in self.pb_bb.instruction_index: 109 | rng_bytes = b"" 110 | rng_addr = None 111 | for idx in instruction_index_range(rng): 112 | rng_bytes += self.program.proto.instruction[idx].raw_bytes 113 | 114 | # The first instruction determines the basic block address 115 | if rng_addr is None: 116 | rng_addr = get_instruction_address(self.program.proto, idx) 117 | ranges.append((rng_addr, rng_bytes)) 118 | 119 | return ranges 120 | 121 | @cached_property 122 | def instructions(self) -> dict[Addr, InstructionBinExport]: 123 | """ 124 | Returns a dict which is used to reference all the instructions in this basic 125 | block by their address. 126 | The object returned is by default cached, to erase the cache delete the attribute. 
127 | 128 | :return: dictionary of addresses to instructions 129 | """ 130 | 131 | instructions = {} 132 | 133 | # Ranges are in fact the true basic blocks but BinExport 134 | # doesn't have the same basic block semantic and merge multiple basic blocks into one. 135 | # For example: BB_1 -- unconditional_jmp --> BB_2 136 | # might be merged into a single basic block so the edge gets lost. 137 | for rng in self.pb_bb.instruction_index: 138 | for idx in instruction_index_range(rng): 139 | inst_addr = get_instruction_address(self.program.proto, idx) 140 | 141 | instructions[inst_addr] = InstructionBinExport( 142 | self._program, self._function, inst_addr, idx 143 | ) 144 | 145 | return instructions 146 | -------------------------------------------------------------------------------- /src/binexport/binexport2.proto: -------------------------------------------------------------------------------- 1 | // Copyright 2011-2021 Google LLC 2 | // 3 | // Licensed under the Apache License, Version 2.0 (the "License"); 4 | // you may not use this file except in compliance with the License. 5 | // You may obtain a copy of the License at 6 | // 7 | // http://www.apache.org/licenses/LICENSE-2.0 8 | // 9 | // Unless required by applicable law or agreed to in writing, software 10 | // distributed under the License is distributed on an "AS IS" BASIS, 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | // See the License for the specific language governing permissions and 13 | // limitations under the License. 14 | 15 | // This file describes a compact representation for disassembled binaries. It is 16 | // loosely based on the PostgreSQL database schema used by BinNavi's 17 | // postgresql_tables.sql (https://git.io/vzlYw). 18 | // It is the output format for the BinExport IDA plugin and the BinDetego 19 | // disassembler and consumed by the BinDiff comparison engine. 
20 | 21 | // The representation is generic to accommodate various source architectures. 22 | // In particular 32 and 64 bit versions of x86, ARM, PowerPC and MIPS have been 23 | // tested. 24 | // 25 | // Multiple levels of deduping have been applied to make the format more compact 26 | // and avoid redundant data duplication. Some of this due to hard-earned 27 | // experience trying to cope with intentionally obfuscated malicious binaries. 28 | // Note in particular that the same instruction may occur in multiple basic 29 | // blocks and the same basic block in multiple functions (instruction and basic 30 | // block sharing). Implemented naively, malware can use this to cause 31 | // combinatorial explosion in memory usage, DOSing the analyst. This format 32 | // should store every unique expression, mnemonic, operand, instruction and 33 | // basic block only once instead of duplicating the information for every 34 | // instance of it. 35 | // 36 | // This format does _not_ try to be 100% backwards compatible with the old 37 | // version. In particular, we do not store IDA's comment types, making lossless 38 | // porting of IDA comments impossible. We do however, store comments and 39 | // expression substitutions, so porting the actual data is possible, just not 40 | // the exact IDA type. 41 | // 42 | // While it would be more natural to use addresses when defining call graph and 43 | // flow graph edges and other such references, it is more efficient to employ 44 | // one more level of indirection and use indices into the basic block or 45 | // function arrays instead. This is because addresses will usually use most of 46 | // the available 64 bit space while indices will be much smaller and compress 47 | // much better (less randomly distributed). 48 | // 49 | // We omit all fields that are set to their default value anyways. 
Note that 50 | // this has two side effects: 51 | // - changing the defaults in this proto file will, in effect, change what's 52 | // read from disk 53 | // - the generated code has_* methods are somewhat less useful 54 | // WARNING: We omit the defaults manually in the code writing the data. Do not 55 | // change the defaults here without changing the code! 56 | // 57 | // TODO(cblichmann): Link flow graphs to call graph nodes. The connection is 58 | // there via the address, but tricky to extract. 59 | 60 | syntax = "proto2"; 61 | 62 | option java_package = "com.google.security.zynamics"; 63 | option java_outer_classname = "BinExport"; 64 | 65 | message BinExport2 { 66 | message Meta { 67 | reserved 5; // Pre-BinDiff 4.3 padding 68 | 69 | // Input binary filename including file extension but excluding file path. 70 | // example: "insider_gcc.exe" 71 | optional string executable_name = 1; 72 | 73 | // Application defined executable id. Often the SHA256 hash of the input 74 | // binary. 75 | optional string executable_id = 2; 76 | 77 | // Input architecture name, e.g. x86-32. 78 | optional string architecture_name = 3; 79 | 80 | // When did this file get created? Unix time. This may be used for some 81 | // primitive versioning in case the file format ever changes. 82 | optional int64 timestamp = 4; 83 | } 84 | 85 | message CallGraph { 86 | message Vertex { 87 | enum Type { 88 | // Regular function with full disassembly. 89 | NORMAL = 0; 90 | 91 | // This function is a well known library function. 92 | LIBRARY = 1; 93 | 94 | // Imported from a dynamic link library (e.g. dll). 95 | IMPORTED = 2; 96 | 97 | // A thunk function, forwarding its work via an unconditional jump. 98 | THUNK = 3; 99 | 100 | // An invalid function (a function that contained invalid code or was 101 | // considered invalid by some heuristics). 102 | INVALID = 4; 103 | } 104 | 105 | // The function's entry point address. 
106 | optional uint64 address = 1; 107 | optional Type type = 2 [default = NORMAL]; 108 | 109 | // If the function has a user defined, real name it will be given here. 110 | // main() is a proper name, sub_BAADF00D is not (auto generated dummy 111 | // name). 112 | optional string mangled_name = 3; 113 | 114 | // Demangled name if the function is a mangled C++ function and we could 115 | // demangle it. 116 | optional string demangled_name = 4; 117 | 118 | // If this is a library function, what is its index in library arrays. 119 | optional int32 library_index = 5; 120 | 121 | // If module name, such as class name for DEX files, is present - index in 122 | // module table. 123 | optional int32 module_index = 6; 124 | } 125 | 126 | message Edge { 127 | // source and target index into the vertex repeated field. 128 | optional int32 source_vertex_index = 1; 129 | optional int32 target_vertex_index = 2; 130 | } 131 | 132 | // vertices == functions in the call graph. 133 | repeated Vertex vertex = 1; 134 | 135 | // edges == calls in the call graph. 136 | repeated Edge edge = 2; 137 | } 138 | 139 | // An operand consists of 1 or more expressions, linked together as a tree. 140 | message Expression { 141 | enum Type { 142 | SYMBOL = 1; 143 | IMMEDIATE_INT = 2; 144 | IMMEDIATE_FLOAT = 3; 145 | OPERATOR = 4; 146 | REGISTER = 5; 147 | SIZE_PREFIX = 6; 148 | DEREFERENCE = 7; 149 | } 150 | 151 | // IMMEDIATE_INT is by far the most common type and thus we can save some 152 | // space by omitting it as the default. 153 | optional Type type = 1 [default = IMMEDIATE_INT]; 154 | 155 | // Symbol for this expression. Interpretation depends on type. Examples 156 | // include: "eax", "[", "+" 157 | optional string symbol = 2; 158 | 159 | // If the expression can be interpreted as an integer value (IMMEDIATE_INT) 160 | // the value is given here. 161 | optional uint64 immediate = 3; 162 | 163 | // The parent expression. 
Example expression tree for the second operand of: 164 | // mov eax, b4 [ebx + 12] 165 | // "b4" --- "[" --- "+" --- "ebx" 166 | // \ "12" 167 | optional int32 parent_index = 4; 168 | 169 | // true if the expression has entry in relocation table 170 | optional bool is_relocation = 5; 171 | } 172 | 173 | // An instruction may have 0 or more operands. 174 | message Operand { 175 | // Contains all expressions constituting this operand. All expressions 176 | // should be linked into a single tree, i.e. there should only be one 177 | // expression in this list with parent_index == NULL and all others should 178 | // descend from that. Rendering order for expressions on the same tree level 179 | // (siblings) is implicitly given by the order they are referenced in this 180 | // repeated field. 181 | // Implicit: expression sequence 182 | repeated int32 expression_index = 1; 183 | } 184 | 185 | // An instruction has exactly 1 mnemonic. 186 | message Mnemonic { 187 | // Literal representation of the mnemonic, e.g.: "mov". 188 | optional string name = 1; 189 | } 190 | 191 | message Instruction { 192 | // This will only be filled for instructions that do not just flow from the 193 | // immediately preceding instruction. Regular instructions will have to 194 | // calculate their own address by adding raw_bytes.size() to the previous 195 | // instruction's address. 196 | optional uint64 address = 1; 197 | 198 | // If this is a call instruction and call targets could be determined 199 | // they'll be given here. Note that we may or may not have a flow graph for 200 | // the target and thus cannot use an index into the flow graph table here. 201 | // We could potentially use call graph nodes, but linking instructions to 202 | // the call graph directly does not seem a good choice. 203 | repeated uint64 call_target = 2; 204 | 205 | // Index into the mnemonic array of strings. Used for de-duping the data. 
206 | // The default value is used for the most common mnemonic in the executable. 207 | optional int32 mnemonic_index = 3 [default = 0]; 208 | 209 | // Indices into the operand tree. On X86 this can be 0, 1 or 2 elements 210 | // long, 3 elements with VEX/EVEX. 211 | // Implicit: operand sequence 212 | repeated int32 operand_index = 4; 213 | 214 | // The unmodified input bytes corresponding to this instruction. 215 | optional bytes raw_bytes = 5; 216 | 217 | // Implicit: comment sequence 218 | repeated int32 comment_index = 6; 219 | } 220 | 221 | message BasicBlock { 222 | // This is a space optimization. The instructions for an individual basic 223 | // block will usually be in a continuous index range. Thus it is more 224 | // efficient to store the range instead of individual indices. However, this 225 | // does not hold true for all basic blocks, so we need to be able to store 226 | // multiple index ranges per block. 227 | message IndexRange { 228 | // These work like begin and end iterators, i.e. the sequence is 229 | // [begin_index, end_index). If the sequence only contains a single 230 | // element end_index will be omitted. 231 | optional int32 begin_index = 1; 232 | optional int32 end_index = 2; 233 | } 234 | 235 | // Implicit: instruction sequence 236 | repeated IndexRange instruction_index = 1; 237 | } 238 | 239 | message FlowGraph { 240 | message Edge { 241 | enum Type { 242 | CONDITION_TRUE = 1; 243 | CONDITION_FALSE = 2; 244 | UNCONDITIONAL = 3; 245 | SWITCH = 4; 246 | } 247 | 248 | // Source instruction will always be the last instruction of the source 249 | // basic block, target instruction the first instruction of the target 250 | // basic block. 251 | optional int32 source_basic_block_index = 1; 252 | optional int32 target_basic_block_index = 2; 253 | optional Type type = 3 [default = UNCONDITIONAL]; 254 | 255 | // Indicates whether this is a loop edge as determined by Lengauer-Tarjan. 
256 | optional bool is_back_edge = 4 [default = false]; 257 | } 258 | 259 | // Basic blocks are sorted by address. 260 | repeated int32 basic_block_index = 1; 261 | 262 | // The flow graph's entry point address is the first instruction of the 263 | // entry_basic_block. 264 | optional int32 entry_basic_block_index = 3; 265 | 266 | repeated Edge edge = 2; 267 | } 268 | 269 | // Generic reference class used for address comments (deprecated), string 270 | // references and expression substitutions. It allows referencing from an 271 | // instruction, operand, expression subtree tuple to a de-duped string in the 272 | // string table. 273 | message Reference { 274 | // Index into the global instruction table. 275 | optional int32 instruction_index = 1; 276 | 277 | // Index into the operand array local to an instruction. 278 | optional int32 instruction_operand_index = 2 [default = 0]; 279 | 280 | // Index into the expression array local to an operand. 281 | optional int32 operand_expression_index = 3 [default = 0]; 282 | 283 | // Index into the global string table. 284 | optional int32 string_table_index = 4; 285 | } 286 | 287 | message DataReference { 288 | // Index into the global instruction table. 289 | optional int32 instruction_index = 1; 290 | 291 | // Address being referred. 292 | optional uint64 address = 2; 293 | } 294 | 295 | message Comment { 296 | enum Type { 297 | // A regular instruction comment. Typically displayed next to the 298 | // instruction disassembly. 299 | DEFAULT = 0; 300 | 301 | // A comment line that is typically displayed before (above) the 302 | // instruction it refers to. 303 | ANTERIOR = 1; 304 | 305 | // Like ANTERIOR, but a typically displayed after (below). 306 | POSTERIOR = 2; 307 | 308 | // Similar to an ANTERIOR comment, but applies to the beginning of an 309 | // identified function. Programs displaying the proto may choose to render 310 | // these differently (e.g. above an inferred function signature). 
311 | FUNCTION = 3; 312 | 313 | // Named constants, bitfields and similar. 314 | ENUM = 4; 315 | 316 | // Named locations, usually the target of a jump. 317 | LOCATION = 5; 318 | 319 | // Data cross references. 320 | GLOBAL_REFERENCE = 6; 321 | 322 | // Local/stack variables. 323 | LOCAL_REFERENCE = 7; 324 | } 325 | 326 | // Index into the global instruction table. This is here to enable 327 | // comment processing without having to iterate over all instructions. 328 | // There is an N:M mapping of instructions to comments. 329 | optional int32 instruction_index = 1; 330 | 331 | // Index into the operand array local to an instruction. 332 | optional int32 instruction_operand_index = 2 [default = 0]; 333 | 334 | // Index into the expression array local to an operand, like in Reference. 335 | // This is not currently used, but allows to implement expression 336 | // substitutions. 337 | optional int32 operand_expression_index = 3 [default = 0]; 338 | 339 | // Index into the global string table. 340 | optional int32 string_table_index = 4; 341 | 342 | // Comment is propagated to all locations that reference the original 343 | // location. 344 | optional bool repeatable = 5; 345 | 346 | optional Type type = 6 [default = DEFAULT]; 347 | } 348 | 349 | message Section { 350 | // Section start address. 351 | optional uint64 address = 1; 352 | 353 | // Section size. 354 | optional uint64 size = 2; 355 | 356 | // Read flag of the section, True when section is readable. 357 | optional bool flag_r = 3; 358 | 359 | // Write flag of the section, True when section is writable. 360 | optional bool flag_w = 4; 361 | 362 | // Execute flag of the section, True when section is executable. 363 | optional bool flag_x = 5; 364 | } 365 | 366 | message Library { 367 | // If this library is statically linked. 368 | optional bool is_static = 1; 369 | 370 | // Address where this library was loaded, 0 if unknown. 
371 | optional uint64 load_address = 2 [default = 0]; 372 | 373 | // Name of the library (format is platform-dependent). 374 | optional string name = 3; 375 | } 376 | 377 | message Module { 378 | // Name, such as Java class name. Platform-dependent. 379 | optional string name = 1; 380 | } 381 | 382 | optional Meta meta_information = 1; 383 | repeated Expression expression = 2; 384 | repeated Operand operand = 3; 385 | repeated Mnemonic mnemonic = 4; 386 | repeated Instruction instruction = 5; 387 | repeated BasicBlock basic_block = 6; 388 | repeated FlowGraph flow_graph = 7; 389 | optional CallGraph call_graph = 8; 390 | 391 | repeated string string_table = 9; 392 | 393 | // No longer written. This is here so that BinDiff can work with older 394 | // BinExport files. 395 | repeated Reference address_comment = 10 [deprecated = true]; 396 | 397 | // Rich comment index used for BinDiff's comment porting. 398 | repeated Comment comment = 17; 399 | repeated Reference string_reference = 11; 400 | repeated Reference expression_substitution = 12; 401 | repeated Section section = 13; 402 | 403 | repeated Library library = 14; 404 | repeated DataReference data_reference = 15; 405 | repeated Module module = 16; 406 | 407 | // Allow for future extensions. 408 | extensions 100000000 to max; 409 | } 410 | -------------------------------------------------------------------------------- /src/binexport/binexport2_pb2.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # Generated by the protocol buffer compiler. DO NOT EDIT! 
3 | # source: binexport2.proto 4 | """Generated protocol buffer code.""" 5 | from google.protobuf.internal import builder as _builder 6 | from google.protobuf import descriptor as _descriptor 7 | from google.protobuf import descriptor_pool as _descriptor_pool 8 | from google.protobuf import symbol_database as _symbol_database 9 | 10 | # @@protoc_insertion_point(imports) 11 | 12 | _sym_db = _symbol_database.Default() 13 | 14 | 15 | DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( 16 | b'\n\x10\x62inexport2.proto"\xa5\x17\n\nBinExport2\x12*\n\x10meta_information\x18\x01 \x01(\x0b\x32\x10.BinExport2.Meta\x12*\n\nexpression\x18\x02 \x03(\x0b\x32\x16.BinExport2.Expression\x12$\n\x07operand\x18\x03 \x03(\x0b\x32\x13.BinExport2.Operand\x12&\n\x08mnemonic\x18\x04 \x03(\x0b\x32\x14.BinExport2.Mnemonic\x12,\n\x0binstruction\x18\x05 \x03(\x0b\x32\x17.BinExport2.Instruction\x12+\n\x0b\x62\x61sic_block\x18\x06 \x03(\x0b\x32\x16.BinExport2.BasicBlock\x12)\n\nflow_graph\x18\x07 \x03(\x0b\x32\x15.BinExport2.FlowGraph\x12)\n\ncall_graph\x18\x08 \x01(\x0b\x32\x15.BinExport2.CallGraph\x12\x14\n\x0cstring_table\x18\t \x03(\t\x12\x32\n\x0f\x61\x64\x64ress_comment\x18\n \x03(\x0b\x32\x15.BinExport2.ReferenceB\x02\x18\x01\x12$\n\x07\x63omment\x18\x11 \x03(\x0b\x32\x13.BinExport2.Comment\x12/\n\x10string_reference\x18\x0b \x03(\x0b\x32\x15.BinExport2.Reference\x12\x36\n\x17\x65xpression_substitution\x18\x0c \x03(\x0b\x32\x15.BinExport2.Reference\x12$\n\x07section\x18\r \x03(\x0b\x32\x13.BinExport2.Section\x12$\n\x07library\x18\x0e \x03(\x0b\x32\x13.BinExport2.Library\x12\x31\n\x0e\x64\x61ta_reference\x18\x0f \x03(\x0b\x32\x19.BinExport2.DataReference\x12"\n\x06module\x18\x10 \x03(\x0b\x32\x12.BinExport2.Module\x1aj\n\x04Meta\x12\x17\n\x0f\x65xecutable_name\x18\x01 \x01(\t\x12\x15\n\rexecutable_id\x18\x02 \x01(\t\x12\x19\n\x11\x61rchitecture_name\x18\x03 \x01(\t\x12\x11\n\ttimestamp\x18\x04 \x01(\x03J\x04\x08\x05\x10\x06\x1a\x9c\x03\n\tCallGraph\x12,\n\x06vertex\x18\x01 
\x03(\x0b\x32\x1c.BinExport2.CallGraph.Vertex\x12(\n\x04\x65\x64ge\x18\x02 \x03(\x0b\x32\x1a.BinExport2.CallGraph.Edge\x1a\xf4\x01\n\x06Vertex\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x04\x12\x37\n\x04type\x18\x02 \x01(\x0e\x32!.BinExport2.CallGraph.Vertex.Type:\x06NORMAL\x12\x14\n\x0cmangled_name\x18\x03 \x01(\t\x12\x16\n\x0e\x64\x65mangled_name\x18\x04 \x01(\t\x12\x15\n\rlibrary_index\x18\x05 \x01(\x05\x12\x14\n\x0cmodule_index\x18\x06 \x01(\x05"E\n\x04Type\x12\n\n\x06NORMAL\x10\x00\x12\x0b\n\x07LIBRARY\x10\x01\x12\x0c\n\x08IMPORTED\x10\x02\x12\t\n\x05THUNK\x10\x03\x12\x0b\n\x07INVALID\x10\x04\x1a@\n\x04\x45\x64ge\x12\x1b\n\x13source_vertex_index\x18\x01 \x01(\x05\x12\x1b\n\x13target_vertex_index\x18\x02 \x01(\x05\x1a\x90\x02\n\nExpression\x12\x38\n\x04type\x18\x01 \x01(\x0e\x32\x1b.BinExport2.Expression.Type:\rIMMEDIATE_INT\x12\x0e\n\x06symbol\x18\x02 \x01(\t\x12\x11\n\timmediate\x18\x03 \x01(\x04\x12\x14\n\x0cparent_index\x18\x04 \x01(\x05\x12\x15\n\ris_relocation\x18\x05 \x01(\x08"x\n\x04Type\x12\n\n\x06SYMBOL\x10\x01\x12\x11\n\rIMMEDIATE_INT\x10\x02\x12\x13\n\x0fIMMEDIATE_FLOAT\x10\x03\x12\x0c\n\x08OPERATOR\x10\x04\x12\x0c\n\x08REGISTER\x10\x05\x12\x0f\n\x0bSIZE_PREFIX\x10\x06\x12\x0f\n\x0b\x44\x45REFERENCE\x10\x07\x1a#\n\x07Operand\x12\x18\n\x10\x65xpression_index\x18\x01 \x03(\x05\x1a\x18\n\x08Mnemonic\x12\x0c\n\x04name\x18\x01 \x01(\t\x1a\x8f\x01\n\x0bInstruction\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x04\x12\x13\n\x0b\x63\x61ll_target\x18\x02 \x03(\x04\x12\x19\n\x0emnemonic_index\x18\x03 \x01(\x05:\x01\x30\x12\x15\n\roperand_index\x18\x04 \x03(\x05\x12\x11\n\traw_bytes\x18\x05 \x01(\x0c\x12\x15\n\rcomment_index\x18\x06 \x03(\x05\x1a\x80\x01\n\nBasicBlock\x12<\n\x11instruction_index\x18\x01 \x03(\x0b\x32!.BinExport2.BasicBlock.IndexRange\x1a\x34\n\nIndexRange\x12\x13\n\x0b\x62\x65gin_index\x18\x01 \x01(\x05\x12\x11\n\tend_index\x18\x02 \x01(\x05\x1a\xe9\x02\n\tFlowGraph\x12\x19\n\x11\x62\x61sic_block_index\x18\x01 
\x03(\x05\x12\x1f\n\x17\x65ntry_basic_block_index\x18\x03 \x01(\x05\x12(\n\x04\x65\x64ge\x18\x02 \x03(\x0b\x32\x1a.BinExport2.FlowGraph.Edge\x1a\xf5\x01\n\x04\x45\x64ge\x12 \n\x18source_basic_block_index\x18\x01 \x01(\x05\x12 \n\x18target_basic_block_index\x18\x02 \x01(\x05\x12<\n\x04type\x18\x03 \x01(\x0e\x32\x1f.BinExport2.FlowGraph.Edge.Type:\rUNCONDITIONAL\x12\x1b\n\x0cis_back_edge\x18\x04 \x01(\x08:\x05\x66\x61lse"N\n\x04Type\x12\x12\n\x0e\x43ONDITION_TRUE\x10\x01\x12\x13\n\x0f\x43ONDITION_FALSE\x10\x02\x12\x11\n\rUNCONDITIONAL\x10\x03\x12\n\n\x06SWITCH\x10\x04\x1a\x8d\x01\n\tReference\x12\x19\n\x11instruction_index\x18\x01 \x01(\x05\x12$\n\x19instruction_operand_index\x18\x02 \x01(\x05:\x01\x30\x12#\n\x18operand_expression_index\x18\x03 \x01(\x05:\x01\x30\x12\x1a\n\x12string_table_index\x18\x04 \x01(\x05\x1a;\n\rDataReference\x12\x19\n\x11instruction_index\x18\x01 \x01(\x05\x12\x0f\n\x07\x61\x64\x64ress\x18\x02 \x01(\x04\x1a\xd4\x02\n\x07\x43omment\x12\x19\n\x11instruction_index\x18\x01 \x01(\x05\x12$\n\x19instruction_operand_index\x18\x02 \x01(\x05:\x01\x30\x12#\n\x18operand_expression_index\x18\x03 \x01(\x05:\x01\x30\x12\x1a\n\x12string_table_index\x18\x04 \x01(\x05\x12\x12\n\nrepeatable\x18\x05 \x01(\x08\x12/\n\x04type\x18\x06 \x01(\x0e\x32\x18.BinExport2.Comment.Type:\x07\x44\x45\x46\x41ULT"\x81\x01\n\x04Type\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x0c\n\x08\x41NTERIOR\x10\x01\x12\r\n\tPOSTERIOR\x10\x02\x12\x0c\n\x08\x46UNCTION\x10\x03\x12\x08\n\x04\x45NUM\x10\x04\x12\x0c\n\x08LOCATION\x10\x05\x12\x14\n\x10GLOBAL_REFERENCE\x10\x06\x12\x13\n\x0fLOCAL_REFERENCE\x10\x07\x1aX\n\x07Section\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x04\x12\x0c\n\x04size\x18\x02 \x01(\x04\x12\x0e\n\x06\x66lag_r\x18\x03 \x01(\x08\x12\x0e\n\x06\x66lag_w\x18\x04 \x01(\x08\x12\x0e\n\x06\x66lag_x\x18\x05 \x01(\x08\x1a\x43\n\x07Library\x12\x11\n\tis_static\x18\x01 \x01(\x08\x12\x17\n\x0cload_address\x18\x02 \x01(\x04:\x01\x30\x12\x0c\n\x04name\x18\x03 
\x01(\t\x1a\x16\n\x06Module\x12\x0c\n\x04name\x18\x01 \x01(\t*\x0b\x08\x80\xc2\xd7/\x10\x80\x80\x80\x80\x02\x42)\n\x1c\x63om.google.security.zynamicsB\tBinExport' 17 | ) 18 | 19 | _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) 20 | _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, "binexport2_pb2", globals()) 21 | if _descriptor._USE_C_DESCRIPTORS == False: 22 | 23 | DESCRIPTOR._options = None 24 | DESCRIPTOR._serialized_options = b"\n\034com.google.security.zynamicsB\tBinExport" 25 | _BINEXPORT2.fields_by_name["address_comment"]._options = None 26 | _BINEXPORT2.fields_by_name["address_comment"]._serialized_options = b"\030\001" 27 | _BINEXPORT2._serialized_start = 21 28 | _BINEXPORT2._serialized_end = 3002 29 | _BINEXPORT2_META._serialized_start = 758 30 | _BINEXPORT2_META._serialized_end = 864 31 | _BINEXPORT2_CALLGRAPH._serialized_start = 867 32 | _BINEXPORT2_CALLGRAPH._serialized_end = 1279 33 | _BINEXPORT2_CALLGRAPH_VERTEX._serialized_start = 969 34 | _BINEXPORT2_CALLGRAPH_VERTEX._serialized_end = 1213 35 | _BINEXPORT2_CALLGRAPH_VERTEX_TYPE._serialized_start = 1144 36 | _BINEXPORT2_CALLGRAPH_VERTEX_TYPE._serialized_end = 1213 37 | _BINEXPORT2_CALLGRAPH_EDGE._serialized_start = 1215 38 | _BINEXPORT2_CALLGRAPH_EDGE._serialized_end = 1279 39 | _BINEXPORT2_EXPRESSION._serialized_start = 1282 40 | _BINEXPORT2_EXPRESSION._serialized_end = 1554 41 | _BINEXPORT2_EXPRESSION_TYPE._serialized_start = 1434 42 | _BINEXPORT2_EXPRESSION_TYPE._serialized_end = 1554 43 | _BINEXPORT2_OPERAND._serialized_start = 1556 44 | _BINEXPORT2_OPERAND._serialized_end = 1591 45 | _BINEXPORT2_MNEMONIC._serialized_start = 1593 46 | _BINEXPORT2_MNEMONIC._serialized_end = 1617 47 | _BINEXPORT2_INSTRUCTION._serialized_start = 1620 48 | _BINEXPORT2_INSTRUCTION._serialized_end = 1763 49 | _BINEXPORT2_BASICBLOCK._serialized_start = 1766 50 | _BINEXPORT2_BASICBLOCK._serialized_end = 1894 51 | _BINEXPORT2_BASICBLOCK_INDEXRANGE._serialized_start = 1842 52 | 
_BINEXPORT2_BASICBLOCK_INDEXRANGE._serialized_end = 1894 53 | _BINEXPORT2_FLOWGRAPH._serialized_start = 1897 54 | _BINEXPORT2_FLOWGRAPH._serialized_end = 2258 55 | _BINEXPORT2_FLOWGRAPH_EDGE._serialized_start = 2013 56 | _BINEXPORT2_FLOWGRAPH_EDGE._serialized_end = 2258 57 | _BINEXPORT2_FLOWGRAPH_EDGE_TYPE._serialized_start = 2180 58 | _BINEXPORT2_FLOWGRAPH_EDGE_TYPE._serialized_end = 2258 59 | _BINEXPORT2_REFERENCE._serialized_start = 2261 60 | _BINEXPORT2_REFERENCE._serialized_end = 2402 61 | _BINEXPORT2_DATAREFERENCE._serialized_start = 2404 62 | _BINEXPORT2_DATAREFERENCE._serialized_end = 2463 63 | _BINEXPORT2_COMMENT._serialized_start = 2466 64 | _BINEXPORT2_COMMENT._serialized_end = 2806 65 | _BINEXPORT2_COMMENT_TYPE._serialized_start = 2677 66 | _BINEXPORT2_COMMENT_TYPE._serialized_end = 2806 67 | _BINEXPORT2_SECTION._serialized_start = 2808 68 | _BINEXPORT2_SECTION._serialized_end = 2896 69 | _BINEXPORT2_LIBRARY._serialized_start = 2898 70 | _BINEXPORT2_LIBRARY._serialized_end = 2965 71 | _BINEXPORT2_MODULE._serialized_start = 2967 72 | _BINEXPORT2_MODULE._serialized_end = 2989 73 | # @@protoc_insertion_point(module_scope) 74 | -------------------------------------------------------------------------------- /src/binexport/expression.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | from functools import cached_property 3 | from typing import TYPE_CHECKING 4 | 5 | from binexport.binexport2_pb2 import BinExport2 6 | from binexport.types import ExpressionType 7 | from binexport.utils import logger 8 | 9 | if TYPE_CHECKING: 10 | from .program import ProgramBinExport 11 | from .function import FunctionBinExport 12 | from .instruction import InstructionBinExport 13 | 14 | 15 | def to_signed(n: int, mask: int) -> int: 16 | """ 17 | Signed representation of `n` using `mask` 18 | 19 | :return: the python int version of the signed integer `n` using the specified mask 20 | """ 21 | 22 | 
assert (mask + 1) & mask == 0, "Mask must be in the form 2^n - 1" 23 | n &= mask 24 | sign_bit = (mask + 1) >> 1 25 | return (n ^ sign_bit) - sign_bit 26 | 27 | 28 | class ExpressionBinExport: 29 | """ 30 | Class that represents an expression node in the expression tree for a specific 31 | operand. The tree is inverted (each node has an edge to its parent) 32 | """ 33 | 34 | __sz_lookup = { 35 | "b1": 1, 36 | "b2": 2, 37 | "b4": 4, 38 | "b8": 8, 39 | "b10": 10, 40 | "b16": 16, 41 | "b32": 32, 42 | "b64": 64, 43 | } 44 | __sz_name = { 45 | 1: "byte", 46 | 2: "word", 47 | 4: "dword", 48 | 8: "qword", 49 | 10: "b10", 50 | 16: "xmmword", 51 | 32: "ymmword", 52 | 64: "zmmword", 53 | } 54 | 55 | def __init__( 56 | self, 57 | program: ProgramBinExport, 58 | function: FunctionBinExport, 59 | instruction: InstructionBinExport, 60 | exp_idx: int, 61 | parent: ExpressionBinExport | None = None, 62 | ): 63 | """ 64 | :param program: reference to program 65 | :param function: reference to function 66 | :param instruction: reference to instruction 67 | :param exp_idx: expression index in the protobuf table 68 | :param parent: reference to the parent expression in the tree. 69 | None if it is the root.
70 | """ 71 | 72 | self._idx = exp_idx 73 | self.parent: ExpressionBinExport | None = parent #: parent expression if nested 74 | self.is_addr: bool = False #: whether the value is referring to an address 75 | self.is_data: bool = False #: whether the value is a reference to data 76 | 77 | # Expression object in the protobuf structure 78 | self.pb_expr = program.proto.expression[self._idx] 79 | 80 | self._parse_protobuf(program, function, instruction) 81 | 82 | def __hash__(self) -> int: 83 | return hash(self._idx) 84 | 85 | @property 86 | def type(self) -> ExpressionType: 87 | """ 88 | Returns the type as defined in `ExpressionType` of the expression, after the protobuf parsing 89 | """ 90 | 91 | return self._type 92 | 93 | @property 94 | def value(self) -> str | int | float: 95 | """ 96 | Returns the value of the expression, after the protobuf parsing 97 | 98 | :return: value of the expression 99 | """ 100 | return self._value 101 | 102 | @cached_property 103 | def depth(self) -> int: 104 | """ 105 | Returns the depth of the node in the tree (root is depth 0). 106 | """ 107 | if self.parent is None: 108 | return 0 109 | return self.parent.depth + 1 110 | 111 | def _parse_protobuf( 112 | self, 113 | program: ProgramBinExport, 114 | function: FunctionBinExport, 115 | instruction: InstructionBinExport, 116 | ) -> None: 117 | """ 118 | Low-level expression parser. 
It populates self._type and self._value 119 | """ 120 | if self.pb_expr.type == BinExport2.Expression.SYMBOL: 121 | self._value = self.pb_expr.symbol 122 | 123 | if self.pb_expr.symbol in program.fun_names: # It is a function name 124 | self._type = ExpressionType.FUNC_NAME 125 | else: # It is a local symbol (ex: var_, arg_) 126 | self._type = ExpressionType.VAR_NAME 127 | 128 | elif self.pb_expr.type == BinExport2.Expression.IMMEDIATE_INT: 129 | self._type = ExpressionType.IMMEDIATE_INT 130 | self._value = to_signed(self.pb_expr.immediate, program.mask) 131 | 132 | if self.pb_expr.immediate in instruction.data_refs: # Data 133 | self.is_addr = True 134 | self.is_data = True 135 | elif self.pb_expr.immediate in program or self.pb_expr.immediate in function: # Address 136 | self.is_addr = True 137 | 138 | elif self.pb_expr.type == BinExport2.Expression.IMMEDIATE_FLOAT: 139 | self._type = ExpressionType.IMMEDIATE_FLOAT 140 | self._value = self.pb_expr.immediate # TODO: the raw integer immediate is stored as-is; it is not actually cast to float 141 | 142 | elif self.pb_expr.type == BinExport2.Expression.OPERATOR: 143 | self._type = ExpressionType.SYMBOL 144 | self._value = self.pb_expr.symbol 145 | 146 | elif self.pb_expr.type == BinExport2.Expression.REGISTER: 147 | self._type = ExpressionType.REGISTER 148 | self._value = self.pb_expr.symbol 149 | 150 | elif self.pb_expr.type == BinExport2.Expression.SIZE_PREFIX: 151 | self._type = ExpressionType.SIZE 152 | self._value = self.__sz_lookup[self.pb_expr.symbol] 153 | 154 | elif self.pb_expr.type == BinExport2.Expression.DEREFERENCE: 155 | self._type = ExpressionType.SYMBOL 156 | self._value = self.pb_expr.symbol 157 | 158 | else: 159 | logger.error(f"Malformed protobuf message.
Invalid expression type {self.pb_expr.type}") 160 | -------------------------------------------------------------------------------- /src/binexport/function.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import weakref 3 | import networkx 4 | from functools import cached_property 5 | from typing import TYPE_CHECKING 6 | 7 | from binexport.utils import get_basic_block_addr, logger 8 | from binexport.basic_block import BasicBlockBinExport 9 | from binexport.types import FunctionType 10 | 11 | if TYPE_CHECKING: 12 | from collections import abc 13 | from binexport.program import ProgramBinExport 14 | from binexport.binexport2_pb2 import BinExport2 15 | from binexport.types import Addr 16 | 17 | 18 | class FunctionBinExport: 19 | """ 20 | Function object. 21 | Also references its parents and children (functions it calls). 22 | """ 23 | 24 | def __init__( 25 | self, 26 | program: weakref.ref[ProgramBinExport], 27 | *, 28 | pb_fun: BinExport2.FlowGraph | None = None, 29 | is_import: bool = False, 30 | addr: Addr | None = None, 31 | ): 32 | """ 33 | Constructor. Stores the FlowGraph structure; the basic blocks and 34 | instructions are only built later, on access to `blocks`. 35 | 36 | :param program: weak reference to program (used to navigate pb fields contained inside) 37 | :param pb_fun: FlowGraph protobuf structure 38 | :param is_import: whether or not it's an import function (if so does not initialize bb etc..) 39 | :param addr: address of the function (info available in the call graph) 40 | """ 41 | super(FunctionBinExport, self).__init__() 42 | 43 | self.addr: Addr | None = addr #: address, None if imported function 44 | self.parents: set[FunctionBinExport] = set() #: set of functions calling this one 45 | self.children: set[FunctionBinExport] = set() #: set of functions called by this one 46 | 47 | # Private attributes 48 | self._graph = None # CFG.
Loaded inside self.blocks 49 | self._type = None # Set by the Program constructor 50 | self._name = None # Set by the Program constructor 51 | self._program = program 52 | self._pb_fun = pb_fun 53 | self._enable_unloading = False 54 | self._basic_blocks = None 55 | 56 | if is_import: 57 | if self.addr is None: 58 | logger.error("Missing function address for imported function") 59 | return 60 | 61 | assert pb_fun is not None, "pb_fun must be provided" 62 | 63 | self.addr = get_basic_block_addr(self.program.proto, pb_fun.entry_basic_block_index) 64 | 65 | def __hash__(self) -> int: 66 | """ 67 | Make function hashable to be able to store them in sets (for parents, children) 68 | 69 | :return: address of the function 70 | """ 71 | return hash(self.addr) 72 | 73 | def __repr__(self) -> str: 74 | return "<%s: 0x%x>" % (type(self).__name__, self.addr) 75 | 76 | def __enter__(self) -> None: 77 | """Preload basic blocks and don't deallocate them until __exit__ is called""" 78 | 79 | self._enable_unloading = False 80 | self.preload() 81 | 82 | def __exit__(self, exc_type, exc_value, traceback) -> None: 83 | """Deallocate all the basic blocks""" 84 | 85 | self._enable_unloading = True 86 | self.unload() 87 | 88 | def preload(self) -> None: 89 | """Load in memory all the basic blocks""" 90 | 91 | self._basic_blocks = self.blocks 92 | 93 | def unload(self) -> None: 94 | """Unload from memory all the basic blocks""" 95 | 96 | if self._enable_unloading: 97 | self._basic_blocks = None 98 | 99 | def items(self) -> abc.ItemsView[Addr, BasicBlockBinExport]: 100 | """ 101 | Each function is associated to a dictionary with key-value 102 | Addr->BasicBlockBinExport. This returns items of the dictionary. 103 | """ 104 | return self.blocks.items() 105 | 106 | def keys(self) -> abc.KeysView[Addr]: 107 | """ 108 | Each function is associated to a dictionary with key-value : Addr, BasicBlockBinExport. 
This returns items 109 | of the dictionary 110 | """ 111 | return self.blocks.keys() 112 | 113 | def values(self) -> abc.ValuesView[BasicBlockBinExport]: 114 | """ 115 | Each function is associated to a dictionary with key-value : Addr, BasicBlockBinExport. This returns items 116 | of the dictionary. 117 | """ 118 | return self.blocks.values() 119 | 120 | def __getitem__(self, item: Addr) -> BasicBlockBinExport: 121 | """ 122 | Get a basic block object from its address. 123 | 124 | :param item: address 125 | :return: Basic block object 126 | """ 127 | return self.blocks[item] 128 | 129 | def __contains__(self, item: Addr) -> bool: 130 | """ 131 | Return if the address given correspond to a basic block head. 132 | 133 | :param item: basic block address 134 | :return: true if basic block address into this function 135 | """ 136 | return item in self.blocks 137 | 138 | @property 139 | def program(self) -> ProgramBinExport: 140 | """ 141 | :py:class:`ProgramBinExport` in which this function belongs to. 142 | """ 143 | return self._program() 144 | 145 | @property 146 | def blocks(self) -> dict[Addr, BasicBlockBinExport]: 147 | """ 148 | Returns a dict which is used to reference all basic blocks by their address. 149 | Calling this function will also load the CFG. 150 | By default the object returned is not cached, calling this function multiple times will 151 | create the same object multiple times. If you want to cache the object you 152 | should use the context manager of the function or calling the function `FunctionBinExport.load`. 153 | Ex: 154 | 155 | .. 
code-block:: python 156 | :linenos: 157 | 158 | # func: FunctionBinExport 159 | with func: # Loading all the basic blocks 160 | for bb_addr, bb in func.blocks.items(): # Blocks are already loaded 161 | pass 162 | # The blocks are still loaded 163 | for bb_addr, bb in func.blocks.items(): 164 | pass 165 | # here the blocks have been unloaded 166 | 167 | :return: dictionary of addresses to basic blocks 168 | """ 169 | 170 | # Check if the blocks are already loaded 171 | if self._basic_blocks is not None: 172 | return self._basic_blocks 173 | 174 | # Fast return if it is a imported function 175 | if self.is_import(): 176 | if self._graph is None: 177 | self._graph = networkx.DiGraph() 178 | return {} 179 | 180 | # Add a sanity check to prevent error, for some reason _pb_fun may be undefined 181 | if not self._pb_fun: 182 | return {} 183 | 184 | bblocks = {} # {addr : BasicBlockBinExport} 185 | load_graph = False 186 | if self._graph is None: 187 | self._graph = networkx.DiGraph() 188 | load_graph = True 189 | 190 | # Load the basic blocks 191 | bb_i2a = {} # Map {basic block index -> basic block address} 192 | for bb_idx in self._pb_fun.basic_block_index: 193 | basic_block = BasicBlockBinExport( 194 | self._program, weakref.ref(self), self.program.proto.basic_block[bb_idx] 195 | ) 196 | 197 | if basic_block.addr in bblocks: 198 | logger.error( 199 | f"0x{self.addr:x} basic block address (0x{basic_block.addr:x}) already in(idx:{bb_idx})" 200 | ) 201 | 202 | bblocks[basic_block.addr] = basic_block 203 | bb_i2a[bb_idx] = basic_block.addr 204 | if load_graph: 205 | self._graph.add_node(basic_block.addr) 206 | 207 | # Load the edges between blocks 208 | if load_graph: 209 | for edge in self._pb_fun.edge: 210 | # Source will always be in a basic block 211 | bb_src = bb_i2a[edge.source_basic_block_index] 212 | 213 | # Target might be a different function and not a basic block. 214 | # e.g. 
in case of a jmp to another function (or a `bl` in ARM) 215 | if edge.target_basic_block_index not in bb_i2a: 216 | continue 217 | 218 | bb_dst = bb_i2a[edge.target_basic_block_index] 219 | self._graph.add_edge(bb_src, bb_dst) 220 | 221 | return bblocks 222 | 223 | @property 224 | def graph(self) -> networkx.DiGraph: 225 | """ 226 | The networkx CFG associated to the function. 227 | """ 228 | if self._graph is None: 229 | _ = self.blocks # Load the CFG 230 | return self._graph 231 | 232 | @property 233 | def name(self) -> str: 234 | """ 235 | Name of the function if it exists otherwise like IDA with sub_XXX 236 | """ 237 | return self._name if self._name else "sub_%X" % self.addr 238 | 239 | @name.setter 240 | def name(self, name: str) -> None: 241 | """ 242 | Function name setter (available in the call graph of the pb object) 243 | 244 | :param name: name to give the function 245 | :return: None 246 | """ 247 | 248 | self._name = name 249 | 250 | @property 251 | def type(self) -> FunctionType: 252 | """ 253 | Type of the function as a FunctionType 254 | 255 | :return: type enum of the function 256 | """ 257 | return self._type 258 | 259 | @type.setter 260 | def type(self, value: FunctionType) -> None: 261 | """ 262 | Set the type of the function. 
class InstructionBinExport:
    """
    A single disassembled instruction, giving access to its mnemonic,
    raw bytes, data references and operands.
    """

    def __init__(
        self,
        program: weakref.ref[ProgramBinExport],
        function: weakref.ref[FunctionBinExport],
        addr: Addr,
        i_idx: int,
    ):
        """
        :param program: Weak reference to the program
        :param function: Weak reference to the function
        :param addr: address of the instruction (computed outside)
        :param i_idx: instruction index in the protobuf data structure
        """
        # References and index first: the properties used below rely on them
        self._program = program
        self._function = function
        self._idx = i_idx
        self.addr: Addr = addr  #: instruction address

        #: Data references address
        self.data_refs: set[Addr] = self.program.data_refs[self._idx]
        #: bytes of the instruction (opcodes)
        self.bytes = self.pb_instr.raw_bytes

        #: textual form of the instruction, "<mnemonic> <op>, <op>, ..."
        mnemonic = self.mnemonic
        operands_text = ", ".join(map(str, self.operands))
        self.disasm = f"{mnemonic} {operands_text}"

    def __hash__(self) -> int:
        return hash(self.addr)

    def __str__(self) -> str:
        return f"{self.mnemonic} {', '.join(map(str, self.operands))}"

    def __repr__(self) -> str:
        prefix = f"<{type(self).__name__} {self.addr:#08x}: {self.mnemonic} "
        return prefix + ", ".join(map(str, self.operands)) + ">"

    @property
    def program(self) -> ProgramBinExport:
        """
        Program this instruction belongs to (dereferenced weak reference).
        """
        return self._program()

    @property
    def pb_instr(self) -> BinExport2.Instruction:
        """
        Protobuf instruction object, looked up by index in the program protobuf.
        """
        proto = self.program.proto
        return proto.instruction[self._idx]

    @property
    def mnemonic(self) -> str:
        """
        Mnemonic string as gathered by binexport (with prefix).
        """
        mnemonic_table = self.program.proto.mnemonic
        return mnemonic_table[self.pb_instr.mnemonic_index].name

    @cached_property
    def operands(self) -> list[OperandBinExport]:
        """
        List of the operands, instantiated on first access.
        The list is cached by default, to erase the cache delete the attribute.

        :return: list of operands
        """
        operand_list = []
        for op_idx in self.pb_instr.operand_index:
            operand_list.append(
                OperandBinExport(self._program, self._function, weakref.ref(self), op_idx)
            )
        return operand_list
class OperandBinExport:
    """
    Operand of an instruction.
    Gives access to the expressions the operand is made of.
    """

    def __init__(
        self,
        program: weakref.ref[ProgramBinExport],
        function: weakref.ref[FunctionBinExport],
        instruction: weakref.ref[InstructionBinExport],
        op_idx: int,
    ):
        """
        :param program: Weak reference to the program
        :param function: Weak reference to the function
        :param instruction: Weak reference to the instruction
        :param op_idx: operand index in protobuf structure
        """
        self._idx = op_idx
        self._instruction = instruction
        self._function = function
        self._program = program

    def __str__(self) -> str:
        """
        Formatted string of the operand (shown in-order)

        :return: string of the operand, empty if it has no expression
        """

        # Symbols that open a group and the text closing it
        closing = {"{": "}", "[": "]", "!": ""}

        class _Node:
            """Expression together with the nodes of its child expressions."""

            def __init__(self, expr: ExpressionBinExport):
                self.children = []
                self.expr = expr

            def __str__(self) -> str:
                expr = self.expr
                kids = self.children

                # Exactly two children: binary operator, printed infix
                if len(kids) == 2:
                    lhs = str(kids[0])
                    rhs = str(kids[1])
                    return f"{lhs}{expr.value}{rhs}"

                pieces = []
                if expr.type != ExpressionType.SIZE:  # Ignore SIZE
                    value = expr.value
                    pieces.append(hex(value) if isinstance(value, int) else str(value))

                pieces.append(",".join(map(str, kids)))

                if expr.type == ExpressionType.SYMBOL and expr.value in closing:
                    pieces.append(closing[expr.value])

                return "".join(pieces)

        nodes = {}
        for expr in self.expressions:
            node = nodes[expr] = _Node(expr)
            if expr.parent:
                nodes[expr.parent].children.append(node)
            else:
                # Parent-less expression: root of the tree (the last one wins)
                root = expr

        return str(nodes[root]) if nodes else ""

    def __repr__(self) -> str:
        return f"<{type(self).__name__} {self}>"

    @property
    def program(self) -> ProgramBinExport:
        """
        Program object this operand belongs to (dereferenced weak reference).
        """
        return self._program()

    @property
    def function(self) -> FunctionBinExport:
        """
        Function object this operand belongs to (dereferenced weak reference).
        """
        return self._function()

    @property
    def instruction(self) -> InstructionBinExport:
        """
        Instruction object this operand belongs to (dereferenced weak reference).
        """
        return self._instruction()

    @property
    def pb_operand(self) -> BinExport2.Operand:
        """
        Protobuf operand object, looked up by index in the program protobuf.
        """
        proto = self.program.proto
        return proto.operand[self._idx]

    @cached_property
    def expressions(self) -> list[ExpressionBinExport]:
        """
        All the expressions of the operand, in the order given by the
        protobuf operand (pre-order, binary operator first).
        The list is cached by default, to erase the cache delete the attribute

        :return: list of expressions
        """

        built = {}  # {expression protobuf idx : ExpressionBinExport}
        for exp_idx in self.pb_operand.expression_index:
            pb_expr = self.program.proto.expression[exp_idx]
            if pb_expr.HasField("parent_index"):
                # A parent is expected to have been built earlier in the iteration
                parent = built[pb_expr.parent_index]
            else:
                parent = None
            built[exp_idx] = ExpressionBinExport(
                self.program, self.function, self.instruction, exp_idx, parent
            )
        return list(built.values())
14 | from binexport.types import FunctionType, DisassemblerBackend 15 | from binexport.utils import logger 16 | 17 | if TYPE_CHECKING: 18 | from binexport.types import Addr 19 | 20 | 21 | class ProgramBinExport(dict): 22 | """ 23 | Program class that wraps the binexport with high-level functions 24 | and an easy to use API. It inherits from a dict which is used to 25 | reference all functions based on their address. 26 | """ 27 | 28 | def __init__(self, file: pathlib.Path | str): 29 | """ 30 | :param file: BinExport file path 31 | """ 32 | super(ProgramBinExport, self).__init__() 33 | 34 | self._pb = BinExport2() 35 | 36 | self.path: pathlib.Path = pathlib.Path(file) #: Binexport file path 37 | 38 | with open(file, "rb") as f: 39 | self._pb.ParseFromString(f.read()) 40 | self.mask = 0xFFFFFFFF if self.architecture.endswith("32") else 0xFFFFFFFFFFFFFFFF 41 | self.fun_names: dict[str, FunctionBinExport] = {} #: dictionary function name -> name 42 | self.callgraph: networkx.DiGraph = networkx.DiGraph() #: program callgraph (as Digraph) 43 | 44 | # Make the data refs map {instruction index -> address referred} 45 | # dictionary of instruction index to set of refs 46 | self.data_refs: dict[int, set[Addr]] = defaultdict(set) 47 | for entry in self.proto.data_reference: 48 | self.data_refs[entry.instruction_index].add(entry.address) 49 | 50 | # Make the address comment (deprecated) 51 | self.addr_refs = {} 52 | for entry in self.proto.address_comment[::-1]: 53 | if entry.instruction_index in self.addr_refs: 54 | self.addr_refs[entry.instruction_index].append( 55 | self.proto.string_table[entry.string_table_index] 56 | ) 57 | else: 58 | self.addr_refs[entry.instruction_index] = [ 59 | self.proto.string_table[entry.string_table_index] 60 | ] 61 | 62 | # Make the string reference 63 | self.string_refs = {} 64 | for entry in self.proto.string_reference: 65 | self.string_refs[entry.instruction_index] = entry.string_table_index 66 | 67 | count_f = 0 68 | coll = 0 69 | # Load 
all the functions 70 | for i, pb_fun in enumerate(self.proto.flow_graph): 71 | f = FunctionBinExport(weakref.ref(self), pb_fun=pb_fun) 72 | if f.addr in self: 73 | logger.error(f"Address collision for 0x{f.addr:x}") 74 | coll += 1 75 | self[f.addr] = f 76 | count_f += 1 77 | 78 | count_imp = 0 79 | # Load the callgraph 80 | cg = self.proto.call_graph 81 | for node in cg.vertex: 82 | if node.address not in self and node.type == cg.Vertex.IMPORTED: 83 | self[node.address] = FunctionBinExport( 84 | weakref.ref(self), is_import=True, addr=node.address 85 | ) 86 | count_imp += 1 87 | if node.address not in self: 88 | logger.error(f"Missing function address: 0x{node.address:x} ({node.type})") 89 | continue 90 | 91 | self[node.address].type = FunctionType.from_proto(node.type) 92 | if node.demangled_name: 93 | self[node.address].name = node.demangled_name 94 | elif node.mangled_name: 95 | self[node.address].name = node.mangled_name 96 | 97 | for edge in cg.edge: 98 | src = cg.vertex[edge.source_vertex_index].address 99 | dst = cg.vertex[edge.target_vertex_index].address 100 | # Unsure that both src and dst exists (Sometimes SRE like Ghidra export function that doesn't exists) 101 | if src in self and dst in self: 102 | self.callgraph.add_edge(src, dst) 103 | self[src].children.add(self[dst]) 104 | self[dst].parents.add(self[src]) 105 | 106 | # Create a map of function names for quick lookup later on 107 | for f in self.values(): 108 | self.fun_names[f.name] = f 109 | 110 | logger.debug( 111 | f"total all:{count_f}, imported:{count_imp} collision:{coll} (total:{count_f + count_imp + coll})" 112 | ) 113 | 114 | def __repr__(self) -> str: 115 | return f"<{type(self).__name__}:{self.name}>" 116 | 117 | @staticmethod 118 | def open(export_file: pathlib.Path | str) -> ProgramBinExport: 119 | """ 120 | Open a BinExport file and return an instance of ProgramBinExport. 
121 | 122 | :param export_file: BinExport file path 123 | :return: an instance of ProgramBinExport 124 | """ 125 | return ProgramBinExport(export_file) 126 | 127 | @staticmethod 128 | def from_binary_file( 129 | exec_file: pathlib.Path | str, 130 | output_file: str | pathlib.Path = "", 131 | open_export: bool = True, 132 | override: bool = False, 133 | backend: DisassemblerBackend = DisassemblerBackend.IDA, 134 | ) -> ProgramBinExport | bool: 135 | """ 136 | DEPRECATED: Use `ProgramBinExport.from_binary` instead.""" 137 | if not open_export: 138 | export_path = ProgramBinExport.generate( 139 | exec_file=exec_file, 140 | output_file=output_file, 141 | override=override, 142 | backend=backend, 143 | timeout=600, 144 | ) 145 | return export_path.exists() 146 | else: 147 | ProgramBinExport.from_binary(exec_file=exec_file, output_file=output_file, 148 | open_export=open_export, override=override, backend=backend) 149 | 150 | @staticmethod 151 | def from_binary( 152 | exec_file: pathlib.Path | str, 153 | output_file: str | pathlib.Path = "", 154 | override: bool = False, 155 | backend: DisassemblerBackend = DisassemblerBackend.IDA, 156 | timeout: int = 600, 157 | ) -> ProgramBinExport: 158 | """ 159 | Generate the .BinExport file for the given program and return an instance 160 | of ProgramBinExport. 161 | 162 | .. warning:: That function requires the module ``idascript`` 163 | 164 | :param exec_file: executable file path 165 | :param output_file: BinExport output file 166 | :param open_export: whether or not to open the binexport after export 167 | :param override: Override the .BinExport if already existing. (default false) 168 | :param backend: The backend to use. 
(Either 'IDA' or 'Ghidra') 169 | :return: an instance of ProgramBinExport if open_export is true, else boolean 170 | on whether it succeeded 171 | """ 172 | binexport_file = ProgramBinExport.generate( 173 | exec_file=exec_file, 174 | output_file=output_file, 175 | override=override, 176 | backend=backend, 177 | timeout=timeout, 178 | ) 179 | 180 | # In theory if reach here export file exists otherwise an exception has been raised 181 | if binexport_file.exists(): 182 | return ProgramBinExport.open(binexport_file) 183 | else: 184 | raise FileNotFoundError(f"Cannot open BinExport it does not exists: {binexport_file}") 185 | 186 | 187 | @staticmethod 188 | def generate( 189 | exec_file: pathlib.Path | str, 190 | output_file: str | pathlib.Path = "", 191 | override: bool = False, 192 | backend: DisassemblerBackend = DisassemblerBackend.IDA, 193 | timeout: int = 600, 194 | ) -> pathlib.Path: 195 | 196 | exec_file = pathlib.Path(exec_file) 197 | binexport_file = ( 198 | pathlib.Path(output_file) 199 | if output_file 200 | else pathlib.Path(str(exec_file) + ".BinExport") 201 | ) 202 | 203 | # If the binexport file already exists, do not want to override just return 204 | if binexport_file.exists(): 205 | if not override: 206 | return binexport_file 207 | else: 208 | binexport_file.unlink(missing_ok=False) # Remove file 209 | 210 | # Regenerate it! 
211 | if backend == DisassemblerBackend.IDA: 212 | res = ProgramBinExport._from_ida(exec_file, binexport_file, timeout) 213 | elif backend == DisassemblerBackend.GHIDRA: 214 | res = ProgramBinExport._from_ghidra(exec_file, binexport_file, timeout) 215 | elif backend == DisassemblerBackend.BINARY_NINJA: 216 | res = ProgramBinExport._from_binary_ninja(exec_file, binexport_file, timeout) 217 | else: 218 | logger.error(f"Invalid backend '{backend}'") 219 | res = False 220 | 221 | # Check that it has properly been generated at the end 222 | match res, binexport_file.exists(): 223 | case True, True: 224 | return binexport_file # Successful! 225 | case False, True: 226 | logger.error(f"Disassembler failed but BinExport file generated: {binexport_file}") 227 | return binexport_file 228 | case True, False: 229 | raise FileNotFoundError(f"Disassembler succeeded but BinExport file missing: {binexport_file}") 230 | case False, False: 231 | raise FileNotFoundError(f"Disassembler {backend.name} failed, BinExport file missing: {binexport_file}") 232 | 233 | @staticmethod 234 | def _from_ida( 235 | exec_file: pathlib.Path, 236 | binexport_file: pathlib.Path, 237 | timeout: int = 600, 238 | ) -> bool: 239 | """ 240 | Generate the .BinExport file for the given program and return an instance 241 | of ProgramBinExport. 242 | 243 | .. 
warning:: That function requires the module ``idascript`` 244 | 245 | :param exec_file: executable file path 246 | :param binexport_file: BinExport output file 247 | :param timeout: Export timeout in seconds 248 | :return: whether it succeeded or not 249 | """ 250 | from idascript import IDA 251 | 252 | ida = IDA( 253 | exec_file, 254 | script_file=None, 255 | script_params=[ 256 | "BinExportAutoAction:BinExportBinary", 257 | f"BinExportModule:{binexport_file}", 258 | ], 259 | timeout=timeout, 260 | ) 261 | ida.start() 262 | retcode = ida.wait() 263 | 264 | if retcode == IDA.TIMEOUT_RETURNCODE: 265 | logger.warning(f"{exec_file.name} timed out after {timeout} seconds") 266 | return False 267 | 268 | return retcode == 0 269 | 270 | @staticmethod 271 | def _from_binary_ninja( 272 | exec_file: pathlib.Path, 273 | binexport_file: pathlib.Path, 274 | timeout: int = 600, 275 | ) -> bool: 276 | """ 277 | Generate the .BinExport file for the given program and return an instance 278 | of ProgramBinExport. 279 | 280 | .. warning:: That function requires the module ``binaryninja`` 281 | 282 | :param exec_file: executable file path 283 | :param binexport_file: BinExport output file 284 | :return: whether it succeeded or not 285 | """ 286 | try: 287 | import binaryninja 288 | except ModuleNotFoundError as e: 289 | logging.error("Cannot find module python `binaryninja`. Try running BINARY_NINJA_PATH/scripts/install_api.py") 290 | return False 291 | 292 | try: 293 | bv = binaryninja.load(exec_file) 294 | except Exception as err: 295 | logger.warning(f'Failed to analyze {exec_file}: {err}') 296 | return False 297 | 298 | cmd = next(filter(lambda cmd: cmd.name == "BinExport", binaryninja.PluginCommand), None) 299 | if not cmd: 300 | logger.warning(f'BinExport not installed') 301 | return False 302 | 303 | ctx = binaryninja.PluginCommandContext(bv) 304 | cmd.execute(ctx) 305 | 306 | # FIXME: How to obtain the return value? 
307 | return True 308 | 309 | @staticmethod 310 | def _from_ghidra( 311 | exec_file: pathlib.Path, 312 | binexport_file: pathlib.Path, 313 | timeout: int = 600, 314 | ) -> bool: 315 | """ 316 | Generate the .BinExport file for the given program and return an instance 317 | of ProgramBinExport. 318 | 319 | .. warning:: That function requires Ghidra to be installed 320 | 321 | :param exec_file: executable file path 322 | :param binexport_file: BinExport output file 323 | :return: whether it succeeded or not 324 | """ 325 | 326 | # Check if the GHIDRA_PATH environment variable is set 327 | ghidra_dir = os.environ.get("GHIDRA_PATH") 328 | if not ghidra_dir: 329 | logger.error( 330 | "The 'GHIDRA_PATH' environment variable is not set. Please define it to proceed." 331 | ) 332 | return False 333 | 334 | # Check if the GHIDRA_PATH dir exists 335 | ghidra_dir = pathlib.Path(ghidra_dir) 336 | if not ghidra_dir.exists() or not ghidra_dir.is_dir(): 337 | logger.error(f"The path specified in 'GHIDRA_PATH' does not exist: {ghidra_dir}") 338 | return False 339 | 340 | # Small script to do the binexport 341 | ghidra_script = dedent( 342 | f""" 343 | from java.io import File 344 | try: 345 | from com.google.security.binexport import BinExportExporter 346 | except ImportError: 347 | print("BinExport plugin is not installed") 348 | exit() 349 | 350 | exporter = BinExportExporter() #Binary BinExport (v2) for BinDiff 351 | exporter.export(File("{binexport_file.absolute()}"), currentProgram, currentProgram.getMemory(), monitor) 352 | """ 353 | ) 354 | 355 | # Do everything in a TemporaryDirectory to avoid polluting the user filesystem 356 | with TemporaryDirectory() as tmpdirname: 357 | tmpdir = pathlib.Path(tmpdirname) 358 | ghidra_script_path = tmpdir / "BinExportGhidraScript.py" 359 | with open(ghidra_script_path, "w") as fp: 360 | fp.write(ghidra_script) 361 | 362 | proc = run( 363 | [ 364 | str(ghidra_dir / "support" / "analyzeHeadless"), 365 | tmpdirname, 366 | "tmpproj", 367 | 
"-scriptPath", 368 | tmpdirname, 369 | "-postScript", 370 | str(ghidra_script_path), 371 | "-import", 372 | str(exec_file.absolute()), 373 | ], 374 | stdout=PIPE, 375 | stderr=DEVNULL, 376 | timeout=timeout, 377 | ) 378 | 379 | if proc.returncode != 0: 380 | logger.warning( 381 | f"{exec_file.name} failed to export [ret:{proc.returncode}, binexport:{binexport_file.exists()}]" 382 | ) 383 | return False 384 | 385 | elif b"BinExport plugin is not installed" in proc.stdout: 386 | # Using exit(code) inside ghidra script do not propagate so we need to search through 387 | # the script output to detect an error 388 | logger.warning("BinExport plugin not found, please install it!") 389 | return False 390 | 391 | # if reach here assume running went fine. 392 | return True 393 | 394 | @property 395 | def proto(self) -> BinExport2: 396 | """ 397 | Returns the protobuf object associated to the program 398 | """ 399 | return self._pb 400 | 401 | @property 402 | def name(self) -> str: 403 | """ 404 | Return the name of the program (as exported by binexport) 405 | """ 406 | return self.proto.meta_information.executable_name 407 | 408 | @property 409 | def architecture(self) -> str: 410 | """ 411 | Returns the architecture suffixed with address size ex: x86_64, x86_32 412 | """ 413 | 414 | return self.proto.meta_information.architecture_name 415 | -------------------------------------------------------------------------------- /src/binexport/types.py: -------------------------------------------------------------------------------- 1 | from __future__ import annotations 2 | import enum 3 | import enum_tools.documentation 4 | from typing import TypeAlias 5 | 6 | from binexport.binexport2_pb2 import BinExport2 7 | 8 | 9 | Addr: TypeAlias = int 10 | """An integer representing an address within a program""" 11 | 12 | 13 | @enum_tools.documentation.document_enum 14 | class FunctionType(enum.Enum): 15 | """ 16 | Function types as defined by IDA 17 | """ 18 | 19 | # fmt: off 20 | NORMAL 
# NOTE(review): the trailing `# doc:` comments below are presumably read from the
# source by `enum_tools.documentation.document_enum` to document each member;
# keep them on the member line and unchanged -- confirm against enum_tools.
@enum_tools.documentation.document_enum
class FunctionType(enum.Enum):
    """
    Function types as defined by IDA
    """

    # fmt: off
    NORMAL = enum.auto()  # doc: Normal function
    LIBRARY = enum.auto()  # doc: library function
    IMPORTED = enum.auto()  # doc: imported function (don't have content)
    THUNK = enum.auto()  # doc: thunk function (trampoline to another function)
    INVALID = enum.auto()  # doc: invalid function (as computed by IDA)
    # fmt: on

    @staticmethod
    def from_proto(function_type: BinExport2.CallGraph.Vertex.Type) -> FunctionType:
        """
        Convert a protobuf call graph vertex type into a FunctionType.

        :param function_type: vertex type as stored in the protobuf call graph
        :return: matching FunctionType, ``FunctionType.INVALID`` if the value is not
                 one of the five known vertex types
        """
        mapping = {
            BinExport2.CallGraph.Vertex.Type.NORMAL: FunctionType.NORMAL,
            BinExport2.CallGraph.Vertex.Type.LIBRARY: FunctionType.LIBRARY,
            BinExport2.CallGraph.Vertex.Type.IMPORTED: FunctionType.IMPORTED,
            BinExport2.CallGraph.Vertex.Type.THUNK: FunctionType.THUNK,
            BinExport2.CallGraph.Vertex.Type.INVALID: FunctionType.INVALID,
        }

        # Unknown values fall back to INVALID instead of raising
        return mapping.get(function_type, FunctionType.INVALID)


@enum_tools.documentation.document_enum
class ExpressionType(enum.Enum):
    """
    Expression type derived from protobuf expression types.
    """

    # fmt: off
    FUNC_NAME = enum.auto()  # doc: function name
    VAR_NAME = enum.auto()  # doc: variable name
    IMMEDIATE_INT = enum.auto()  # doc: immediate value
    IMMEDIATE_FLOAT = enum.auto()  # doc: float expression
    SYMBOL = enum.auto()  # doc: symbol expression
    REGISTER = enum.auto()  # doc: register expression
    SIZE = enum.auto()  # doc: size expression (byte, dword ..)
    # fmt: on


@enum_tools.documentation.document_enum
class DisassemblerBackend(enum.Enum):
    """
    List of disassemblers supported.
    """

    # fmt: off
    IDA = enum.auto()  # doc: IDA backend
    GHIDRA = enum.auto()  # doc: Ghidra backend
    BINARY_NINJA = enum.auto()  # doc: BinaryNinja backend
    # fmt: on
def get_instruction_address(pb: BinExport2, inst_idx: int) -> Addr:
    """
    Low level binexport protobuf function to return the address of an instruction
    given its index in the protobuf.

    :param pb: binexport protobuf object
    :param inst_idx: index of the instruction
    :return: address of the instruction
    """

    inst = pb.instruction[inst_idx]
    if inst.HasField("address"):
        return inst.address
    else:
        # The address is omitted in the protobuf: rebuild it from previous instructions
        return backtrack_instruction_address(pb, inst_idx)


def backtrack_instruction_address(pb: BinExport2, idx: int) -> Addr:
    """
    Low level function to backtrack the instruction array for instruction that
    does not have the address field set.

    The address is the one of the closest previous instruction having the field
    set, plus the size of every instruction in between. The walk never goes
    before the first instruction: if none of the previous instructions has an
    address, the (default) address of instruction 0 is used as base.

    :param pb: binexport protobuf object
    :param idx: index of the instruction
    :return: address of the instruction
    """

    offset = 0  # cumulated size of the instructions walked over
    cursor = idx
    # Stop at index 0 at the latest, a negative index would silently wrap
    # around to the end of the instruction array
    while cursor > 0:
        cursor -= 1
        previous = pb.instruction[cursor]
        offset += len(previous.raw_bytes)
        if previous.HasField("address"):
            break
    return pb.instruction[cursor].address + offset


def get_basic_block_addr(pb: BinExport2, bb_idx: int) -> Addr:
    """
    Low level function to retrieve the basic block address from its index.
    The function takes the first instruction of the basic block and retrieve
    its address.

    :param pb: binexport protobuf object
    :param bb_idx: index of the basic block
    :return: address of the basic block in the program
    """

    # First instruction of the first index range of the block
    inst = pb.basic_block[bb_idx].instruction_index[0].begin_index
    return get_instruction_address(pb, inst)


def instruction_index_range(rng: BinExport2.BasicBlock.IndexRange) -> Iterator[int]:
    """
    Low level function to iterate over the indices of a protobuf IndexRange.

    :param rng: binexport IndexRange object
    :return: iterator over the indices, from ``begin_index`` (included) to
             ``end_index`` (excluded)
    """
    # A falsy end_index (unset or 0) means the range holds the single begin_index
    return range(rng.begin_index, (rng.end_index if rng.end_index else rng.begin_index + 1))


# Main logger object
logger = logging.getLogger("python-binexport")