├── .gitignore ├── LICENSE ├── README.md ├── __init__.py ├── bnpyc ├── __init__.py ├── decompiler.py ├── disassembler.py ├── lifting.py ├── objects.py ├── pycview.py └── python.py ├── images ├── pycview1.png ├── pycview_functions.png └── pycview_objects.png ├── plugin.json └── requirements.txt /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Njörd 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ### BNPyc 2 | 3 | ## Binary Ninja plugin for Python bytecode (pyc) disassembly and analysis. 4 | 5 | Python versions from 3.0 to 3.10 are supported! 6 | > IL lifting is not implemented yet; as a result, ILs are not available. 7 | 8 | ## Installation 9 | 10 | Clone this repository into the Binary Ninja plugin folder and install the requirements with pip: 11 | 12 | ```shell 13 | cd BNPyc/ 14 | python3 -m pip install -r requirements.txt 15 | ``` 16 | 17 | ## Usage 18 | 19 | Choose any `.pyc` file and open it with Binary Ninja. 20 | 21 | ![](images/pycview1.png) 22 | Example with a simple for loop 23 | 24 | ## Features 25 | 26 | - Recursive function detection and disassembly 27 | - Branch annotations 28 | - Comparison annotations 29 | - Inlined `co_consts` `co_names` `co_varnames` 30 | - Object mapping 31 | -------------------------------------------------------------------------------- /__init__.py: -------------------------------------------------------------------------------- 1 | from . 
import bnpyc -------------------------------------------------------------------------------- /bnpyc/__init__.py: -------------------------------------------------------------------------------- 1 | from binaryninjaui import Menu, UIAction, UIActionHandler 2 | 3 | from .python import * 4 | from .pycview import PycView 5 | from .decompiler import DecompilerWidget 6 | 7 | Python.register() 8 | Python35.register() 9 | Python34.register() 10 | Python33.register() 11 | Python32.register() 12 | Python31.register() 13 | Python30.register() 14 | 15 | PycView.register() 16 | 17 | UIAction.registerAction('BNPyc decompiler') 18 | UIActionHandler.globalActions().bindAction('BNPyc decompiler', UIAction(DecompilerWidget.create_widget, DecompilerWidget.can_create_widget)) 19 | Menu.mainMenu('Tools').addAction('BNPyc decompiler', 'BNPyc decompiler') -------------------------------------------------------------------------------- /bnpyc/decompiler.py: -------------------------------------------------------------------------------- 1 | from binaryninja import BinaryView 2 | from binaryninjaui import WidgetPane 3 | 4 | 5 | from PySide6.QtCore import Qt 6 | from PySide6.QtWidgets import QHBoxLayout, QLabel, QTextEdit, QVBoxLayout, QWidget, QComboBox, QMessageBox 7 | from PySide6.QtGui import QColor, QPalette 8 | 9 | from pygments.lexers import PythonLexer 10 | from pygments.formatters import HtmlFormatter 11 | from pygments import highlight 12 | 13 | from typing import Optional 14 | 15 | import subprocess 16 | import shutil 17 | 18 | 19 | class CodeDisplay(QTextEdit): 20 | def __init__(self, text: str, parent: QWidget): 21 | text = CodeDisplay.parse_code(text) 22 | super().__init__(text, parent=parent) 23 | self.setReadOnly(True) 24 | self.resize(self.sizeHint()) 25 | self.setLineWrapMode(QTextEdit.LineWrapMode.NoWrap) 26 | self.setAutoFillBackground(True) 27 | 28 | palette = self.palette() 29 | palette.setColor(QPalette.Base, QColor.fromString('#272822')) # monokai background color 
def get_code(self) -> Optional[str]:
    """Decompile the current pyc file with the selected external decompiler.

    The decompiler name is read from the combo box and the file path from
    ``self.bv.session_data['filename']``.

    Returns:
        The decompiler's standard output decoded as text, or None when no
        decompiler is selected, the tool cannot be launched, or it exits
        with a non-zero status.
    """
    decompiler = self.select_decompiler.currentText()
    if not decompiler:
        return None

    try:
        # run() drains stdout/stderr while waiting; Popen.wait() combined with
        # PIPE can block forever once the output fills the OS pipe buffer.
        result = subprocess.run(
            [decompiler, self.bv.session_data['filename']],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            check=False,
        )
    except OSError:
        # executable missing or not runnable
        return None

    if result.returncode != 0:
        return None

    # No `finally: return None` here: it used to override the successful
    # return above, so the widget always displayed the error placeholder.
    return result.stdout.decode(errors='replace')
def disasm(self, data: bytes, addr: int) -> InstructionInfo:
    """Build the InstructionInfo for the 2-byte instruction at addr.

    Jump opcodes and RETURN_VALUE get their branches from add_jump_branchs.
    The operand of an EXTENDED_ARG is recorded in self.extended_args, keyed
    by the instruction address.
    """
    self.setup()

    info = InstructionInfo()
    info.length = 2

    opcode = data[0]
    control_flow = set(self.opcodes.hasjabs + self.opcodes.hasjrel + [self.opcodes.RETURN_VALUE, ])
    if opcode in control_flow:
        return self.add_jump_branchs(info, data, addr)

    if self.opcodes.opname[opcode] == 'EXTENDED_ARG':
        # remembered so the following instruction can rebuild its full operand
        self.extended_args[addr] = data[1]

    return info
def add_jump_branchs_311(self, i_info: InstructionInfo, data: bytes, addr: int, value: int) -> bool:
    """Add the branches of the jump opcodes introduced or changed in Python 3.11.

    Args:
        i_info: instruction info that receives the branches.
        data: raw instruction bytes; data[0] is the opcode.
        addr: address of the instruction.
        value: jump operand, already decoded by the caller.

    Returns:
        True if the opcode was handled here (branches were added to i_info),
        False otherwise so the caller can fall back to its own handling.
    """
    opname = self.opcodes.opname[data[0]]
    next_i = addr + self.jump_instruction_length

    # see : https://docs.python.org/3/library/dis.html#opcode-JUMP_BACKWARD
    # Forward deltas count from the instruction following the jump, exactly
    # like JUMP_FORWARD in add_jump_branchs; every forward opcode now uses the
    # same base instead of a mix of `addr` and `next_i`.
    forward = next_i + value
    # NOTE(review): backward targets are kept as `addr - value`, unchanged from
    # the previous implementation; the dis documentation describes the delta as
    # relative to the following instruction as well - confirm against real 3.11 files.
    backward = addr - value

    if opname in ('JUMP_BACKWARD', 'JUMP_BACKWARD_NO_INTERRUPT'):
        i_info.add_branch(BranchType.UnconditionalBranch, backward)
        return True  # handled: previously fell through and reported False

    if opname in ('JUMP_IF_TRUE_OR_POP', 'JUMP_IF_FALSE_OR_POP') and self.pycinfos[0].version < (3, 11):
        # These opcodes only became relative in 3.11; for older bytecode the
        # caller's absolute-target handling must be used.
        return False

    # opname -> (jump target, True when the jump is the TrueBranch)
    conditional_jumps = {
        'POP_JUMP_FORWARD_IF_TRUE': (forward, True),
        'POP_JUMP_BACKWARD_IF_TRUE': (backward, True),
        'POP_JUMP_FORWARD_IF_FALSE': (forward, False),
        'POP_JUMP_BACKWARD_IF_FALSE': (backward, False),
        'POP_JUMP_FORWARD_IF_NOT_NONE': (forward, True),
        'POP_JUMP_BACKWARD_IF_NOT_NONE': (backward, True),
        'POP_JUMP_FORWARD_IF_NONE': (forward, False),
        'POP_JUMP_BACKWARD_IF_NONE': (backward, False),
        'JUMP_IF_TRUE_OR_POP': (forward, True),    # changed in version 3.11
        'JUMP_IF_FALSE_OR_POP': (forward, False),  # changed in version 3.11
    }

    entry = conditional_jumps.get(opname)
    if entry is None:
        return False

    target, jump_is_true_branch = entry
    if jump_is_true_branch:
        true_target, false_target = target, next_i
    else:
        true_target, false_target = next_i, target

    i_info.add_branch(BranchType.TrueBranch, true_target)
    i_info.add_branch(BranchType.FalseBranch, false_target)
    return True
def add_jump(self, data: bytes, addr: int) -> InstructionTextToken:
    """Build the address token displayed after a jump mnemonic.

    The operand comes from get_value and is doubled when the instruction is
    preceded by an EXTENDED_ARG and the file version is at least 3.8.
    Opcodes listed in `shown_as_is` display the operand itself; any other
    opcode in `hasjrel` displays the operand added to the address of the next
    instruction.

    Raises:
        Exception: if the opcode is neither listed nor a relative jump.
    """
    opcode = data[0]
    opname = self.opcodes.opname[opcode]
    following = addr + self.jump_instruction_length

    operand = self.get_value(data, addr)
    if self.has_extended_arg(addr) and self.pycinfos[0].version >= (3, 8, 0):
        operand *= 2

    shown_as_is = (
        'JUMP_ABSOLUTE',
        'POP_JUMP_IF_FALSE', 'JUMP_IF_FALSE_OR_POP',
        'POP_JUMP_IF_TRUE', 'JUMP_IF_TRUE_OR_POP',
        'JUMP_IF_TRUE', 'JUMP_IF_FALSE',
    )

    if opname in shown_as_is:
        target = operand
    elif opcode in self.opcodes.hasjrel:
        # Relative jumps
        target = operand + following
    else:
        raise Exception(f'Not handled OPCODE : {opname}')

    return InstructionTextToken(
        InstructionTextTokenType.AddressDisplayToken, f' {hex(target)}', target
    )
def has_extended_arg(self, addr: int) -> bool:
    """Tell whether an EXTENDED_ARG was recorded two bytes before addr."""
    return addr - 2 in self.extended_args


def get_extended_value(self, addr: int) -> int:
    """Return the combined EXTENDED_ARG contribution for the instruction at addr.

    Walks backwards over the chain of recorded EXTENDED_ARG instructions: the
    closest one is shifted by 8 bits, the one before it by 16, and so on.
    Returns 0 when the instruction has no EXTENDED_ARG prefix.
    """
    total = 0
    shift = 8
    cursor = addr
    while self.has_extended_arg(cursor):
        cursor -= 2
        total += self.extended_args[cursor] << shift
        shift += 8
    return total


def get_value(self, data: bytes, addr: int) -> int:
    """Return the operand of the instruction at addr, EXTENDED_ARG included.

    Returns 0 when data holds fewer than two bytes.
    """
    if len(data) < 2:
        return 0

    low_byte = data[1]
    return low_byte + self.get_extended_value(addr) if self.has_extended_arg(addr) else low_byte
def _base_of(self, addr: int) -> int:
    """Return the start offset of the function that contains addr.

    Scans the (name, offset) pairs stored in session_data['functions'] in
    order and returns the last offset seen before the first one greater than
    addr. Returns 0 when the list is empty.
    """
    current = 0
    for _name, start in self.bv.session_data['functions']:
        if addr < start:
            return current
        current = start
    return current
class Disassembler35(Disassembler):
    """The Disassembler for python bytecode version <= 3.5.

    Instructions are 1 byte long, or 3 bytes when the opcode is at least
    HAVE_ARGUMENT; jump instructions use jump_instruction_length (3).
    """

    def __init__(self):
        super().__init__()
        self.jump_instruction_length = 3

    def disasm(self, data: bytes, addr: int) -> InstructionInfo:
        """Build the InstructionInfo (length and branches) for the instruction at addr."""
        self.setup()

        opcode = data[0]
        info = InstructionInfo()
        info.length = 1

        is_jump = opcode in set(self.opcodes.hasjabs + self.opcodes.hasjrel)
        if is_jump or opcode == self.opcodes.RETURN_VALUE:
            info = self.add_jump_branchs(info, data, addr)
            if is_jump:
                info.length = self.jump_instruction_length

        # every opcode taking an argument occupies three bytes
        if opcode >= self.opcodes.HAVE_ARGUMENT:
            info.length = 3

        return info
class ObjectRenderer(DataRenderer):
    """Data renderer for the data variables named 'PythonObject'.

    Each variable is displayed as `object <name> <value>`, the value token
    type depending on the `kind` field of the structure.
    """

    def perform_is_valid_for_data(self, ctxt, view: BinaryView, addr: int, type, context):
        """Return True when the data variable at addr is named 'PythonObject'."""
        try:
            var = view.get_data_var_at(addr)
            return var.name == 'PythonObject'
        except Exception:
            # Best effort: any lookup failure just means this renderer does
            # not apply. Narrowed from a bare `except:` so KeyboardInterrupt
            # and SystemExit are no longer swallowed.
            return False

    def perform_get_lines_for_data(self, ctxt, view: BinaryView, addr: int, type, prefix, width, context):
        """Return the single disassembly line describing the object at addr."""
        var = view.get_data_var_at(addr)

        tokens = [
            InstructionTextToken(InstructionTextTokenType.StringToken, 'object '),
            InstructionTextToken(InstructionTextTokenType.TypeNameToken, var.value['name'] + b' '),
        ]

        kind = var.value['kind']
        value = var.value['value'].replace(b'\x00', b'')  # drop the NUL padding of the fixed-size field

        if kind == ObjectKind.NONE:
            token = InstructionTextToken(InstructionTextTokenType.DataSymbolToken, 'None')
        else:
            if kind == ObjectKind.STRING:
                token_type = InstructionTextTokenType.StringToken
            elif kind == ObjectKind.INTEGER:
                token_type = InstructionTextTokenType.IntegerToken
            elif kind == ObjectKind.FLOAT:
                # was `InstructionTextToken.FloatingPointToken`, which raised
                # AttributeError: the member lives on InstructionTextTokenType
                token_type = InstructionTextTokenType.FloatingPointToken
            else:
                token_type = InstructionTextTokenType.CharacterConstantToken
            token = InstructionTextToken(token_type, value)

        tokens.append(token)

        return [DisassemblyTextLine(tokens, addr)]
def _set_tmpfile(self) -> None:
    """Create the backing temporary file and seed it with the module bytecode.

    Writes `self.pycinfo.co.co_code` to a new named temporary file (kept on
    disk, since it is created with delete=False) and resets `self.funcs` to
    the single entry describing the unnamed function at offset 0.
    """
    # read write binary mode
    self.tmpfile = handle = tempfile.NamedTemporaryFile(mode='r+b', delete=False)
    handle.write(self.pycinfo.co.co_code)
    handle.flush()

    # first function offset
    self.funcs = [("", 0)]
def _loads_objects(self, code: object):
    """Recursively serialise the names and constants of ``code`` into the scratch file.

    Every entry of ``code.co_names`` and every non-code entry of
    ``code.co_consts`` is encoded with ``self._build_object`` and appended
    to ``self.tmpfile``; the offset it was written at is recorded in
    ``self.str_objects``.  Code objects found in ``co_consts`` are handled
    afterwards, in the order they appear, by recursing into them.

    ``self.data_begin`` is advanced by the number of bytes actually written
    rather than by a fixed 104.  The 'object' structure is 104 bytes (two
    50-byte text fields plus a 4-byte kind), but an encoded object can be
    longer when its text contains non-ASCII characters; using the real
    length keeps the offsets of all following objects correct.
    """
    nested = []
    # Names come first, then constants, matching the order they are laid
    # out in the file.
    pending = list(code.co_names)

    for const in code.co_consts:
        if self._is_code(const):
            nested.append(const)
        else:
            pending.append(const)

    for obj in pending:
        self.str_objects.append(self.data_begin)
        blob = self._build_object(obj)
        self.tmpfile.write(blob)
        self.data_begin += len(blob)

    for child in nested:
        self._loads_objects(child)

    self.tmpfile.flush()


def _check_others_functions(self, code: object) -> List[PycInfo]:
    """Recursively collect the code objects nested in ``code.co_consts``.

    For each code object found, its name and the current end offset of
    ``self.tmpfile`` are appended to ``self.funcs`` and its bytecode is
    appended to the file.  A function's own nested functions are written
    right after it, before its siblings.

    Returns one ``PycInfo(co=...)`` per code object, in the order written.
    """
    found = []
    for const in code.co_consts:
        if not self._is_code(const):
            continue

        # Record the entry point before writing: tell() is where this
        # function's bytecode is about to start.
        entry_offset = self.tmpfile.tell()
        self.funcs.append((const.co_name, entry_offset))
        self.tmpfile.write(const.co_code)
        self.tmpfile.flush()

        found.append(PycInfo(co=const))
        found.extend(self._check_others_functions(const))
    return found
@staticmethod
def get_platform(version: Tuple[int, int]) -> Platform:
    """Return the standalone platform of the architecture matching ``version``.

    Versions 3.6 and later all use the 'Python-bytecode' architecture;
    3.0 through 3.5 each have a dedicated 'Python-bytecodeXY' architecture.

    Raises:
        Exception: if ``version`` is none of the above.
    """
    if version >= (3, 6):
        arch_name = 'Python-bytecode'
    else:
        legacy_archs = {
            (3, 5): 'Python-bytecode35',
            (3, 4): 'Python-bytecode34',
            (3, 3): 'Python-bytecode33',
            (3, 2): 'Python-bytecode32',
            (3, 1): 'Python-bytecode31',
            (3, 0): 'Python-bytecode30',
        }
        arch_name = legacy_archs.get(version)
        if arch_name is None:
            raise Exception('Unsupported bytecode version !')

    return Architecture[arch_name].standalone_platform


def perform_get_address_size(self) -> int:
    """Return the address size reported for this view: always 8."""
    address_size = 8
    return address_size
class Python35(Python):
    """Architecture registered as 'Python-bytecode35'.

    Differs from the parent in three ways: instructions may be up to
    3 bytes long, decoding is done by ``Disassembler35``, and a failed
    decode is reported with a length of 1 instead of 2.
    """
    name = 'Python-bytecode35'
    max_instr_length = 3

    def __init__(self):
        super().__init__()
        # Replace the disassembler installed by the parent constructor.
        self.disassembler = Disassembler35()

    def get_instruction_text(self, data, addr) -> Tuple[List[InstructionTextToken], int]:
        """Return ``(tokens, length)`` for the instruction at ``addr``.

        Returns None when ``data`` is empty.  If the disassembler raises
        IndexError, an empty token list with a length of 1 is returned.
        """
        if not data:
            return None

        try:
            rendered = self.disassembler.get_instruction_text(data, addr)
        except IndexError:
            rendered = ([], 1)
        return rendered


# The classes below reuse the Python35 implementation unchanged; only the
# architecture name they are registered under differs.
class Python34(Python35):
    name = 'Python-bytecode34'


class Python33(Python35):
    name = 'Python-bytecode33'


class Python32(Python35):
    name = 'Python-bytecode32'


class Python31(Python35):
    name = 'Python-bytecode31'


class Python30(Python35):
    name = 'Python-bytecode30'
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/Njord0/BNPyc/2bad86ee84dd371cff84919809f94982b8fad688/images/pycview_functions.png -------------------------------------------------------------------------------- /images/pycview_objects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Njord0/BNPyc/2bad86ee84dd371cff84919809f94982b8fad688/images/pycview_objects.png -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion": 2, 3 | "name": "Python bytecode", 4 | "author": "Njörd", 5 | "type": [ 6 | "architecture", 7 | "binaryview" 8 | ], 9 | "api": [ 10 | "python3" 11 | ], 12 | "description": "Python bytecode (.pyc) support for Binary Ninja", 13 | "longdescription": "", 14 | "license": { 15 | "name": "MIT", 16 | "text": "Copyright 2024 Njörd\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." 17 | }, 18 | "platforms": [ 19 | "Darwin", 20 | "Windows", 21 | "Linux" 22 | ], 23 | "installinstructions": { 24 | "Darwin": "Clone this repository into `~/Library/Application Support/Binary Ninja/plugins/`", 25 | "Windows": "Clone this repository into `%APPDATA%/Binary Ninja/plugins/`", 26 | "Linux": "Clone this repository into `~/.binaryninja/plugins/`" 27 | }, 28 | "version": "1.0.4", 29 | "minimumbinaryninjaversion": 2170 30 | } -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | xdis 2 | pygments --------------------------------------------------------------------------------