├── __init__.py ├── .gitignore ├── examples ├── firmware.bin ├── firmware.elf ├── main.cpp └── README.md ├── screenshots ├── hero.png └── open-with-options.png ├── binja_xtensa ├── test_mnemonics.csv.bz2 ├── torture_test.dump.bz2 ├── esp32_torture_test.dump.bz2 ├── test_mnemonic_text.dump.bz2 ├── fix_data.py ├── parse_rom_ld.py ├── binaryview.py ├── __init__.py ├── test_instruction.py ├── firmware_parser.py ├── known_symbols.py ├── disassembly.py ├── lifter.py └── instruction.py ├── LICENSE ├── plugin.json └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | from . import binja_xtensa 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | .cache 4 | -------------------------------------------------------------------------------- /examples/firmware.bin: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/examples/firmware.bin -------------------------------------------------------------------------------- /examples/firmware.elf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/examples/firmware.elf -------------------------------------------------------------------------------- /screenshots/hero.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/screenshots/hero.png -------------------------------------------------------------------------------- /screenshots/open-with-options.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/screenshots/open-with-options.png 
-------------------------------------------------------------------------------- /binja_xtensa/test_mnemonics.csv.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/test_mnemonics.csv.bz2 -------------------------------------------------------------------------------- /binja_xtensa/torture_test.dump.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/torture_test.dump.bz2 -------------------------------------------------------------------------------- /binja_xtensa/esp32_torture_test.dump.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/esp32_torture_test.dump.bz2 -------------------------------------------------------------------------------- /binja_xtensa/test_mnemonic_text.dump.bz2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/test_mnemonic_text.dump.bz2 -------------------------------------------------------------------------------- /examples/main.cpp: -------------------------------------------------------------------------------- 1 | #include 2 | 3 | void setup() { 4 | // put your setup code here, to run once: 5 | Serial.begin(115200); 6 | } 7 | 8 | void loop() { 9 | // put your main code here, to run repeatedly: 10 | delay(1000); 11 | Serial.println("Hello world"); 12 | } 13 | -------------------------------------------------------------------------------- /examples/README.md: -------------------------------------------------------------------------------- 1 | # Examples 2 | 3 | Want to try out the plugin, but don't have a firmware image laying around? 
#!/usr/bin/env python
"""
Script to parse ESP SDK linker script and save symbols for the ROM

File you want from the SDK is called eagle.rom.addr.v6.ld

This script will produce a known_symbols.py from it
"""

import json
import re

# Matches lines of the form ``PROVIDE ( name = 0x40001234 );``
ROM_RE = re.compile(
    r'^\s*PROVIDE\s+\(\s*([a-zA-Z0-9_]+)\s*=\s*(0x[0-9a-fA-F]+)\s*\);$'
)

LINKER_SCRIPT = "eagle.rom.addr.v6.ld"
# The generated text is a Python assignment, not a JSON document, so it has to
# land in a .py file (previously it was written to "known_symbols.json").
OUTPUT_MODULE = "known_symbols.py"


def parse_rom_symbols(lines):
    """Return ``{address: symbol_name}`` for every PROVIDE line in *lines*.

    *lines* is any iterable of strings (an open file works). Lines that do
    not match ``ROM_RE`` are ignored. If an address is provided twice, the
    later symbol wins.
    """
    symbols = {}
    for line in lines:
        m = ROM_RE.match(line)
        if m:
            symbol, addr = m.groups()
            symbols[int(addr, 0)] = symbol
    return symbols


def render_known_symbols(symbols):
    """Return the source text of the generated module.

    ``json.dumps`` turns the integer addresses into decimal string keys, so
    the generated dict is keyed by strings.
    """
    return "known_symbols = " + json.dumps(symbols) + "\n"


def main():
    """Read the linker script and write the generated symbol module."""
    with open(LINKER_SCRIPT, "r") as f:
        symbols = parse_rom_symbols(f)

    with open(OUTPUT_MODULE, "w") as f:
        f.write(render_known_symbols(symbols))


if __name__ == "__main__":
    main()
2020-2021 Zack Orndorff 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of 4 | this software and associated documentation files (the "Software"), to deal in 5 | the Software without restriction, including without limitation the rights to 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 7 | the Software, and to permit persons to whom the Software is furnished to do so, 8 | subject to the following conditions: 9 | 10 | The above copyright notice and this permission notice shall be included in all 11 | copies or substantial portions of the Software. 12 | 13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
19 | -------------------------------------------------------------------------------- /plugin.json: -------------------------------------------------------------------------------- 1 | { 2 | "pluginmetadataversion": 2, 3 | "name": "Xtensa Architecture and ESP8266 Loader", 4 | "author": "Zack Orndorff", 5 | "type": [ 6 | "binaryview", 7 | "architecture" 8 | ], 9 | "api": [ 10 | "python3" 11 | ], 12 | "description": "Xtensa Architecture and ESP8266 Image Loader", 13 | "longdescription": "", 14 | "license": { 15 | "name": "MIT", 16 | "text": "Copyright 2020-2021 Zack Orndorff\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE." 
17 | }, 18 | "platforms": [ 19 | "Darwin", 20 | "Windows", 21 | "Linux" 22 | ], 23 | "installinstructions": { 24 | "Darwin": "", 25 | "Linux": "", 26 | "Windows": "" 27 | }, 28 | "version": "0.5.1", 29 | "minimumbinaryninjaversion": 2846 30 | } 31 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # binja-xtensa: Architecture Plugin and ESP8266 Loader 2 | 3 | Tensilica Xtensa Architecture Plugin and ESP8266 Firmware Loader for Binary 4 | Ninja. 5 | 6 | ![screenshot of Binary Ninja showing setup and loop of a decompiled ESP8266 7 | Arduino project](https://raw.githubusercontent.com/zackorndorff/binja-xtensa/0.5/screenshots/hero.png) 8 | 9 | ## Features 10 | 11 | * Disassembly of nearly all Xtensa instructions 12 | * Lifting for most Xtensa instructions you'll see in ESP8266 Firmware 13 | * Support for Xtensa ELF files so they will be automatically recognized 14 | * Loader for ESP8266 raw firmware dumps. This support is a little finicky to 15 | use, as there's multiple partitions in the firmware dumps. By default it uses 16 | the last one with a detected header; you can adjust this via Open With 17 | Options 18 | * At the moment it doesn't completely map the sections properly, but it's a 19 | start :) 20 | 21 | ## What it doesn't do 22 | 23 | * It was written mostly as an exercise for the author. It's useful enough to 24 | share, but no promises it's useful for your project :) 25 | * Lift register windowing instructions (it disassembles most of them) 26 | * You need this for ESP32 support. 
It shouldn't be too bad to add, as long 27 | as you can figure out how to lift the windowed registers 28 | * Anything with the optional vector unit 29 | * Disassemble and lift most of the boolean instructions 30 | * Lift most floating point instructions 31 | * Deal with special registers (I figure you might as well look at the asm 32 | for that anyway) 33 | * Anything quickly. This is Python, and not particularly well optimized Python 34 | at that. If you're using this seriously, I recommend rewriting in C++ 35 | * Find `main` in a raw binary for you 36 | 37 | ## Installation 38 | 39 | Install via the Binary Ninja plugin manager. Alternatively, clone this 40 | repository into your Binary Ninja plugins directory. See the [official Binary 41 | Ninja documentation](https://docs.binary.ninja/guide/plugins.html) for more 42 | details. 43 | 44 | ## Using the ESP8266 Firmware Loader 45 | 46 | The default of picking the last usable partition works decently, but if you want 47 | more control, use Open With Options and change `Loader > Which Firmware` to the 48 | option corresponding to the address you want to load. 49 | 50 | I attempt to load in symbols from the SDK's linker script so some of the 51 | ROM-implemented functions are less mysterious. See 52 | [parse_rom_ld.py](binja_xtensa/parse_rom_ld.py) for the parsing code, 53 | [known_symbols.py](binja_xtensa/known_symbols.py) for the database it'll apply, 54 | and function `setup_esp8266_map` in 55 | [binaryview.py](binja_xtensa/binaryview.py#L17) for the code that applies it. 56 | This should probably be a load time option... 
but it's not at the moment :/ 57 | 58 | ![screenshot of Binary Ninja's Open With Options showing the Loader Which 59 | Firmware option](https://raw.githubusercontent.com/zackorndorff/binja-xtensa/0.5/screenshots/open-with-options.png) 60 | 61 | ## Future Work 62 | 63 | * Support register windowing instructions to support ESP32 firmware 64 | * Improve the raw firmware loader 65 | * Rewrite to be faster 66 | 67 | ## Why did you write this? 68 | 69 | 1. I was goofing around with ESP8266 and Arduino and was annoyed I didn't have 70 | an easy way to disassemble the built binaries 71 | 2. I hadn't written a full architecture plugin and I thought it'd be a good 72 | exercise 73 | 3. I got bored over COVID-19 lockdown in 2020 and needed something to do 74 | 75 | ## Testing 76 | 77 | There are some simple tests in 78 | [test_instruction.py](binja_xtensa/test_instruction.py), which are mostly just 79 | taking uniq'd output from objdump on some binaries I had laying around and 80 | making sure the output matches. They can be run with `python -m pytest` from the 81 | root of the project. 82 | 83 | ## License 84 | 85 | This project copyright Zack Orndorff (@zackorndorff) and is available under the 86 | MIT license. See [LICENSE](LICENSE). 87 | -------------------------------------------------------------------------------- /binja_xtensa/binaryview.py: -------------------------------------------------------------------------------- 1 | """ 2 | ESP8266 Firmware .bin BinaryView 3 | 4 | Using `firmware_parser.py`, we attempt to find binaries in the dump. By default 5 | we'll pick an interesting one (currently the last one with a detected header), 6 | but we present a load option to the user to allow picking a different one. 
def setup_esp8266_map(bv):
    """Define the ESP8266 ROM region and its known symbols on *bv*.

    Adds one auto segment and one ``esp8266_ROM`` section covering the ROM
    address range, then defines an auto symbol for every entry of
    ``known_symbols``. Symbols whose address falls inside the ROM range are
    defined as imported functions, all others as imported data.

    ``known_symbols`` keys are parsed with ``int(addr, 0)``, i.e. they are
    expected to be strings holding an integer literal.
    """
    # https://github.com/esp8266/esp8266-wiki/wiki/Memory-Map
    rom_start = 0x40000000
    rom_end = 0x40010000  # exclusive: the length below is rom_end - rom_start
    rom_length = rom_end - rom_start

    # The ROM layout does not depend on any symbol, so it is created once
    # here instead of once per symbol.
    bv.add_auto_segment(rom_start, rom_length, 0, 0,
                        SegmentFlag.SegmentContainsCode |
                        SegmentFlag.SegmentContainsData |
                        SegmentFlag.SegmentReadable |
                        SegmentFlag.SegmentExecutable)

    bv.add_auto_section("esp8266_ROM", rom_start, rom_length,
                        SectionSemantics.ExternalSectionSemantics)

    for addr, symbol in known_symbols.items():
        addr = int(addr, 0)

        # Half-open range check: rom_end itself is the first address past
        # the ROM and must not be classified as a ROM function.
        if rom_start <= addr < rom_end:
            sym_type = SymbolType.ImportedFunctionSymbol
        else:
            sym_type = SymbolType.ImportedDataSymbol

        bv.define_auto_symbol(Symbol(sym_type, addr, symbol))
class ESPFirmware(BinaryView):
    """BinaryView for raw ESP firmware dumps.

    ``parse_firmware`` lists the binaries found in the dump; one of them is
    loaded. The choice is exposed as the ``loader.esp.whichFirmware`` load
    setting, whose values are ``"option<N>"`` with ``N`` an index into the
    list returned by ``parse_firmware``.
    """
    name = "ESPFirmware"
    long_name = "ESP Firmware"

    # Prefix of the enum values offered by the "Which Firmware" setting.
    _OPTION_PREFIX = "option"
    _WHICH_FIRMWARE_KEY = "loader.esp.whichFirmware"

    def __init__(self, data):
        BinaryView.__init__(self, file_metadata=data.file, parent_view=data)
        self.raw = data
        # Keeps perform_get_entry_point() usable before init() has run.
        self.entry_addr = 0

    @classmethod
    def is_valid_for_data(cls, data):
        """Return True when the first byte of *data* is a known magic byte."""
        # These happen to be the two magic bytes used by firmware_parser.py
        return data.read(0, 1) in (b'\xe9', b'\xea')

    @classmethod
    def _pick_default_firmware(cls, firmware_options):
        """Rudimentary heuristic for "interesting" binaries

        Returns ``(index, firmware)`` of the last entry whose name is not
        "AppendedData", falling back to the first entry. Raises IndexError
        when *firmware_options* is empty.
        """
        for idx, firm in reversed(list(enumerate(firmware_options))):
            if firm.name != "AppendedData":
                return idx, firm

        return 0, firmware_options[0]

    @classmethod
    def _resolve_firmware_index(cls, which_firmware, firmware_options):
        """Translate the setting value into a valid index, or None.

        ``None`` as *which_firmware* means "no choice made" and selects the
        default. Anything that is not ``"option<N>"`` with ``N`` inside the
        bounds of *firmware_options* yields None.
        """
        if not firmware_options:
            return None
        if which_firmware is None:
            idx, _ = cls._pick_default_firmware(firmware_options)
            return idx
        if not isinstance(which_firmware, str):
            return None
        if not which_firmware.startswith(cls._OPTION_PREFIX):
            return None
        try:
            idx = int(which_firmware[len(cls._OPTION_PREFIX):])
        except ValueError:
            return None
        # Reject negative values too: they would silently index from the end.
        if not 0 <= idx < len(firmware_options):
            return None
        return idx

    @classmethod
    def get_load_settings_for_data(cls, data):
        """Build the load settings offering one option per detected binary.

        Raises RuntimeError when the settings group or the setting itself
        cannot be registered.
        """
        # This example was crucial in figuring out how to present load options
        # https://github.com/Vector35/binaryninja-api/blob/dev/python/examples/mappedview.py
        # It's also helpful to call Settings().serialize_schema() from the
        # Python console and examine the results.

        firmware_options = parse_firmware(data)
        default_firmware_idx, _ = cls._pick_default_firmware(firmware_options)

        option_ids = [cls._OPTION_PREFIX + str(i)
                      for i in range(len(firmware_options))]
        option_descriptions = [
            f"{firm.name} at {hex(firm.bv_offset)}"
            for firm in firmware_options]

        setting = json.dumps({
            "title": "Which Firmware",
            "type": "string",
            "description": "Which of the binaries in this file do you want?",
            "enum": option_ids,
            "enumDescriptions": option_descriptions,
            "default": option_ids[default_firmware_idx],
        })

        load_settings = Settings("esp_bv_settings")
        # These calls must not live inside ``assert``: asserts are stripped
        # under ``python -O`` and the registration would never happen.
        if not load_settings.register_group("loader", "Loader"):
            raise RuntimeError("Could not register the 'loader' settings group")
        if not load_settings.register_setting(cls._WHICH_FIRMWARE_KEY, setting):
            raise RuntimeError(
                "Could not register the " + cls._WHICH_FIRMWARE_KEY + " setting")
        return load_settings

    def perform_is_executable(self):
        return True

    def perform_get_entry_point(self):
        # This should be set by the the_firmware.load() if there is an entry
        # point.
        # Otherwise, for lack of a better choice, we end up with 0
        return self.entry_addr

    def perform_get_address_size(self):
        return 4

    def init(self):
        """Load the selected firmware into this view.

        Returns False (after printing a message) when no usable firmware
        option could be determined, True otherwise.
        """
        try:
            load_settings = self.get_load_settings(self.name)
            which_firmware = load_settings.get_string(
                self._WHICH_FIRMWARE_KEY, self)
        except Exception:
            # Best effort: if the option cannot be read, use the default.
            which_firmware = None

        firmware_options = parse_firmware(self.parent_view)

        which_firmware_idx = self._resolve_firmware_index(
            which_firmware, firmware_options)
        if which_firmware_idx is None:
            print("You didn't choose one of the firmware options")
            return False

        print("Using firmware index", which_firmware_idx)
        the_firmware = firmware_options[which_firmware_idx]

        self.platform = Architecture['xtensa'].standalone_platform
        self.arch = Architecture['xtensa']
        self.entry_addr = 0

        # Will create segments and set entry_addr as needed.
        the_firmware.load(self, self.parent_view)

        if self.entry_addr != 0:
            for seg in self.segments:
                # seg.end is used as an exclusive bound (the section length
                # below is seg.end - seg.start), so the test is half-open.
                if (seg.start <= self.entry_addr < seg.end) and seg.executable:
                    # It seems the ReadOnlyCodeSectionSemantics kicks off the
                    # autoanalysis
                    self.add_auto_section(
                        'entry_section', seg.start,
                        seg.end - seg.start,
                        SectionSemantics.ReadOnlyCodeSectionSemantics)
            # I want to be able to find the entry point in the UI
            # I couldn't find a create_auto_function... maybe I didn't look hard
            # enough
            self.create_user_function(self.entry_addr)
            self.define_auto_symbol(Symbol(
                SymbolType.FunctionSymbol,
                self.entry_addr,
                "entry"))

        setup_esp8266_map(self)

        return True
This is enough 16 | for useful decompilation, courtesy of the Binary Ninja core. 17 | * a CallingConvention that matches what the documentation says GCC does. 18 | After adding this and setting it default, Binary Ninja started recognizing 19 | a good amount of function arguments. 20 | * declared support for whatever ELF architecture the ESP8266 tools in 21 | platformio generate... so if you open up an elf it builds you, it should 22 | recognize the architecture. 23 | 24 | ESP8266-specific: 25 | * a BinaryView implementation that attempts to detect and load ESP8266 26 | firmware dumps. The current implementation is hacky, but it's able to 27 | examine multiple .bin dumps I've obtained through different methods. 28 | 29 | * These dumps sometimes contain multiple binaries (namely, a bootloader 30 | plus user code). I've added an "Open with Options" option to choose which 31 | of the binaries you want to look at. By default, it tries to pick the 32 | last binary containing parsed segments. 33 | 34 | What it doesn't yet do: 35 | * anything with windowed registers 36 | * anything with the optional vector unit 37 | * disassemble and lift most of the boolean instructions 38 | * lift most floating point instructions 39 | * deal with special registers (I figure you might as well look at the asm 40 | for that anyway) 41 | * help you find `main` in a raw binary :) 42 | 43 | I've abused Python metaprogramming throughout, so it should be possible to fix 44 | each of the above inaccuracies on a per-instruction basis if the need arises. 45 | I've attempted to document how that works. 
class XtensaLE(Architecture):
    """Little-endian Xtensa architecture for Binary Ninja.

    Decoding is delegated to ``Instruction.decode``, text rendering to
    ``disassemble_instruction`` and lifting to ``lift``. This class only
    declares the register file and reports control flow to the core.
    """
    name = 'xtensa'
    endianness = Endianness.LittleEndian

    default_int_size = 4
    address_size = 4
    max_instr_length = 3

    # Uses for regs are from "CALL0 Register Usage and Stack Layout (8.1.2)"
    link_reg = 'a0'
    stack_pointer = 'a1'
    regs = {
        'a0': RegisterInfo("a0", 4, 0),  # ret addr
        'a1': RegisterInfo("a1", 4, 0),  # sp (callee-saved)
        'a2': RegisterInfo("a2", 4, 0),  # arg1
        'a3': RegisterInfo("a3", 4, 0),  # arg2
        'a4': RegisterInfo("a4", 4, 0),  # arg3
        'a5': RegisterInfo("a5", 4, 0),  # arg4
        'a6': RegisterInfo("a6", 4, 0),  # arg5
        'a7': RegisterInfo("a7", 4, 0),  # arg6
        'a8': RegisterInfo("a8", 4, 0),  # static chain (see section 8.1.8)
        'a9': RegisterInfo("a9", 4, 0),
        'a10': RegisterInfo("a10", 4, 0),
        'a11': RegisterInfo("a11", 4, 0),
        'a12': RegisterInfo("a12", 4, 0),  # callee-saved
        'a13': RegisterInfo("a13", 4, 0),  # callee-saved
        'a14': RegisterInfo("a14", 4, 0),  # callee-saved
        'a15': RegisterInfo("a15", 4, 0),  # optional stack-frame pointer
        'sar': RegisterInfo("sar", 1, 0),  # Shift Address Register: Not a GPR
    }

    # Do we have flags?
    flags = {}
    flag_roles = {}
    flag_write_types = {}
    flags_written_by_flag_write_type = {}
    flags_required_for_flag_condition = {}

    intrinsics = {
        "memw": IntrinsicInfo([], []),
        "isync": IntrinsicInfo([], []),
    }

    def _decode_instruction(self, data, addr):
        """Return the decoded instruction, or None when decoding fails.

        *addr* is accepted for symmetry with the callers but is not used.
        """
        try:
            return Instruction.decode(data)
        except Exception:
            # Undecodable bytes are reported as "no instruction". Unlike a
            # bare ``except:`` this lets KeyboardInterrupt/SystemExit through.
            return None

    def get_instruction_info(self, data, addr):
        """Report the length and branch behaviour of the instruction at *addr*.

        Returns None when *data* cannot be decoded. Raises Exception when the
        decoded instruction carries an out-of-range length.
        """
        insn = self._decode_instruction(data, addr)
        if not insn:
            return None
        if insn.length > self.max_instr_length or insn.length < 0:
            raise Exception("Somehow we got here without setting length")

        result = InstructionInfo()
        result.length = insn.length

        mnem = insn.mnem

        # Add branches
        if mnem in ("RET", "RET.N"):
            result.add_branch(BranchType.FunctionReturn)

        # Section 3.8.4 "Jump and Call Instructions
        elif mnem == "J":
            result.add_branch(BranchType.UnconditionalBranch,
                              insn.target_offset(addr))
        elif mnem == "JX":
            result.add_branch(BranchType.IndirectBranch)

        elif mnem in ("CALL0", "CALL4", "CALL8", "CALL12"):
            result.add_branch(BranchType.CallDestination,
                              insn.target_offset(addr))
        elif mnem in ("CALLX0", "CALLX4", "CALLX8", "CALLX12"):
            # Register-indirect calls: no branch is recorded for these (an
            # IndirectBranch entry was previously tried and left disabled).
            pass

        elif mnem == "SYSCALL":
            result.add_branch(BranchType.SystemCall)

        else:
            # Conditional branches are the "B*" entries of the target-offset
            # table, which spells "." in mnemonics as "_". A direct key
            # lookup replaces rebuilding a filtered key list on every call.
            key = mnem.replace(".", "_")
            if key.startswith("B") and key in Instruction._target_offset_map:
                result.add_branch(BranchType.TrueBranch,
                                  insn.target_offset(addr))
                result.add_branch(BranchType.FalseBranch, addr + insn.length)

        return result

    def get_instruction_text(self, data, addr):
        """Return ``(tokens, length)`` for the instruction, or None."""
        insn = self._decode_instruction(data, addr)
        if not insn:
            return None
        text = disassemble_instruction(insn, addr)
        return text, insn.length

    def get_instruction_low_level_il(self, data, addr, il):
        """Lift the instruction into *il*; returns what ``lift`` returns, or None."""
        insn = self._decode_instruction(data, addr)
        if not insn:
            return None
        return lift(insn, addr, il)
class XtensaCall0CallingConvention(CallingConvention):
    """Calling convention description handed to Binary Ninja as "default"."""
    # a0 is dubiously caller saved... it's the ret addr / link register
    caller_saved_regs = ["a0"] + [f"a{n}" for n in range(2, 12)]
    int_arg_regs = [f"a{n}" for n in range(2, 8)]
    int_return_reg = "a2"
    high_int_return_reg = "a3"


def register_stuff():
    """Register the architecture, its ELF hook, calling convention and loader."""
    XtensaLE.register()
    xtensa = Architecture['xtensa']

    # Register ourselves with the ELF loader; 94 is the ELF e_machine value
    # assigned to Xtensa (EM_XTENSA).
    elf_machine_xtensa = 94
    BinaryViewType['ELF'].register_arch(elf_machine_xtensa,
                                        Endianness.LittleEndian,
                                        xtensa)

    convention = XtensaCall0CallingConvention(xtensa, "default")
    xtensa.register_calling_convention(convention)

    # If we register on the Architecture's standalone platform, it seems to use our
    # calling convention without showing __convention("default") on every function
    standalone = xtensa.standalone_platform
    standalone.default_calling_convention = xtensa.calling_conventions['default']

    ESPFirmware.register()


register_stuff()
= binascii.unhexlify("907160") 23 | insn = Instruction.decode(INSN_ABS) 24 | assert insn.op0 == 0 25 | assert insn.op1 == 0 26 | assert insn.op2 == 6 27 | assert insn.r == 7 28 | assert insn.t == 9 29 | assert insn.s == 1 30 | assert insn.length == 3 31 | assert insn.mnem == "ABS" 32 | assert insn.instruction_type == InstructionType.RRR 33 | 34 | def test_decode_add(): 35 | """ 36 | ADD ar, as, at 37 | ADD a3, a2, a1 38 | 39 | * bit 23 40 | * 1000 # op2 41 | * 0000 # op1 42 | * 0011 # a3 is r 43 | * 0010 # a2 is s 44 | * 0001 # a1 is t 45 | * 0000 # op0 46 | * bit 0 47 | 48 | Thus our insn is 80 32 10, which must be byte swapped to 10 32 80 49 | """ 50 | #EveryInstR Group 51 | insn = Instruction.decode(binascii.unhexlify("103280")) 52 | assert insn.op0 == 0 53 | assert insn.op1 == 0 54 | assert insn.op2 == 8 55 | assert insn.r == 3 56 | assert insn.s == 2 57 | assert insn.t == 1 58 | assert insn.length == 3 59 | assert insn.mnem == "ADD" 60 | assert insn.instruction_type == InstructionType.RRR 61 | 62 | def test_add_narrow(): 63 | """ 64 | ADD.N ar, as, at 65 | * bit 15 66 | * rrrr 67 | * ssss 68 | * tttt 69 | * 1010 # op0 70 | * bit 0 71 | Requires Code Density Option 72 | 73 | ADD.N a9, a5, a3 74 | is then 1001 0101 0011 1010, or 953a, reversed to 3a95 75 | """ 76 | INSN_ADD_N = binascii.unhexlify("3a95") 77 | insn = Instruction.decode(INSN_ADD_N) 78 | assert insn.op0 == 0b1010 79 | assert insn.t == 3 80 | assert insn.s == 5 81 | assert insn.r == 9 82 | assert insn.length == 2 83 | assert insn.mnem == "ADD.N" 84 | 85 | def test_addi(): 86 | """ 87 | RRI8 type 88 | ADDI at, as, -128..127 89 | * bit 23 90 | * imm8 # check encoding of this 91 | * 1100 92 | * s 93 | * t 94 | * 0010 95 | * bit 0 96 | 97 | ADDI a11, a1, -2 98 | is then 99 | 1111 1110 1100 0001 1011 0010, or fe c1 b2, reversed to b2c1fe 100 | """ 101 | insn = Instruction.decode(binascii.unhexlify("b2c1fe")) 102 | assert insn.op0 == 0b0010 103 | assert insn.r == 0b1100 104 | assert insn.s == 1 105 | 
def compare_mnem(one, two):
    """Return True when *one* and *two* name the same instruction.

    The comparison ignores case and surrounding whitespace. Special-register
    accesses (``rsr.*``, ``wsr.*``, ``xsr.*``) are reduced to their
    three-letter base mnemonic, so ``"rsr.sar"`` matches ``"RSR"``.
    """
    special_prefixes = ("rsr.", "wsr.", "xsr.")

    def normalize(mnem):
        # Normalize case and whitespace first so that "RSR.SAR" or
        # " rsr.sar" is recognized as a special-register access too.
        mnem = mnem.strip().lower()
        if mnem.startswith(special_prefixes):
            # Work around not having the register names for special regs
            mnem = mnem[:3]
        return mnem

    return normalize(one) == normalize(two)
def _normalize_insn(it):
    """Return a canonical, whitespace-free form of a disassembly line.

    The text is lower-cased and split on whitespace. Commas are dropped from
    every token, and tokens starting with ``0x`` are replaced by the decimal
    text of ``sign_extend(value, 32)`` so that different spellings of the
    same immediate compare equal. The tokens are then concatenated.
    """
    tokens = []
    # Tabs are separators like any other whitespace. Deleting them up front
    # could glue a hex operand to the preceding token and skip the
    # normalization below; ``split()`` already handles them.
    for tok in it.lower().split():
        tok = tok.replace(",", "")
        if tok.startswith("0x"):
            tok = str(sign_extend(int(tok, 0), 32))
        tokens.append(tok)
    return ''.join(tokens)
"torture_test.dump.bz2"), "rt") as fp: 220 | lots_text_dump = fp.readlines() 221 | lots_data = parse_test_data(lots_text_dump) 222 | # lots_text_dump is a bunch of dumped disassembly, uniq'd on the mnem for 223 | # brevity 224 | @pytest.mark.parametrize("parsed_line", lots_data) 225 | def test_lots_text_dump(parsed_line): 226 | insn = Instruction.decode(binascii.unhexlify(parsed_line.opcode)) 227 | assert compare_mnem(insn.mnem, parsed_line.mnem) 228 | 229 | addr = int(parsed_line.addr, 16) 230 | disass_text = tokens_to_text(disassemble_instruction(insn, addr)) 231 | 232 | expected_insn_text = (parsed_line.mnem + " " + parsed_line.rest).strip() 233 | 234 | assert compare_insn(expected_insn_text, disass_text) 235 | 236 | with bz2.open( os.path.join(DIR, "esp32_torture_test.dump.bz2"), "rt") as fp: 237 | esp32_lots_text_dump = fp.readlines() 238 | esp32_lots_data = parse_test_data(esp32_lots_text_dump) 239 | # lots_text_dump is a bunch of dumped disassembly, uniq'd on the mnem for 240 | # brevity 241 | @pytest.mark.parametrize("esp32_parsed_line", esp32_lots_data) 242 | def test_lots_text_dump(esp32_parsed_line): 243 | if esp32_parsed_line.mnem in ['rer', 'wer']: 244 | # I disagree with objdump here; the manual states that these insns take 245 | # arguments; objdump doesn't appear to think so? Also possible my 246 | # cleanup of the output broke the objdump results? 
247 | pytest.xfail() 248 | insn = Instruction.decode(binascii.unhexlify(esp32_parsed_line.opcode)) 249 | assert compare_mnem(insn.mnem, esp32_parsed_line.mnem) 250 | 251 | addr = int(esp32_parsed_line.addr, 16) 252 | disass_text = tokens_to_text(disassemble_instruction(insn, addr)) 253 | 254 | expected_insn_text = (esp32_parsed_line.mnem + " " + 255 | esp32_parsed_line.rest).strip() 256 | 257 | assert compare_insn(expected_insn_text, disass_text) 258 | 259 | def test_rotw_positive(): 260 | rotw_insn = binascii.unhexlify("208040") # ROTW 2 261 | insn = Instruction.decode(rotw_insn) 262 | assert compare_mnem(insn.mnem, "ROTW") 263 | assert insn.rotw_simm4() == 2 264 | disass_text = tokens_to_text(disassemble_instruction(insn, 0x1000)) 265 | assert compare_insn(disass_text, "ROTW 2") 266 | 267 | def test_rotw_negative(): 268 | rotw_insn = binascii.unhexlify("f08040") # ROTW -1 269 | insn = Instruction.decode(rotw_insn) 270 | assert compare_mnem(insn.mnem, "ROTW") 271 | assert insn.rotw_simm4() == -1 272 | disass_text = tokens_to_text(disassemble_instruction(insn, 0x1000)) 273 | assert compare_insn(disass_text, "ROTW -1") 274 | 275 | # We didn't have any tests for the FPU, which lead to an undetected typo 276 | def test_mov_s_fpu(): 277 | movs_insn = binascii.unhexlify("0012fa") 278 | insn = Instruction.decode(movs_insn) 279 | assert compare_mnem(insn.mnem, "MOV.S") 280 | # Disassembly support does not yet exist 281 | #disass_text = tokens_to_text(disassemble_instruction(insn, 0x1000)) 282 | #assert compare_insn(disass_text, "MOV.S f1, f2") 283 | 284 | -------------------------------------------------------------------------------- /binja_xtensa/firmware_parser.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | """ 3 | ESP8266 firmware parser 4 | 5 | Very hacky at the moment. 
def main():
    """Manual smoke test: open a firmware file as a Raw view and parse it.

    The path is the hard-coded ``firmware_path`` below.  If the view cannot be
    opened a message is printed and nothing else happens; otherwise the value
    returned by ``parse_firmware`` is printed after two blank lines.
    """
    firmware_path = ""
    raw_view = BinaryViewType['Raw'].open(firmware_path)
    if not raw_view:
        print("Could not open bv")
        return

    # Two blank lines before the parse result.
    for _ in range(2):
        print()
    print(parse_firmware(raw_view))
"_DebugExceptionVector", "1073743892": "cmd_parse", "1073780760": "MD5Init", "1073756188": "UartRegReadProc", "1073741856": "_NMIExceptionVector", "1073794396": "__truncdfsf2", "1073768496": "rom_set_noise_floor", "1073780788": "MD5Update", "1073756220": "uartAttach", "1073788992": "pbkdf2_sha1", "1073774668": "rom_rfcal_rxiq", "1073741904": "_UserExceptionVector", "1073771536": "rom_pbus_enter_debugmode", "1073764452": "sip_to_host_chain_append", "1073766504": "slc_set_host_io_max_window", "1073752176": "rtc_enter_sleep", "1073768564": "rom_start_noisefloor", "1073760376": "SPIUnlock", "1073766420": "slc_pause_from_host", "1073762428": "lldesc_set_owner", "1073741952": "_ResetVector", "1073743212": "_xtos_restore_intlevel", "1073766540": "slc_init_credit", "1073764036": "sip_post_init", "1073791136": "strncpy", "1073748132": "ets_memset", "1073764520": "sip_get_ptr", "1073772572": "rom_restart_cal", "1073766572": "slc_add_credits", "1073751752": "dtm_set_intr_mask", "1073789108": "hmac_sha1_vector", "1073776824": "rom_rfcal_txiq_cover", "1073766592": "rom_abs_temp", "1073748164": "ets_memmove", "1073753804": "ets_delay_us", "1073766608": "rom_chip_v5_disable_cca", "1073748180": "ets_memcmp", "1073772764": "rom_write_rfpll_sdm", "1073790796": "strlen", "1073760492": "SPIReadModeCnfig", "1073793264": "__muldf3", "1073769172": "rom_tx_mac_enable", "1073762556": "sip_post", "1073780992": "MD5Final", "1073762688": "sip_alloc_to_host_evt", "1073761324": "SPIParamCfg", "1073766668": "rom_chip_v5_sense_backoff", "1073771224": "rom_i2c_writeReg", "1073788292": "SHA1Init", "1073770780": "rom_chip_v5_rx_init", "1073767472": "rom_iq_est_enable", "1073756452": "uart_baudrate_detect", "1073780296": "base64_decode", "1073770892": "rom_chip_v5_tx_init", "1073754456": "ets_wdt_restore", "1073766748": "rom_dc_iq_est", "1073783136": "hmac_md5_vector", "1073743248": "_xtos_cause3_handler", "1073772904": "rom_rfpll_set_freq", "1073751656": "rtc_set_sleep_mode", "1073754480": 
"ets_wdt_init", "1073752808": "ets_bzero", "1073791360": "__addsf3", "1073760644": "SPIEraseChip", "1073768844": "rom_stop_tx_tone", "1073799568": "memset", "1073751180": "ets_install_putc2", "1073748376": "mem_init", "1073753160": "ets_timer_setfn", "1073754548": "roundup2", "1073766840": "rom_en_pwdet", "1073754560": "multofup", "1073755552": "FilePacketSendReqMsgProc", "1073799244": "memmove", "1073765776": "slc_enable", "1073790372": "_xtos_ints_off", "1073748448": "mem_free", "1073751120": "ets_install_external_printf", "1073772284": "rom_pbus_xpd_tx_off", "1073752556": "rtc_intr_handler", "1073788328": "SHA1Update", "1073742932": "_xtos_set_exception_handler", "1073760768": "SPIEraseSector", "1073744388": "remove_head_space", "1073766460": "slc_resume_from_host", "1073756692": "Uart_Init", "1073799708": "__udivsi3", "1073773096": "rom_cal_tos_v50", "1073754672": "UartConnCheck", "1073762868": "sip_to_host_evt_send_done", "1073766968": "rom_get_bb_atten", "1073760692": "SPIEraseBlock", "1073752640": "ets_rtc_int_register", "1073789512": "sha1_prf", "1073760844": "SPIWrite", "1073744480": "convert_para_str", "1073769232": "rom_ana_inf_gating_en", "1073775204": "rom_rfcal_rxiq_set_reg", "1073791592": "__subsf3", "1073777264": "rom_rfcal_txiq_set_reg", "1073772648": "rom_rfpll_reset", "1073745344": "ets_set_idle_cb", "1073752712": "ets_strcpy", "1073777772": "rom_set_txbb_atten", "1073752728": "ets_strncpy", "1073771164": "rom_i2c_readReg_Mask", "1073741936": "_DoubleExceptionVector", "1073799844": "__floatunsisf", "1073752744": "ets_strcmp", "1073799852": "__floatsisf", "1073729776": "rcons", "1073752760": "ets_strncmp", "1073792312": "__adddf3", "1073763008": "sip_reclaim_tx_data_pkt", "1073752776": "ets_strlen", "1073783500": "hmac_md5", "1073779412": "aes_decrypt", "1073752792": "ets_strstr", "1073799972": "xthal_window_spill", "1073767132": "rom_get_fm_sar_dout", "1073745360": "ets_task", "1073779428": "aes_decrypt_deinit", "1073799912": "__floatunsidf", 
"1073763944": "sip_get_state", "1073783532": "sha1_vector", "1073799920": "__floatsidf", "1073752824": "ets_str2macaddr", "1073779456": "rijndaelKeySetupEnc", "1073769224": "phy_get_romfuncs", "1073771276": "rom_i2c_writeReg_Mask", "1073798384": "__umulsidi3", "1073795856": "__udivdi3", "1073761052": "SPIRead", "1073799968": "xthal_window_spill_nw", "1073744676": "conv_str_decimal", "1073799976": "_rom_store_table", "1073756976": "uart_tx_one_char", "1073789756": "wepkey_64", "1073748800": "mem_malloc", "1073799648": "strstr", "1073761092": "SPIEraseArea", "1073742988": "_xtos_l1int_handler", "1073763148": "sip_reclaim_from_host_cmd", "1073757028": "uart_rx_one_char_block", "1073754984": "UartDwnLdProc", "1073777516": "rom_rxiq_cover_mg_mp", "1073752948": "ets_char2xdigit", "1073771388": "rom_pbus_debugmode", "1073762112": "lldesc_build_chain", "1073775496": "rom_rfcal_txcap", "1073757068": "uart_rx_one_char", "1073767316": "rom_get_noisefloor", "1073767344": "rom_get_power_db", "1073757116": "uart_rx_intr_handler", "1073761360": "gpio_init", "1073771112": "rom_i2c_readReg", "1073753036": "ets_getc", "1073741988": "_ResetHandler", "1073791964": "__mulsf3", "1073798112": "_xtos_alloca_handler", "1073798116": "_xtos_syscall_handler", "1073762384": "lldesc_num2link", "1073773552": "rom_pbus_dco___SA2", "1073760424": "SPILock", "1073798136": "_xtos_p_none", "1073794232": "__fixdfsi", "1073798140": "_xtos_set_intlevel", "1073759232": "SPI_write_status", "1073779728": "aes_unwrap", "1073798168": "_xtos_set_min_intlevel", "1073765404": "slc_reattach", "1073749036": "mem_calloc", "1073757232": "UartRxString", "1073751096": "ets_install_uart_printf", "1073768628": "rom_start_tx_tone", "1073759292": "SPI_write_enable", "1073790016": "wepkey_128", "1073798212": "_xtos_unhandled_exception", "1073771592": "rom_pbus_exit_debugmode", "1073763404": "sip_install_rx_ctrl_cb", "1073769552": "rom_set_channel_freq", "1073749176": "eprintf_init_buf", "1073798228": 
"_xtos_return_from_exc", "1073749080": "mem_zalloc", "1073783652": "SHA1Transform", "1073763420": "sip_install_rx_data_cb", "1073776144": "rom_rfcal_txiq", "1073753188": "timer_insert", "1073749100": "mem_realloc", "1073748500": "mem_trim", "1073771644": "rom_pbus_force_test", "1073757312": "send_packet", "1073767556": "rom_linear_to_db", "1073798280": "__divsi3", "1073759372": "Wait_SPI_Idle", "1073751188": "est_get_printf_buf_remain_len", "1073792196": "__fixunssfsi", "1073751196": "est_reset_printf_buf_len", "1073768988": "rom_txtone_linear_pwr", "1073730816": "Td4s", "1073755844": "MemDwnLdStartMsgProc", "1073745080": "conv_str_hex", "1073759424": "Enable_QMode", "1073753284": "ets_timer_arm", "1073751244": "ets_printf", "1073761488": "gpio_output_set", "1073733136": "UartDev", "1073771736": "rom_pbus_rd", "1073761520": "gpio_input_get", "1073767976": "rom_rxiq_get_mis", "1073757428": "SendMsg", "1073779964": "base64_encode", "1073794304": "__fixunsdfsi", "1073756324": "uart_buff_switch", "1073761540": "gpio_register_set", "1073757448": "recv_packet", "1073767692": "rom_set_txclk_en", "1073749268": "eprintf", "1073790248": "_xtos_set_interrupt_handler_arg", "1073800072": "_rom_store", "1073777972": "rom_set_txiq_cal", "1073798456": "xthal_get_ccount", "1073798464": "xthal_set_ccompare", "1073751364": "ets_uart_printf", "1073749320": "eprintf_to_host", "1073771852": "rom_pbus_set_rxgain", "1073742892": "_start", "1073767760": "rom_set_rxclk_en", "1073798488": "xthal_get_intread", "1073761628": "gpio_register_get", "1073798496": "xthal_set_intclear", "1073798504": "rc4_skip", "1073757896": "uart_rx_readbuff", "1073790320": "_xtos_set_interrupt_handler", "1073743220": "_xtos_set_vpri", "1073751416": "ets_external_printf", "1073793940": "__divdf3", "1073753472": "ets_timer_done", "1073790340": "_xtos_ints_on", "1073765456": "slc_init_attach", "1073761672": "gpio_intr_pending", "1073766636": "rom_chip_v5_enable_cca", "1073761680": "gpio_pin_intr_state_set", 
"1073743256": "_xtos_c_wrapper_handler", "1073749408": "ets_write_char", "1073728752": "Te0", "1073767844": "rom_mhz2ieee", "1073741872": "_KernelExceptionVector", "1073753512": "ets_timer_handler_isr", "1073754352": "ets_wdt_disable", "1073772104": "rom_pbus_workmode", "1073765816": "slc_select_tohost_gpio_mode", "1073765824": "slc_select_tohost_gpio", "1073772448": "rom_pbus_xpd_tx_on__low_gain", "1073790408": "strcmp", "1073761740": "gpio_intr_ack", "1073778128": "rijndaelKeySetupDec", "1073751520": "rtc_get_reset_reason", "1073748148": "ets_memcpy", "1073765860": "slc_send_to_host_chain", "1073798204": "_xtos_unhandled_interrupt", "1073743344": "srand", "1073745832": "ets_isr_unmask", "1073794556": "__extendsfdf2", "1073743360": "rand", "1073729792": "Td0", "1073745412": "ets_run", "1073751980": "save_tsf_us", "1073772048": "rom_pbus_set_txgain", "1073745444": "ets_post", "1073761832": "gpio_intr_handler_register", "1073767008": "rom_get_corr_power", "1073759812": "spi_flash_attach", "1073788488": "SHA1Final", "1073751628": "software_reset", "1073751084": "ets_install_putc1", "1073743440": "__muldi3", "1073755736": "FlashDwnLdStopReqMsgProc", "1073794656": "__divdi3", "1073753704": "ets_timer_init", "1073799784": "__umodsi3", "1073759864": "Cache_Read_Enable", "1073763964": "sip_init_attach", "1073756064": "UartConnectProc", "1073798788": "bzero", "1073789480": "hmac_sha1", "1073743496": "xthal_bcopy", "1073755788": "FlashDwnLdParamCfgMsgProc", "1073761936": "gpio_pin_wakeup_enable", "1073766036": "slc_from_host_chain_recycle", "1073751708": "dtm_params_init", "1073778340": "aes_decrypt_init", "1073798824": "memcmp", "1073744156": "get_first_seg", "1073757868": "RcvMsg", "1073759176": "SPI_read_status", "1073774260": "rom_rfcal_pwrctrl", "1073743556": "xthal_memcpy", "1073790664": "strcpy", "1073772236": "rom_pbus_xpd_rx_on", "1073751760": "dtm_get_intr_mask", "1073762004": "gpio_pin_wakeup_disable", "1073751772": "dtm_set_params", "1073755888": 
"MemPacketSendReqMsgProc", "1073757940": "UartGetCmdLn", "1073762044": "gpio_intr_test", "1073749760": "ets_vprintf", "1073753860": "ets_update_cpu_frequency", "1073753868": "ets_get_cpu_frequency", "1073766160": "slc_to_host_chain_recycle", "1073727252": "flashchip", "1073766180": "slc_from_host_chain_fetch", "1073792648": "__subdf3", "1073753908": "ets_wdt_get_mode", "1073768248": "rom_sar_init", "1073772352": "rom_pbus_xpd_tx_on", "1073798984": "memcpy", "1073758028": "GetUartDevice", "1073755448": "FlashDwnLdStartMsgProc", "1073758040": "SelectSpiFunction", "1073796976": "__umoddi3", "1073753064": "ets_putc", "1073745780": "ets_intr_lock", "1073756028": "MemDwnLdStopReqMsgProc", "1073745792": "ets_intr_unlock", "1073753408": "ets_timer_disarm", "1073770372": "rom_chip_50_set_channel", "1073745800": "ets_isr_attach", "1073768332": "rom_set_ana_inf_tx_scale", "1073769112": "rom_tx_mac_disable", "1073745816": "ets_isr_mask", "1073754016": "ets_wdt_enable", "1073751972": "save_rxbcn_mactime", "1073790888": "strncmp", "1073772168": "rom_pbus_xpd_rx_off", "1073780652": "md5_vector", "1073751992": "ets_enter_sleep", "1073745852": "ets_set_user_start", "1073768392": "rom_set_loopback_gain", "1073798476": "xthal_get_ccompare", "1073743692": "xthal_copy123", "1073756076": "UartRegWriteProc", "1073745900": "main", "1073760240": "Cache_Read_Disable"} 2 | -------------------------------------------------------------------------------- /binja_xtensa/disassembly.py: -------------------------------------------------------------------------------- 1 | """ 2 | Xtensa disassembly rendering 3 | 4 | The idea is instruction.py handles instruction decoding, then to get 5 | human-readable disassembly, we call disassemble_instruction from this file. 6 | 7 | The lifter should *not* need the information in this file. If it does, move that 8 | computation into the decoder. 
def _get_space(mnem_length):
    """Return the text token separating a mnemonic from its first operand.

    Mnemonics shorter than ``_MNEM_ALIGN`` characters are padded out to that
    width so the first operands line up vertically; anything as long or
    longer gets exactly one space.
    """
    padding = max(1, _MNEM_ALIGN - mnem_length)
    return InstructionTextToken(InstructionTextTokenType.TextToken,
                                " " * padding)
def _get_b4const(insn, _):
    """Return a 4-byte integer token for the value of ``insn.b4const()``.

    The second argument (the instruction address) is not used.
    """
    return _get_imm32_tok(insn.b4const())
def _dis(fmt_str, *args):
    """Build a disassembly function from a whitespace-separated operand spec.

    ``fmt_str`` names the operands in display order.  Every name is looked up
    in ``_disassembly_fmts``, except names of the form ``inlineN``, which
    are rendered by the N-th extra callable passed in ``args``.

    The returned function takes ``(insn, addr)`` and returns a list of
    tokens: the mnemonic, the alignment space, then the operands separated by
    comma tokens.
    """
    def _renderer_for(insn, operand):
        # Regular operands come straight out of the shared format table.
        if not operand.startswith("inline"):
            return _disassembly_fmts[operand]

        # For one-off encodings the renderer is supplied in the _dis call
        # itself.  These "inline" encodings share a name with the ones in the
        # decoder but are distinct at a programmatic level.
        slot = int(operand[len("inline"):])
        try:
            return args[slot]
        except IndexError:
            # NOTE(review): with no callable supplied, the instruction's own
            # attribute of that name is used and called as f(insn, addr) --
            # confirm this matches the decoder's inlineN signature.
            return getattr(insn, operand)

    def inner(insn, addr):
        operands = fmt_str.split()
        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 insn.mnem),
            _get_space(len(insn.mnem)),
        ]
        for position, operand in enumerate(operands):
            if position:
                tokens.append(_get_comma())
            render = _renderer_for(insn, operand)
            tokens.append(render(insn, addr))
        return tokens

    return inner
def _disassemble_RSR(insn, addr):
    """Disassemble RSR/WSR/XSR as ``<mnem>.<special register name> at``.

    The special register name from ``insn.get_sr_name()`` is folded into the
    mnemonic token; the only operand is the ``at`` register.

    The previous implementation looped over a one-element format list and
    carried a copy-pasted "inline" branch that referenced ``args``, a name
    that does not exist in this function.  The single operand is now rendered
    directly.
    """
    mnem = insn.mnem + "." + insn.get_sr_name()
    return [
        InstructionTextToken(InstructionTextTokenType.InstructionToken, mnem),
        _get_space(len(mnem)),
        _disassembly_fmts["at"](insn, addr),
    ]

# WSR and XSR are rendered exactly like RSR.
_disassemble_WSR = _disassemble_XSR = _disassemble_RSR
# Per-instruction-type fallbacks.  disassemble_instruction uses these when no
# mnemonic-specific _disassemble_<MNEM> function exists for an RRR, RRRN or
# RRI8 instruction.
_disassemble_rrr = _dis("ar as at")
_disassemble_rrrn = _dis("ar as at")
_disassemble_rri8 = _dis("at as simm8")

# Overrides for exceptions to the instruction type.
# Each name is "_disassemble_" + the mnemonic with "." replaced by "_", which
# is how disassemble_instruction looks them up.  The string passed to _dis
# lists the operands in display order; an "inlineN" operand is rendered by the
# N-th extra callable given to _dis.
_disassemble_ABS = _dis("ar at")
_disassemble_ABS_S = _dis("fr fs")
_disassemble_ADD_S = _dis("fr fs ft")
_disassemble_ADDI_N = _dis("ar as addi_n_imm")
_disassemble_ADDMI = _dis("at as simm8_s8")
_disassemble_ALL4 = _dis("bt bs")
_disassemble_ALL8 = _dis("bt bs")
_disassemble_ANDB = _dis("br bs bt")
_disassemble_ANDBC = _dis("br bs bt")
_disassemble_ANY4 = _dis("bt bs")
_disassemble_ANY8 = _dis("bt bs")

# Branches: operands followed by the branch target address.
_disassemble_BALL = _dis("as at target_offset")
_disassemble_BANY = _dis("as at target_offset")
_disassemble_BBC = _dis("as at target_offset")
# BBCI/BBSI show the immediate computed by insn.inline0 instead of "at".
_disassemble_BBCI = _dis("as inline0 target_offset",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_BBS = _dis("as at target_offset")

_disassemble_BBSI = _dis("as inline0 target_offset",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_BEQ = _dis("as at target_offset")
_disassemble_BEQI = _dis("as b4const target_offset")
_disassemble_BEQZ = _dis("as target_offset")
_disassemble_BEQZ_N = _dis("as target_offset")
_disassemble_BF = _dis("bs target_offset")
_disassemble_BGE = _dis("as at target_offset")
_disassemble_BGEI = _dis("as b4const target_offset")
_disassemble_BGEU = _dis("as at target_offset")
_disassemble_BGEUI = _dis("as b4constu target_offset")
_disassemble_BGEZ = _dis("as target_offset")
_disassemble_BLT = _dis("as at target_offset")
_disassemble_BLTI = _dis("as b4const target_offset")
_disassemble_BLTU = _dis("as at target_offset")
_disassemble_BLTUI = _dis("as b4constu target_offset")
_disassemble_BLTZ = _dis("as target_offset")
_disassemble_BNALL = _dis("as at target_offset")
# Continuation of the per-mnemonic disassembly table.  Each entry is built by
# _dis from a whitespace-separated operand list; "inlineN" operands are
# rendered by the N-th extra callable passed to _dis.
_disassemble_BNE = _dis("as at target_offset")
_disassemble_BNEI = _dis("as b4const target_offset")
_disassemble_BNEZ = _dis("as target_offset")
_disassemble_BNEZ_N = _dis("as target_offset")
_disassemble_BNONE = _dis("as at target_offset")

_disassemble_BREAK = _dis("s t")
_disassemble_BREAK_N = _dis("s")
_disassemble_BT = _dis("bs target_offset")

# Direct calls take a target address.
_disassemble_CALL0 = _dis("target_offset")
_disassemble_CALL4 = _dis("target_offset")
_disassemble_CALL8 = _dis("target_offset")
_disassemble_CALL12 = _dis("target_offset")

# Indirect calls take the register holding the target.
_disassemble_CALLX0 = _dis("as")
_disassemble_CALLX4 = _dis("as")
_disassemble_CALLX8 = _dis("as")
_disassemble_CALLX12 = _dis("as")

_disassemble_CEIL_S = _dis("ar fs t")
# Skipping CLAMPS, I don't care about floats
# Skipping DHI, DHU, DHWB, DHWBI, DII, DIU, DIWB, DIWBI, DPFL, DPFR, DPFRO,
# DPFW, DPFWO, they deal with data caching, which is an extension
_disassemble_DSYNC = _dis("") # Just the mnem
_disassemble_ENTRY = _dis("as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_ESYNC = _dis("") # Just the mnem
_disassemble_EXCW = _dis("") # Just the mnem
# EXTUI has two immediates: the first from insn.extui_shiftimm(), the second
# from insn.inline1.
_disassemble_EXTUI = _dis("ar at inline0 inline1",
                          lambda insn, _: _get_imm8_tok(insn.extui_shiftimm()),
                          lambda insn, _: _get_imm8_tok(insn.inline1(_)))
_disassemble_EXTW = _dis("")
# Skipping float stuff
_disassemble_IDTLB = _dis("as")
# Skipping IHI, IHU, III
_disassemble_IITLB = _dis("as")
# Skipping IIU
_disassemble_ILL = _dis("")
_disassemble_ILL_N = _dis("")
# Skipping IPF, IPFL
_disassemble_ISYNC = _dis("")
_disassemble_J = _dis("target_offset")
_disassemble_JX = _dis("as")
# Loads: the 8-bit load shows imm8 as decoded, the wider ones show the value
# computed by insn.inline0 as a 32-bit token.
_disassemble_L8UI = _dis("at as imm8")
_disassemble_L16SI = _dis("at as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_L16UI = _dis("at as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_L32AI = _dis("at as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
# Skipping windowed L32E
_disassemble_L32I = _dis("at as inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_L32I_N = _dis("at as inline0",
                           lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_L32R = _dis("at mem_offset")
# Skipping LDCT
# Skipping LDDEC,LDINC; they're MAC16
# Skipping LICT, LICW, instruction cache option
# Skipping LOOP, LOOPGTZ, LOOPNEZ, loop option
# Skipping LSI, LSIU, LSX, LSXU, MADD_S (floats)
_disassemble_MEMW = _dis("")
_disassemble_MOVI = _dis("at inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_MOVI_N = _dis("as inline0",
                           lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_MOV_N = _dis("at as")
_disassemble_MOVSP = _dis("at as")
_disassemble_NEG = _dis("ar at")
_disassemble_NOP = _dis("")
_disassemble_NOP_N = _dis("")
_disassemble_NSA = _dis("at as")
_disassemble_NSAU = _dis("at as")
_disassemble_PDTLB = _dis("at as")
_disassemble_PITLB = _dis("at as")
_disassemble_RDTLB0 = _dis("at as")
_disassemble_RDTLB1 = _dis("at as")
_disassemble_RER = _dis("at as")
_disassemble_RET = _dis("") # Equivalent in function to "JX a0"
_disassemble_RET_N = _dis("") # Same function as RET
_disassemble_RETW = _dis("")
_disassemble_RETW_N = _dis("")
_disassemble_RFDD = _dis("")
_disassemble_RFDE = _dis("")
_disassemble_RFDO = _dis("")
_disassemble_RFWO = _dis("")
_disassemble_RFWU = _dis("")
_disassemble_RFE = _dis("")
_disassemble_RFI = _dis("s")
_disassemble_RITLB0 = _dis("at as")
_disassemble_RITLB1 = _dis("at as")
# Tail of the per-mnemonic disassembly table (ROTW .. WITLB).  Each entry is
# built by _dis from a whitespace-separated operand list; "inlineN" operands
# are rendered by the N-th extra callable passed to _dis.
_disassemble_ROTW = _dis("rotw_simm4")
_disassemble_RSIL = _dis("at s")
_disassemble_RSYNC = _dis("")

# Stores: the 8-bit store shows imm8 as decoded, the wider ones show the value
# computed by insn.inline0 as a 32-bit token.
_disassemble_S8I = _dis("at as imm8")
_disassemble_S16I = _dis("at as inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32I = _dis("at as inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32I_N = _dis("at as inline0",
                           lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32RI = _dis("at as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
# SEXT displays the t field plus 7.
_disassemble_SEXT = _dis("ar as inline0",
                         lambda insn, _: _get_imm8_tok(insn.t + 7))
_disassemble_SIMCALL = _dis("")
_disassemble_SLL = _dis("ar as")
_disassemble_SLLI = _dis("ar as inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SRA = _dis("ar at")
_disassemble_SRAI = _dis("ar at inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SRL = _dis("ar at")
_disassemble_SRLI = _dis("ar at s")
_disassemble_SSA8B = _dis("as")
_disassemble_SSA8L = _dis("as")
_disassemble_SSAI = _dis("inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SSL = _dis("as")
_disassemble_SSR = _dis("as")
_disassemble_SYSCALL = _dis("")
_disassemble_WAITI = _dis("s")
_disassemble_WDTLB = _dis("at as")
# WER used to be assigned twice with the same format; once is enough.
_disassemble_WER = _dis("at as")
_disassemble_WITLB = _dis("at as")
# TODO: _disassemble_WUR = _dis("at sr") # sr not yet handled
6 | """ 7 | from binaryninja import Architecture, LowLevelILLabel 8 | 9 | from .instruction import sign_extend 10 | 11 | def _reg_name(insn, fmt): 12 | """Get the concrete register for a particular part of an instruction 13 | 14 | For example, if the docs say an instruction writes to "as", we call this 15 | function, which will check the `s` decoded control signal (say it's "7") and 16 | return "a7" for passing to BNIL. 17 | """ 18 | if fmt.startswith("a"): 19 | rest = fmt[1:] 20 | val = getattr(insn, rest, None) 21 | if val is None: 22 | raise Exception("Could not find property " + fmt) 23 | return "a" + str(val) 24 | else: 25 | # When we lift boolean instructions, we'll need to add support for "b" 26 | # registers, etc. 27 | raise Exception("Unimplemented reg name fmt: " + fmt) 28 | 29 | def lift(insn, addr, il): 30 | """Dispatch function for lifting 31 | 32 | Looks up _lift_MNEM() in the current global namespace (I think that's just 33 | the module level?) and calls it if it exists, otherwise we say the 34 | instruction is unimplemented. 35 | """ 36 | try: 37 | # We replace the "." in mnemonics with a "_", as we do in several other 38 | # places in the code. 39 | # At some point, this should become a property of the Instruction. 40 | func = globals()["_lift_" + insn.mnem.replace(".", "_")] 41 | except KeyError: 42 | il.append(il.unimplemented()) 43 | return insn.length 44 | 45 | return func(insn, addr, il) 46 | 47 | # Helpers for some shared code between instructions 48 | 49 | def _lift_cond(cond, insn, addr, il): 50 | """Helper for lifting conditional jumps 51 | 52 | We pass in an IL condition (LowLevelILExpr) and this function lifts a IL 53 | conditional that will jump to `insn.target_offset(addr)` if the condition is 54 | true, otherwise we continue to the next instruction. 
55 | """ 56 | true_label = il.get_label_for_address(Architecture['xtensa'], 57 | insn.target_offset(addr)) 58 | false_label = il.get_label_for_address(Architecture['xtensa'], 59 | addr + insn.length) 60 | must_mark_true = False 61 | if true_label is None: 62 | true_label = LowLevelILLabel() 63 | must_mark_true = True 64 | 65 | must_mark_false = False 66 | if false_label is None: 67 | false_label = LowLevelILLabel() 68 | must_mark_false = True 69 | 70 | il.append( 71 | il.if_expr(cond, 72 | true_label, 73 | false_label 74 | )) 75 | if must_mark_true: 76 | il.mark_label(true_label) 77 | il.append(il.jump(il.const(4, insn.target_offset(addr)))) 78 | if must_mark_false: 79 | il.mark_label(false_label) 80 | il.append(il.jump(il.const(4, addr + insn.length))) 81 | return insn.length 82 | 83 | def _lift_cmov(cond, insn, addr, il): 84 | """Helper for lifting conditional moves 85 | 86 | We pass in an IL condition (LowLevelILExpr) and this function lifts a move 87 | from as to ar if the condition is true. In either case we then continue with 88 | the next instruction after the (potential) move. 
89 | """ 90 | true_label = LowLevelILLabel() 91 | false_label = LowLevelILLabel() 92 | il.append(il.if_expr(cond, true_label, false_label)) 93 | il.mark_label(true_label) 94 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 95 | il.reg(4, _reg_name(insn, "as")))) 96 | il.mark_label(false_label) 97 | return insn.length 98 | 99 | def _lift_addx(x_bits, insn, addr, il): 100 | """Helper for ADDX2, ADDX4, ADDX8""" 101 | il.append( 102 | il.set_reg(4, _reg_name(insn, "ar"), 103 | il.add(4, 104 | il.shift_left(4, 105 | il.reg(4, _reg_name(insn, "as")), 106 | il.const(4, x_bits)), 107 | il.reg(4, _reg_name(insn, "at"))))) 108 | return insn.length 109 | 110 | def _lift_subx(x_bits, insn, addr, il): 111 | """Helper for SUBX2, SUBX4, SUBX8""" 112 | il.append( 113 | il.set_reg(4, _reg_name(insn, "ar"), 114 | il.sub(4, 115 | il.shift_left(4, 116 | il.reg(4, _reg_name(insn, "as")), 117 | il.const(4, x_bits)), 118 | il.reg(4, _reg_name(insn, "at"))))) 119 | return insn.length 120 | 121 | # From here on down, I lifted instructions in priority order of how much 122 | # analysis it would get me. So I started with branches and common math and 123 | # worked my way down the frequency list. 
124 | 125 | def _lift_CALL0(insn, addr, il): 126 | dest = il.const(4, insn.target_offset(addr)) 127 | il.append( 128 | il.call(dest)) 129 | return insn.length 130 | 131 | def _lift_CALLX0(insn, addr, il): 132 | dest = il.reg(4, _reg_name(insn, "as")) 133 | il.append( 134 | il.call(dest)) 135 | return insn.length 136 | 137 | def _lift_RET(insn, addr, il): 138 | dest = il.reg(4, 'a0') 139 | il.append(il.ret(dest)) 140 | return insn.length 141 | 142 | _lift_RET_N = _lift_RET 143 | 144 | def _lift_L32I_N(insn, addr, il): 145 | _as = il.reg(4, _reg_name(insn, "as")) 146 | imm = il.const(4, insn.inline0(addr)) 147 | va = il.add(4, _as, imm) 148 | il.append( 149 | il.set_reg(4, _reg_name(insn, "at"), 150 | il.load(4, va) 151 | )) 152 | return insn.length 153 | 154 | def _lift_L32R(insn, addr, il): 155 | va = il.const(4, insn.mem_offset(addr)) 156 | il.append( 157 | il.set_reg(4, _reg_name(insn, "at"), 158 | il.load(4, va) 159 | )) 160 | return insn.length 161 | 162 | def _lift_S32I_N(insn, addr, il): 163 | _as = il.reg(4, _reg_name(insn, "as")) 164 | imm = il.const(4, insn.inline0(addr)) 165 | va = il.add(4, _as, imm) 166 | il.append( 167 | il.store(4, va, il.reg(4, "a" + str(insn.t)))) 168 | return insn.length 169 | 170 | def _lift_MOVI_N(insn, addr, il): 171 | il.append( 172 | il.set_reg(4, _reg_name(insn, "as"), 173 | il.const(4, insn.inline0(addr)) 174 | )) 175 | return insn.length 176 | 177 | def _lift_MOV_N(insn, addr, il): 178 | il.append( 179 | il.set_reg(4, _reg_name(insn, "at"), 180 | il.reg(4, _reg_name(insn, "as")) 181 | )) 182 | return insn.length 183 | 184 | def _lift_ADDI(insn, addr, il): 185 | il.append( 186 | il.set_reg(4, _reg_name(insn, "at"), 187 | il.add(4, 188 | il.reg(4, _reg_name(insn, "as")), 189 | il.const(4, insn.simm8()) 190 | ))) 191 | return insn.length 192 | 193 | def _lift_L8UI(insn, addr, il): 194 | va = il.add(4, 195 | il.reg(4, _reg_name(insn, "as")), 196 | il.const(4, insn.imm8)) 197 | il.append( 198 | il.set_reg(4, _reg_name(insn, 
"at"), 199 | il.zero_extend(4, 200 | il.load(1, va)))) 201 | return insn.length 202 | 203 | def _lift_S32I(insn, addr, il): 204 | va = il.add(4, 205 | il.reg(4, _reg_name(insn, "as")), 206 | il.const(4, insn.inline0(addr))) 207 | il.append( 208 | il.store(4, va, il.reg(4, _reg_name(insn, "at")))) 209 | return insn.length 210 | 211 | def _lift_L32I(insn, addr, il): 212 | va = il.add(4, 213 | il.reg(4, _reg_name(insn, "as")), 214 | il.const(4, insn.inline0(addr))) 215 | il.append(il.set_reg(4, _reg_name(insn, "at"), 216 | il.load(4, va))) 217 | return insn.length 218 | 219 | def _lift_L16SI(insn, addr, il): 220 | va = il.add(4, 221 | il.reg(4, _reg_name(insn, "as")), 222 | il.const(4, insn.inline0(addr))) 223 | il.append(il.set_reg(4, _reg_name(insn, "at"), 224 | il.sign_extend(4, il.load(2, va)))) 225 | return insn.length 226 | 227 | def _lift_L16UI(insn, addr, il): 228 | va = il.add(4, 229 | il.reg(4, _reg_name(insn, "as")), 230 | il.const(4, insn.inline0(addr))) 231 | il.append(il.set_reg(4, _reg_name(insn, "at"), 232 | il.zero_extend(4, il.load(2, va)))) 233 | return insn.length 234 | 235 | def _lift_J(insn, addr, il): 236 | il.append(il.jump(il.const(4, insn.target_offset(addr)))) 237 | return insn.length 238 | 239 | def _lift_CALLX0(insn, addr, il): 240 | il.append( 241 | il.call(il.reg(4, _reg_name(insn, "as")))) 242 | return insn.length 243 | 244 | def _lift_JX(insn, addr, il): 245 | il.append(il.jump(il.reg(4, _reg_name(insn, "as")))) 246 | return insn.length 247 | 248 | def _lift_S8I(insn, addr, il): 249 | il.append(il.store(1, il.add(4, 250 | il.reg(4, _reg_name(insn, "as")), 251 | il.const(4, insn.imm8)), 252 | il.low_part(1, il.reg(4, _reg_name(insn, "at"))))) 253 | return insn.length 254 | 255 | def _lift_MOVI(insn, addr, il): 256 | il.append(il.set_reg(4, _reg_name(insn, "at"), 257 | il.const(4, insn.inline0(addr)))) 258 | return insn.length 259 | 260 | def _lift_EXTUI(insn, addr, il): 261 | inp = il.reg(4, _reg_name(insn, "at")) 262 | 263 | mask = (2 
** insn.inline1(addr)) - 1 264 | mask_il = il.const(4, mask) 265 | 266 | shiftimm = insn.extui_shiftimm() 267 | if shiftimm: 268 | shift_il = il.const(1, shiftimm) 269 | shifted = il.logical_shift_right(4, inp, shift_il) 270 | anded = il.and_expr(4, shifted, mask_il) 271 | else: 272 | # If we don't have to shift (thus shiftimm should be 0), then don't emit 273 | # the IL for it 274 | anded = il.and_expr(4, inp, mask_il) 275 | 276 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 277 | anded 278 | )) 279 | return insn.length 280 | 281 | def _lift_OR(insn, addr, il): 282 | il.append( 283 | il.set_reg(4, _reg_name(insn, "ar"), 284 | il.or_expr(4, 285 | il.reg(4, _reg_name(insn, "as")), 286 | il.reg(4, _reg_name(insn, "at")) 287 | ))) 288 | return insn.length 289 | 290 | def _lift_MEMW(insn, addr, il): 291 | il.append( 292 | il.intrinsic([], "memw", []) 293 | ) 294 | return insn.length 295 | 296 | def _lift_ADDI_N(insn, addr, il): 297 | il.append( 298 | il.set_reg(4, _reg_name(insn, "ar"), 299 | il.add(4, 300 | il.reg(4, _reg_name(insn, "as")), 301 | il.const(4, insn.inline0(addr)) 302 | ))) 303 | return insn.length 304 | 305 | def _lift_SLLI(insn, addr, il): 306 | il.append( 307 | il.set_reg(4, _reg_name(insn, "ar"), 308 | il.shift_left(4, 309 | il.reg(4, _reg_name(insn, "as")), 310 | il.const(1, insn.inline0(addr)) 311 | ))) 312 | return insn.length 313 | 314 | def _lift_ADD_N(insn, addr, il): 315 | il.append( 316 | il.set_reg(4, _reg_name(insn, "ar"), 317 | il.add(4, 318 | il.reg(4, _reg_name(insn, "as")), 319 | il.reg(4, _reg_name(insn, "at")) 320 | ))) 321 | return insn.length 322 | 323 | def _lift_BEQZ_N(insn, addr, il): 324 | cond = il.compare_equal(4, il.reg(4, _reg_name(insn, "as")), il.const(4, 0)) 325 | return _lift_cond(cond, insn, addr, il) 326 | 327 | def _lift_AND(insn, addr, il): 328 | il.append( 329 | il.set_reg(4, _reg_name(insn, "ar"), 330 | il.and_expr(4, 331 | il.reg(4, _reg_name(insn, "as")), 332 | il.reg(4, _reg_name(insn, "at")) 333 | ))) 334 | 
return insn.length 335 | 336 | _lift_BEQZ = _lift_BEQZ_N 337 | 338 | def _lift_L16UI(insn, addr, il): 339 | va = il.add(4, 340 | il.reg(4, _reg_name(insn, "as")), 341 | il.const(4, insn.inline0(addr))) 342 | il.append( 343 | il.set_reg(4, _reg_name(insn, "at"), 344 | il.zero_extend(4, il.load(2, va)))) 345 | return insn.length 346 | 347 | def _lift_BNEZ(insn, addr, il): 348 | cond = il.compare_not_equal(4, 349 | il.reg(4, _reg_name(insn, "as")), 350 | il.const(4, 0)) 351 | return _lift_cond(cond, insn, addr, il) 352 | 353 | _lift_BNEZ_N = _lift_BNEZ 354 | 355 | def _lift_BEQZ(insn, addr, il): 356 | cond = il.compare_equal(4, 357 | il.reg(4, _reg_name(insn, "as")), 358 | il.const(4, 0)) 359 | return _lift_cond(cond, insn, addr, il) 360 | 361 | _lift_BEQZ_N = _lift_BEQZ 362 | 363 | def _lift_BNEI(insn, addr, il): 364 | cond = il.compare_not_equal(4, 365 | il.reg(4, _reg_name(insn, "as")), 366 | il.const(4, insn.b4const())) 367 | return _lift_cond(cond, insn, addr, il) 368 | 369 | def _lift_BEQI(insn, addr, il): 370 | cond = il.compare_equal(4, 371 | il.reg(4, _reg_name(insn, "as")), 372 | il.const(4, insn.b4const())) 373 | return _lift_cond(cond, insn, addr, il) 374 | 375 | def _lift_BALL(insn, addr, il): 376 | cond = il.compare_equal(4, 377 | il.and_expr(4, 378 | il.reg(4, _reg_name(insn, "at")), 379 | il.not_expr(4, il.reg(4, _reg_name(insn, "as"))) 380 | ), 381 | il.const(4, 0)) 382 | return _lift_cond(cond, insn, addr, il) 383 | 384 | def _lift_BNALL(insn, addr, il): 385 | cond = il.compare_not_equal(4, 386 | il.and_expr(4, 387 | il.reg(4, _reg_name(insn, "at")), 388 | il.not_expr(4, il.reg(4, _reg_name(insn, "as"))) 389 | ), 390 | il.const(4, 0)) 391 | return _lift_cond(cond, insn, addr, il) 392 | 393 | def _lift_BANY(insn, addr, il): 394 | cond = il.compare_not_equal(4, 395 | il.and_expr(4, 396 | il.reg(4, _reg_name(insn, "as")), 397 | il.reg(4, _reg_name(insn, "at")) 398 | ), 399 | il.const(4, 0)) 400 | return _lift_cond(cond, insn, addr, il) 401 | 402 | def 
_lift_BNONE(insn, addr, il): 403 | cond = il.compare_equal(4, 404 | il.and_expr(4, 405 | il.reg(4, _reg_name(insn, "as")), 406 | il.reg(4, _reg_name(insn, "at")) 407 | ), 408 | il.const(4, 0)) 409 | return _lift_cond(cond, insn, addr, il) 410 | 411 | def _lift_BBC(insn, addr, il): 412 | cond = il.compare_equal(4, 413 | il.test_bit(4, 414 | il.reg(4, _reg_name(insn, "as")), 415 | # Strictly speaking we're supposed to check the 416 | # low 5 bits of at. I don't really see the need 417 | # to clutter the UI with it 418 | 419 | # Also: TODO: figure out which way Binja numbers 420 | # the bits 421 | il.reg(4, _reg_name(insn, "at")) 422 | ), 423 | il.const(4, 0)) 424 | return _lift_cond(cond, insn, addr, il) 425 | 426 | def _lift_BBS(insn, addr, il): 427 | cond = il.test_bit(4, 428 | il.reg(4, _reg_name(insn, "as")), 429 | # Strictly speaking we're supposed to check the 430 | # low 5 bits of at. I don't really see the need 431 | # to clutter the UI with it 432 | il.reg(4, _reg_name(insn, "at"))) 433 | return _lift_cond(cond, insn, addr, il) 434 | 435 | def _lift_BBCI(insn, addr, il): 436 | cond = il.compare_equal(4, 437 | il.test_bit(4, 438 | il.reg(4, _reg_name(insn, "as")), 439 | # Also: TODO: figure out which way Binja numbers 440 | # the bits 441 | il.const(4, insn.inline0(addr)) 442 | ), 443 | il.const(4, 0)) 444 | return _lift_cond(cond, insn, addr, il) 445 | 446 | def _lift_BBSI(insn, addr, il): 447 | cond = il.test_bit(4, 448 | il.reg(4, _reg_name(insn, "as")), 449 | il.const(4, insn.inline0(addr))) 450 | return _lift_cond(cond, insn, addr, il) 451 | 452 | def _lift_BEQ(insn, addr, il): 453 | cond = il.compare_equal(4, 454 | il.reg(4, _reg_name(insn, "as")), 455 | il.reg(4, _reg_name(insn, "at"))) 456 | return _lift_cond(cond, insn, addr, il) 457 | 458 | def _lift_BNE(insn, addr, il): 459 | cond = il.compare_not_equal(4, 460 | il.reg(4, _reg_name(insn, "as")), 461 | il.reg(4, _reg_name(insn, "at"))) 462 | return _lift_cond(cond, insn, addr, il) 463 | 464 | def 
_lift_BGE(insn, addr, il): 465 | cond = il.compare_signed_greater_equal(4, 466 | il.reg(4, _reg_name(insn, "as")), 467 | il.reg(4, _reg_name(insn, "at")) 468 | ) 469 | return _lift_cond(cond, insn, addr, il) 470 | 471 | def _lift_BGEU(insn, addr, il): 472 | cond = il.compare_unsigned_greater_equal(4, 473 | il.reg(4, _reg_name(insn, "as")), 474 | il.reg(4, _reg_name(insn, "at")) 475 | ) 476 | return _lift_cond(cond, insn, addr, il) 477 | 478 | def _lift_BGEI(insn, addr, il): 479 | cond = il.compare_signed_greater_equal(4, 480 | il.reg(4, _reg_name(insn, "as")), 481 | il.const(4, insn.b4const()) 482 | ) 483 | return _lift_cond(cond, insn, addr, il) 484 | 485 | def _lift_BGEUI(insn, addr, il): 486 | cond = il.compare_unsigned_greater_equal(4, 487 | il.reg(4, _reg_name(insn, "as")), 488 | il.const(4, insn.b4constu()) 489 | ) 490 | return _lift_cond(cond, insn, addr, il) 491 | 492 | def _lift_BGEZ(insn, addr, il): 493 | cond = il.compare_signed_greater_equal(4, 494 | il.reg(4, _reg_name(insn, "as")), 495 | il.const(4, 0)) 496 | return _lift_cond(cond, insn, addr, il) 497 | 498 | def _lift_BLT(insn, addr, il): 499 | cond = il.compare_signed_less_than(4, 500 | il.reg(4, _reg_name(insn, "as")), 501 | il.reg(4, _reg_name(insn, "at")) 502 | ) 503 | return _lift_cond(cond, insn, addr, il) 504 | 505 | def _lift_BLTU(insn, addr, il): 506 | cond = il.compare_unsigned_less_than(4, 507 | il.reg(4, _reg_name(insn, "as")), 508 | il.reg(4, _reg_name(insn, "at")) 509 | ) 510 | return _lift_cond(cond, insn, addr, il) 511 | 512 | def _lift_BLTI(insn, addr, il): 513 | cond = il.compare_signed_less_than(4, 514 | il.reg(4, _reg_name(insn, "as")), 515 | il.const(4, insn.b4const()) 516 | ) 517 | return _lift_cond(cond, insn, addr, il) 518 | 519 | def _lift_BLTUI(insn, addr, il): 520 | cond = il.compare_unsigned_less_than(4, 521 | il.reg(4, _reg_name(insn, "as")), 522 | il.const(4, insn.b4constu()) 523 | ) 524 | return _lift_cond(cond, insn, addr, il) 525 | 526 | def _lift_BLTZ(insn, addr, 
il): 527 | cond = il.compare_signed_less_than(4, 528 | il.reg(4, _reg_name(insn, "as")), 529 | il.const(4, 0)) 530 | return _lift_cond(cond, insn, addr, il) 531 | 532 | def _lift_SUB(insn, addr, il): 533 | il.append( 534 | il.set_reg(4, _reg_name(insn, "ar"), 535 | il.sub(4, 536 | il.reg(4, _reg_name(insn, "as")), 537 | il.reg(4, _reg_name(insn, "at")) 538 | ))) 539 | return insn.length 540 | 541 | def _lift_ADD(insn, addr, il): 542 | il.append( 543 | il.set_reg(4, _reg_name(insn, "ar"), 544 | il.add(4, 545 | il.reg(4, _reg_name(insn, "as")), 546 | il.reg(4, _reg_name(insn, "at")) 547 | ))) 548 | return insn.length 549 | 550 | def _lift_XOR(insn, addr, il): 551 | il.append( 552 | il.set_reg(4, _reg_name(insn, "ar"), 553 | il.xor_expr(4, 554 | il.reg(4, _reg_name(insn, "as")), 555 | il.reg(4, _reg_name(insn, "at")) 556 | ))) 557 | return insn.length 558 | 559 | def _lift_S16I(insn, addr, il): 560 | va = il.add(4, 561 | il.reg(4, _reg_name(insn, "as")), 562 | il.const(4, insn.inline0(addr)) 563 | ) 564 | il.append( 565 | il.store(2, va, 566 | il.low_part(2, il.reg(4, _reg_name(insn, "at"))))) 567 | return insn.length 568 | 569 | def _lift_SRAI(insn, addr, il): 570 | il.append( 571 | il.set_reg(4, _reg_name(insn, "ar"), 572 | il.arith_shift_right(4, 573 | il.reg(4, _reg_name(insn, "at")), 574 | il.const(4, insn.inline0(addr))))) 575 | return insn.length 576 | 577 | def _lift_ADDX2(insn, addr, il): 578 | return _lift_addx(1, insn, addr, il) 579 | 580 | def _lift_ADDX4(insn, addr, il): 581 | return _lift_addx(2, insn, addr, il) 582 | 583 | def _lift_ADDX8(insn, addr, il): 584 | return _lift_addx(3, insn, addr, il) 585 | 586 | def _lift_SUBX2(insn, addr, il): 587 | return _lift_subx(1, insn, addr, il) 588 | 589 | def _lift_SUBX4(insn, addr, il): 590 | return _lift_subx(2, insn, addr, il) 591 | 592 | def _lift_SUBX8(insn, addr, il): 593 | return _lift_subx(3, insn, addr, il) 594 | 595 | def _lift_SRLI(insn, addr, il): 596 | il.append( 597 | il.set_reg(4, _reg_name(insn, 
"ar"), 598 | il.logical_shift_right(4, 599 | il.reg(4, _reg_name(insn, "at")), 600 | il.const(4, insn.s)))) 601 | return insn.length 602 | 603 | def _lift_ADDMI(insn, addr, il): 604 | constant = sign_extend(insn.imm8, 8) << 8 605 | il.append( 606 | il.set_reg(4, _reg_name(insn, "at"), 607 | il.add(4, 608 | il.reg(4, _reg_name(insn, "as")), 609 | il.const(4, constant)))) 610 | return insn.length 611 | 612 | def _lift_MULL(insn, addr, il): 613 | il.append( 614 | il.set_reg(4, _reg_name(insn, "ar"), 615 | il.mult(4, 616 | il.reg(4, _reg_name(insn, "as")), 617 | il.reg(4, _reg_name(insn, "at"))))) 618 | return insn.length 619 | 620 | def _lift_NEG(insn, addr, il): 621 | il.append( 622 | il.set_reg(4, _reg_name(insn, "ar"), 623 | il.neg_expr(4, il.reg(4, _reg_name(insn, "at"))))) 624 | return insn.length 625 | 626 | def _lift_SYSCALL(insn, addr, il): 627 | il.append(il.system_call()) 628 | return insn.length 629 | 630 | def _lift_MOVEQZ(insn, addr, il): 631 | cond = il.compare_equal(4, 632 | il.reg(4, _reg_name(insn, "at")), 633 | il.const(4, 0)) 634 | return _lift_cmov(cond, insn, addr, il) 635 | 636 | def _lift_MOVNEZ(insn, addr, il): 637 | cond = il.compare_not_equal(4, 638 | il.reg(4, _reg_name(insn, "at")), 639 | il.const(4, 0)) 640 | return _lift_cmov(cond, insn, addr, il) 641 | 642 | def _lift_MOVGEZ(insn, addr, il): 643 | cond = il.compare_signed_greater_equal(4, 644 | il.reg(4, _reg_name(insn, "at")), 645 | il.const(4, 0)) 646 | return _lift_cmov(cond, insn, addr, il) 647 | 648 | def _lift_MOVLTZ(insn, addr, il): 649 | cond = il.compare_signed_less_than(4, 650 | il.reg(4, _reg_name(insn, "at")), 651 | il.const(4, 0)) 652 | return _lift_cmov(cond, insn, addr, il) 653 | 654 | def _lift_SSL(insn, addr, il): 655 | il.append(il.set_reg(1, "sar", 656 | il.sub(1, 657 | il.const(1, 32), 658 | il.low_part(1, il.reg(4, _reg_name(insn, "as"))) 659 | ))) 660 | return insn.length 661 | 662 | def _lift_SSR(insn, addr, il): 663 | il.append(il.set_reg(1, "sar", 664 | 
il.low_part(1, il.reg(4, _reg_name(insn, "as"))))) 665 | return insn.length 666 | 667 | def _lift_SSAI(insn, addr, il): 668 | il.append(il.set_reg(1, "sar", 669 | il.const(1, insn.inline0(addr)))) 670 | return insn.length 671 | 672 | def _lift_SLL(insn, addr, il): 673 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 674 | il.shift_left(4, 675 | il.reg(4, _reg_name(insn, "as")), 676 | il.reg(1, "sar")))) 677 | return insn.length 678 | 679 | def _lift_SRL(insn, addr, il): 680 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 681 | il.logical_shift_right(4, 682 | il.reg(4, _reg_name(insn, "at")), 683 | il.reg(1, "sar")))) 684 | return insn.length 685 | 686 | def _lift_SRC(insn, addr, il): 687 | operand = il.reg_split(8, 688 | _reg_name(insn, "as"), 689 | _reg_name(insn, "at")) 690 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 691 | il.low_part(4, 692 | il.logical_shift_right(8, 693 | operand, 694 | il.reg(1, "sar")) 695 | ))) 696 | return insn.length 697 | 698 | def _lift_SSA8L(insn, addr, il): 699 | il.append(il.set_reg(1, "sar", 700 | # Low part is not strictly correct... 
but good enough 701 | il.shift_left(1, 702 | il.low_part(1, il.reg(4, _reg_name(insn, "as"))), 703 | il.const(1, 3)))) 704 | return insn.length 705 | 706 | def _lift_SRA(insn, addr, il): 707 | il.append(il.set_reg(4, _reg_name(insn, "ar"), 708 | il.arith_shift_right(4, 709 | il.reg(4, _reg_name(insn, "at")), 710 | il.reg(1, "sar")))) 711 | return insn.length 712 | 713 | def _lift_ISYNC(insn, addr, il): 714 | il.append( 715 | il.intrinsic([], "isync", []) 716 | ) 717 | return insn.length 718 | 719 | def _lift_ILL(insn, addr, il): 720 | # TODO: pick a proper trap constant 721 | il.append(il.trap(0)) 722 | return insn.length 723 | 724 | def _lift_MUL16S(insn, addr, il): 725 | il.append( 726 | il.set_reg(4, _reg_name(insn, "ar"), 727 | il.mult(4, 728 | il.sign_extend(4, 729 | il.low_part(2, 730 | il.reg(4, _reg_name(insn, "as")))), 731 | il.sign_extend(4, 732 | il.low_part(2, 733 | il.reg(4, _reg_name(insn, "at")))) 734 | ))) 735 | return insn.length 736 | 737 | def _lift_MUL16U(insn, addr, il): 738 | il.append( 739 | il.set_reg(4, _reg_name(insn, "ar"), 740 | il.mult(4, 741 | il.zero_extend(4, 742 | il.low_part(2, 743 | il.reg(4, _reg_name(insn, "as")))), 744 | il.zero_extend(4, 745 | il.low_part(2, 746 | il.reg(4, _reg_name(insn, "at")))) 747 | ))) 748 | return insn.length 749 | 750 | def _lift_NOP(insn, addr, il): 751 | il.append(il.nop()) 752 | return insn.length 753 | 754 | _lift_NOP_N = _lift_NOP -------------------------------------------------------------------------------- /binja_xtensa/instruction.py: -------------------------------------------------------------------------------- 1 | """ 2 | Xtensa instruction decoder 3 | 4 | This was created in roughly 10 hours over the course of a weekend with the 5 | Xtensa manual in one window and Vim in the other. If you plan to make changes, I 6 | suggest looking at section 7.3.1 "Opcode Maps" in the Xtensa manual, as the code 7 | follows it directly (which explains the odd order of instructions). 
Overall, it
near-exactly matches the manual, with the exception of a few simplifications
involving instructions I didn't care about, and also fixing (<5) errors in the
manual.

The separation of concerns between instruction decoding, disassembly, and
lifting is roughly as follows: anything that can be done without knowing the
address is done as part of instruction decoding. There might be a couple of
places where I declare the computation with a lambda in decoding, which is
called during disassembly with the address. Anyway, all of the decoding is done
in static methods.

When I got to actual disassembly, I ran into a few issues where, yes, I had
decoded the instruction per the type "RRR", "RRI8", etc., but the immediate was
further encoded. In some cases (say, making a signed value from the imm8), I've
added methods to the Instruction class that will do that transformation. In more
instruction-specific cases, I added the ability to define a lambda inline that
does the specified transformation to the immediate (say it's stored shifted
right by a couple bits). In many cases, I've called it "inline0", then that is
referenced by the "inline0" in the disassembly code, as well as in the lifting
code.

Actual instruction decoding starts in Instruction.decode.
30 | 31 | Link to the Xtensa docs/manual I was referencing: 32 | https://0x04.net/~mwk/doc/xtensa.pdf 33 | 34 | """ 35 | from enum import Enum 36 | 37 | 38 | # https://stackoverflow.com/a/32031543 39 | def sign_extend(value, bits): 40 | sign_bit = 1 << (bits - 1) 41 | return (value & (sign_bit - 1)) - (value & sign_bit) 42 | 43 | 44 | class InstructionType(Enum): 45 | RRR = 1 46 | RSR = 2 47 | CALLX = 3 48 | RRI4 = 4 49 | RRI8 = 5 50 | RI16 = 6 51 | CALL = 7 52 | BRI8 = 8 53 | BRI12 = 9 54 | RRRN = 10 55 | RI7 = 11 56 | RI6 = 12 57 | 58 | 59 | def mnem(_mnem, func, validity_predicate=None, **outer_kwargs): 60 | """Not public, just need the DSL to be prettier without _""" 61 | def inner(insn, *args, **kwargs): 62 | insn.mnem = _mnem 63 | getattr(Instruction, "_decode_fmt_" + func)( 64 | insn, *args, **kwargs 65 | ) 66 | if validity_predicate and not validity_predicate(insn): 67 | insn.valid = False 68 | else: 69 | insn.valid = True 70 | if outer_kwargs: 71 | for key, value in outer_kwargs.items(): 72 | if key.startswith("inline"): 73 | bound = value.__get__(insn, insn.__class__) 74 | setattr(insn, key, bound) 75 | return insn 76 | return inner 77 | 78 | 79 | def _decode_components(insn, insn_bytes, components): 80 | for comp in components: 81 | setattr(insn, comp, globals()["decode_" + comp](insn_bytes)) 82 | 83 | 84 | # lambdas to decode the various control signals 85 | decode_op0 = lambda insn_bytes: insn_bytes[0] & 0xf 86 | decode_op1 = lambda insn_bytes: (insn_bytes[2]) & 0xf 87 | decode_op2 = lambda insn_bytes: (insn_bytes[2] >> 4) & 0xf 88 | decode_t = lambda insn_bytes: (insn_bytes[0] >> 4) & 0xf 89 | decode_s = lambda insn_bytes: insn_bytes[1] & 0xf 90 | decode_r = lambda insn_bytes: (insn_bytes[1] >> 4) & 0xf 91 | decode_n = lambda insn_bytes: (insn_bytes[0] >> 4) & 3 92 | decode_m = lambda insn_bytes: (insn_bytes[0] >> 6) & 3 93 | decode_sr = lambda insn_bytes: insn_bytes[1] 94 | decode_imm4 = lambda insn_bytes: (insn_bytes[2] >> 4) & 0xf 95 | decode_imm8 
= lambda insn_bytes: insn_bytes[2] 96 | decode_imm12 = lambda insn_bytes: (insn_bytes[2] << 4) + ((insn_bytes[1] >> 4) & 0xf) 97 | decode_imm16 = lambda insn_bytes: (insn_bytes[2] << 8) + insn_bytes[1] 98 | decode_imm7 = lambda insn_bytes: (((insn_bytes[0] >> 4) & 0b111) << 4) + ((insn_bytes[1] >> 4) & 0xf) 99 | decode_imm6 = lambda insn_bytes: (((insn_bytes[0] >> 4) & 0b11) << 4) + ((insn_bytes[1] >> 4) & 0xf) 100 | decode_offset = lambda insn_bytes: ( 101 | (insn_bytes[2] << 10) + 102 | (insn_bytes[1] << 2) + 103 | ((insn_bytes[0] >> 6) & 0b11) 104 | ) 105 | decode_i = lambda insn_bytes: (insn_bytes[0] >> 7) & 1 106 | decode_z = lambda insn_bytes: (insn_bytes[0] >> 6) & 1 107 | 108 | 109 | class Instruction: 110 | 111 | # Instruction class starts with a bunch of utility methods. For the actual 112 | # decoding, see the "decode" classmethod. 113 | def __init__(self): 114 | self.op0 = None 115 | self.op1 = None 116 | self.op2 = None 117 | self.r = None 118 | self.s = None 119 | self.sr = None 120 | self.t = None 121 | self.n = None 122 | self.m = None 123 | self.i = None 124 | self.z = None 125 | self.imm4 = None 126 | self.imm6 = None 127 | self.imm7 = None 128 | self.imm8 = None 129 | self.imm12 = None 130 | self.imm16 = None 131 | self.offset = None 132 | self.length = None 133 | self.valid = None 134 | self.instruction_type = None 135 | 136 | # These are simple transformations done to immediate values and such. 
137 | # Usually based on a line in the docs that say "the assembler will do such 138 | # and such to the immediate" 139 | def extui_shiftimm(self): 140 | if self.mnem != "EXTUI": 141 | return None 142 | return ((self.op1 & 1) << 4) + self.s 143 | 144 | def simm6(self): 145 | if self.imm6 is None: 146 | return None 147 | return sign_extend(self.imm6, 8) 148 | 149 | def simm8(self): 150 | if self.imm8 is None: 151 | return None 152 | return sign_extend(self.imm8, 8) 153 | 154 | def simm12(self): 155 | if self.imm12 is None: 156 | return None 157 | return sign_extend(self.imm12, 12) 158 | 159 | def rotw_simm4(self): 160 | """Parse immediate for the ROTW instruction 161 | 162 | The ROTW instruction has a signed imm4 in the "t" slot. 163 | """ 164 | if self.t is None: 165 | return None 166 | return sign_extend(self.t, 4) 167 | 168 | # For PC-relative instructions, we need the address to compute the 169 | # "target_offset". In non-branching cases, I've tried to instead call it a 170 | # "mem_offset" (although I suspect I missed a couple). 
# NOTE(review): these definitions take ``self`` and read class-level tables
# through it, so they appear to be members of a class whose header lies
# outside this excerpt -- confirm the enclosing scope before relocating them.

def offset_imm6(self, addr):
    """Return ``addr + 4 + self.imm6`` (imm6 is used as-is, unsigned)."""
    return addr + 4 + self.imm6

def offset_simm6(self, addr):
    """Return ``addr + 4`` plus the value produced by ``self.simm6()``."""
    return addr + 4 + self.simm6()

def offset_simm8(self, addr):
    """Return ``addr + 4`` plus the value produced by ``self.simm8()``."""
    return addr + 4 + self.simm8()

def offset_simm12(self, addr):
    """Return ``addr + 4`` plus the value produced by ``self.simm12()``."""
    return addr + 4 + self.simm12()

def offset_call(self, addr):
    """Return the target of a CALLn instruction located at ``addr``.

    The address is first aligned down to a 4-byte boundary; the offset field
    is passed through ``sign_extend(..., 18)``, scaled by 4 and added,
    followed by the constant 4.
    """
    return (addr & 0xfffffffc) + (sign_extend(self.offset, 18) << 2) + 4

def offset_j(self, addr):
    """Return ``addr + 4 + sign_extend(self.offset, 18)`` (target of J)."""
    return addr + 4 + sign_extend(self.offset, 18)

# Maps a mnemonic (with "." replaced by "_") to the name of the method that
# computes its control-flow target.
_target_offset_map = {
    "BALL": "offset_simm8",
    "BANY": "offset_simm8",
    "BBC": "offset_simm8",
    "BBCI": "offset_simm8",
    "BBS": "offset_simm8",
    "BBSI": "offset_simm8",
    "BEQ": "offset_simm8",
    "BEQI": "offset_simm8",
    "BEQZ": "offset_simm12",
    "BEQZ_N": "offset_imm6",
    "BF": "offset_simm8",
    "BGE": "offset_simm8",
    "BGEI": "offset_simm8",
    "BGEU": "offset_simm8",
    "BGEUI": "offset_simm8",
    "BGEZ": "offset_simm12",
    "BLT": "offset_simm8",
    "BLTI": "offset_simm8",
    "BLTU": "offset_simm8",
    "BLTUI": "offset_simm8",
    "BLTZ": "offset_simm12",
    "BNALL": "offset_simm8",
    "BNE": "offset_simm8",
    "BNEI": "offset_simm8",
    "BNEZ": "offset_simm12",
    "BNEZ_N": "offset_imm6",
    "BNONE": "offset_simm8",
    "BT": "offset_simm8",
    "CALL0": "offset_call",
    "CALL4": "offset_call",
    "CALL8": "offset_call",
    "CALL12": "offset_call",
    "J": "offset_j",
}
def target_offset(self, addr):
    """Return the branch/call target of this instruction, or None.

    Params:
        addr: address the instruction is located at.

    Returns:
        The computed target, or None when the mnemonic has no entry in
        ``_target_offset_map``.

    Raises:
        Exception: the table names a handler that does not exist on ``self``.
    """
    mapped = self._target_offset_map.get(self.mnem.replace(".", "_"))
    if mapped is None:
        return None
    func = getattr(self, mapped, None)
    if func is None:
        raise Exception(f"Invalid handler for insn {self.mnem} in _target_offset_map")
    return func(addr)

def offset_l32r(self, addr):
    """Return the 4-byte-aligned literal address referenced by L32R.

    imm16 is OR-ed with 0xFFFF0000 before being handed to
    ``sign_extend(..., 32)``, then scaled by 4; ``addr + 3`` is added and the
    sum is masked down to a 4-byte boundary.
    """
    enc = sign_extend(self.imm16 | 0xFFFF0000, 32) << 2
    return (enc + addr + 3) & 0xFFFFFFFC

# mem_offset is roughly the same as target_offset, but for data accesses and
# not jumps
_mem_offset_map = {
    "L32R": "offset_l32r",
}
def mem_offset(self, addr):
    """Return the data address this instruction references, or None.

    Params:
        addr: address the instruction is located at.

    Returns:
        The computed address, or None when the mnemonic has no entry in
        ``_mem_offset_map``.

    Raises:
        Exception: the table names a handler that does not exist on ``self``.
    """
    mapped = self._mem_offset_map.get(self.mnem.replace(".", "_"))
    if mapped is None:
        return None
    func = getattr(self, mapped, None)
    if func is None:
        raise Exception(f"Invalid handler for insn {self.mnem} in _mem_offset_map")
    return func(addr)

# In a few places, an immediate is an index into these lookup tables. The
# RTN in the docs calls it "B4CONST", so I do too.
_b4const_vals = [
    -1, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256,
]
_b4constu_vals = [
    32768, 65536, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256,
]
# For each mnemonic, the name of the decoded field holding the table index.
_b4const_map = {
    "BEQI": "r",
    "BGEI": "r",
    "BLTI": "r",
    "BNEI": "r",
}
_b4constu_map = {
    "BGEUI": "r",
    "BLTUI": "r",
}
def b4const(self):
    """Return the ``_b4const_vals`` entry selected by this instruction.

    Raises:
        KeyError: the mnemonic is not listed in ``_b4const_map``.
    """
    # A missing mnemonic is a caller error; let the KeyError propagate.
    comp = self._b4const_map[self.mnem]
    enc = getattr(self, comp)
    return self._b4const_vals[enc]

def b4constu(self):
    """Return the ``_b4constu_vals`` entry selected by this instruction.

    Raises:
        KeyError: the mnemonic is not listed in ``_b4constu_map``.
    """
    # A missing mnemonic is a caller error; let the KeyError propagate.
    comp = self._b4constu_map[self.mnem]
    enc = getattr(self, comp)
    return self._b4constu_vals[enc]

# Table 5-128 Numerical List of Special Registers
# This allows us to render "RSR.REGNAME" rather than a bare register number.
_special_reg_map = {
    0: "LBEG",
    1: "LEND",
    2: "LCOUNT",
    3: "SAR",
    4: "BR",
    5: "LITBASE",
    12: "SCOMPARE1",
    16: "ACCLO",
    17: "ACCHI",
    32: "M0",
    33: "M1",
    34: "M2",
    35: "M3",
    72: "WindowBase",
    73: "WindowStart",
    83: "PTEVADDR",
    89: "MMID",
    90: "RASID",
    91: "ITLBCFG",
    92: "DTLBCFG",
    96: "IBREAKENABLE",
    98: "CACHEATTR",
    99: "ATOMCTL",
    104: "DDR",
    106: "MEPC",
    107: "MEPS",
    108: "MESAVE",
    109: "MESR",
    110: "MECR",
    111: "MEVADDR",
    128: "IBREAKA0",
    129: "IBREAKA1",
    144: "DBREAKA0",
    145: "DBREAKA1",
    160: "DBREAKC0",
    161: "DBREAKC1",
    177: "EPC1",
    178: "EPC2",
    179: "EPC3",
    180: "EPC4",
    181: "EPC5",
    182: "EPC6",
    183: "EPC7",
    192: "DEPC",
    194: "EPS2",
    195: "EPS3",
    196: "EPS4",
    197: "EPS5",
    198: "EPS6",
    199: "EPS7",
    209: "EXCSAVE1",
    210: "EXCSAVE2",
    211: "EXCSAVE3",
    212: "EXCSAVE4",
    213: "EXCSAVE5",
    214: "EXCSAVE6",
    215: "EXCSAVE7",
    224: "CPENABLE",
    226: "INTERRUPT", # Also known as INTSET
    227: "INTCLEAR",
    228: "INTENABLE",
    230: "PS",
    231: "VECBASE",
    232: "EXCCAUSE",
    233: "DEBUGCAUSE",
    234: "CCOUNT",
    235: "PRID",
    236: "ICOUNT",
    237: "ICOUNTLEVEL",
    238: "EXCVADDR",
    240: "CCOMPARE0",
    241: "CCOMPARE1",
    242: "CCOMPARE2",
    244: "MISC0",
    245: "MISC1",
    246: "MISC2",
    247: "MISC3",
}

def get_sr_name(self):
    """Return the special-register name for RSR/WSR/XSR, else None.

    Register numbers missing from ``_special_reg_map`` are returned as their
    decimal string.
    """
    if self.mnem not in ("RSR", "WSR", "XSR"):
        return None
    try:
        return self._special_reg_map[self.sr]
    except KeyError:
        return str(self.sr)

# For instruction decoding, we follow the tables in xtensa.pdf
# (7.3.1 Opcode Maps)
# We begin with Table 7-192 Whole Opcode Space. This switches off op0 to
# subtables, which we then filter through to sub-sub-tables, etc.
# ... 10 hours later, we made it to the bottom :)

# Top-level opcode table, indexed by the op0 field. Each entry names the
# ``_decode_<NAME>`` attribute that handles it.
_op0_map = [
    "QRST", "L32R", "LSAI", "LSCI",
    "MAC16", "CALLN", "SI", "B",
    "L32I_N", "S32I_N", "ADD_N", "ADDI_N",
    "ST2", "ST3", None, None, # None is reserved
]
@classmethod
def decode(cls, insn_bytes):
    """Decode ``insn_bytes`` into a freshly created Instruction.

    Decoding starts at the op0 table and recurses through the sub-tables.
    Any exception raised by a table layer propagates to the caller.
    """
    insn = Instruction()
    return cls._do_tbl_layer(insn, insn_bytes, "op0", cls._op0_map)

# At each "layer" of the tables, we look up some control signal. In this
# case, it was op0. op0 has 4 bits for a 16 entry table. We can do one of
# two things: a sub-table or a leaf (instruction). By the magic of Python
# metaprogramming, we lookup the classmethod _decode_<NAME>, which we
# implement either as a function for a table layer, or we use the mnem
# helper to indicate it's a leaf function.

# These are the actual instructions found in the first table. Arguments to
# mnem are mnemonic, instruction type, an optional predicate specifying if
# the encoding is valid (for when the manual says t must be 0 or something),
# and then "inline" kwargs that end up defining methods for disassembly and
# lifting to use.
_decode_L32R = mnem("L32R", "RI16") # op0, t, imm16
_decode_L32I_N = mnem("L32I.N", "RRRN",
                      inline0=lambda insn, _: insn.r << 2)
_decode_S32I_N = mnem("S32I.N", "RRRN",
                      inline0=lambda insn, _: insn.r << 2)
_decode_ADD_N = mnem("ADD.N", "RRRN")
_decode_ADDI_N = mnem("ADDI.N", "RRRN",
                      inline0=lambda insn, _: insn.t if insn.t != 0 else -1)


# The next three functions implement the metaprogramming glue between layers
@classmethod
def _do_tbl_layer(cls, insn, insn_bytes, component, map):
    """Do the lookups for one table layer.

    component is the string to decode, like "op1", or "r". The module-level
    function named ``decode_<component>`` is used to extract it.
    map is the map to look up in
    """
    return cls._do_lut(insn, insn_bytes,
                       [(component, globals()["decode_" + component])],
                       component,
                       map)

@classmethod
def _do_lut(cls,
            insn,
            insn_bytes,
            lookup_map,
            value_to_look_up,
            table_to_look_in,
            ):
    """Do an iteration of table-lookups

    Tensilica has a bunch of tables that define the instruction encoding.
    We decode them a layer at a time, dispatching to relevant handlers at
    each level. By the time we're done, we should have the whole instruction
    decoded.

    At each layer, we read one or more values out of the insn bytes and
    assign it to the decoded properties. We then grab a value from the table
    using one of those values and call the next layer

    Params:
        insn (Instruction): the instruction object to fill in
        insn_bytes (bytes): the instruction bytes we're decoding

        lookup_map (List[Tuple]): list of tuples of
            (decoded_name, function_to_decode). The function will receive
            insn_bytes as a param and should return a numeric value.

        value_to_look_up (string): One of the decoded_name values from the
            previous param

        table_to_look_in (List): The table to look in (access as cls._name
            and pass that in)

    Raises:
        AttributeError: a decoded_name is not an existing attribute of insn.
    """
    for decoded_name, function_to_decode in lookup_map:
        # Sanity check: only fields that already exist on the instruction
        # may be filled in; an unknown name raises AttributeError here.
        getattr(insn, decoded_name)
        setattr(insn, decoded_name, function_to_decode(insn_bytes))

    value = getattr(insn, value_to_look_up)
    return cls._call_from_map(table_to_look_in, value, insn, insn_bytes)

@staticmethod
def _call_from_map(map, index, insn, insn_bytes):
    """Part of the operation of _do_lut, see there for comments

    Raises:
        Exception: ``index`` is outside ``map``, the slot is reserved
            (``None``), or no ``_decode_<NAME>`` handler is implemented.
    """
    try:
        entry = map[index]
    except IndexError:
        raise Exception(f"Unsupported index {index} in map {map}")

    # Reserved slots are stored as None; report them explicitly instead of
    # letting the string concatenation below fail with a TypeError.
    if entry is None:
        raise Exception(f"Reserved encoding at index {index} in map {map}")

    name = "_decode_" + entry
    func = getattr(Instruction, name, None)
    if not func:
        raise Exception(f"Unimplemented: {name}")

    return func(insn, insn_bytes)

# From here down, it's a pretty mechanical translation of the Xtensa docs

_qrst_map = [
    "RST0", "RST1", "RST2", "RST3",
    "EXTUI", "EXTUI", "CUST0", "CUST1",
    "LSCX", "LSC4", "FP0", "FP1",
    None, None, None, None,
]
@classmethod
def _decode_QRST(cls, insn, insn_bytes):
    # Formats RRR, CALLX, RSR (t, s, r, op2 vary)
    # That means op1 is the commonality we'll map off of
    return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._qrst_map)

_decode_EXTUI = mnem("EXTUI",
                     "RRR", # RRR is dubious for this... it's complex
                     # IIRC inline0 ended up being named something else but
                     # I didn't want to reuse the number
                     inline1=lambda insn, _: insn.op2 + 1
                     )

_rst0_map = [
    "ST0", "AND", "OR", "XOR",
    "ST1", "TLB", "RT0", None, # None is reserved
    "ADD", "ADDX2", "ADDX4", "ADDX8",
    "SUB", "SUBX2", "SUBX4", "SUBX8",
]
@classmethod
def _decode_RST0(cls, insn, insn_bytes):
    # Formats RRR and CALLX (t, s, r vary)
    # That means op2 is the commonality we'll map off of
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst0_map)

_decode_AND = mnem("AND", "RRR")
_decode_OR = mnem("OR", "RRR")
_decode_XOR = mnem("XOR", "RRR")
_decode_ADD = mnem("ADD", "RRR")
_decode_ADDX2 = mnem("ADDX2", "RRR")
_decode_ADDX4 = mnem("ADDX4", "RRR")
_decode_ADDX8 = mnem("ADDX8", "RRR")
_decode_SUB = mnem("SUB", "RRR")
_decode_SUBX2 = mnem("SUBX2", "RRR")
_decode_SUBX4 = mnem("SUBX4", "RRR")
_decode_SUBX8 = mnem("SUBX8", "RRR")

_st0_map = [
    "SNM0", "MOVSP", "SYNC", "RFEI",
    "BREAK", "SYSCALL", "RSIL", "WAITI",
    "ANY4", "ALL4", "ANY8", "ALL8",
    None, None, None, None, # these are reserved
]
@classmethod
def _decode_ST0(cls, insn, insn_bytes):
    # Formats RRR and CALLX
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st0_map)

_decode_MOVSP = mnem("MOVSP", "RRR")
_decode_BREAK = mnem("BREAK", "RRR")
_decode_SYSCALL = mnem("SYSCALL", "RRR", lambda insn: insn.s == 0 and insn.t == 0)
_decode_RSIL = mnem("RSIL", "RRR")
_decode_WAITI = mnem("WAITI", "RRR", lambda insn: insn.t == 0)
_decode_ANY4 = mnem("ANY4", "RRR")
_decode_ALL4 = mnem("ALL4", "RRR")
_decode_ANY8 = mnem("ANY8", "RRR")
_decode_ALL8 = mnem("ALL8", "RRR")

_snm0_map = [
    "ILL", None, "JR", "CALLX", # None is reserved
]
@classmethod
def _decode_SNM0(cls, insn, insn_bytes):
    # Format CALLX (n, s vary)
    return cls._do_tbl_layer(insn, insn_bytes, "m", cls._snm0_map)

_decode_ILL = mnem("ILL", "CALLX", lambda insn: insn.s == 0 and insn.n == 0)

_jr_map = [
    "RET", "RETW", "JX", None, # None is reserved
]
@classmethod
def _decode_JR(cls, insn, insn_bytes):
    # Format CALLX (s varies)
    return cls._do_tbl_layer(insn, insn_bytes, "n", cls._jr_map)

_decode_RET = mnem("RET", "CALLX", lambda insn: insn.s == 0)
_decode_RETW = mnem("RETW", "CALLX", lambda insn: insn.s == 0)
_decode_JX = mnem("JX", "CALLX")

_callx_map = [
    "CALLX0", "CALLX4", "CALLX8", "CALLX12",
]
@classmethod
def _decode_CALLX(cls, insn, insn_bytes):
    # Format CALLX (s varies)
    return cls._do_tbl_layer(insn, insn_bytes, "n", cls._callx_map)

_decode_CALLX0 = mnem("CALLX0", "CALLX")
_decode_CALLX4 = mnem("CALLX4", "CALLX")
_decode_CALLX8 = mnem("CALLX8", "CALLX")
_decode_CALLX12 = mnem("CALLX12", "CALLX")

# SYNC
_sync_map = [
    "ISYNC", "RSYNC", "ESYNC", "DSYNC",
    None, None, None, None, # None is reserved
    "EXCW", None, None, None,
    "MEMW", "EXTW", None, "NOP",
    # The manual doesn't show NOP here, but the NOP encoding it shows
    # _should_ go here, and objdump disassembles it as "nop"
]
@classmethod
def _decode_SYNC(cls, insn, insn_bytes):
    # Format RRR (s varies)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._sync_map)

_decode_ISYNC = mnem("ISYNC", "RRR", lambda insn: insn.s == 0)
_decode_RSYNC = mnem("RSYNC", "RRR", lambda insn: insn.s == 0)
_decode_ESYNC = mnem("ESYNC", "RRR", lambda insn: insn.s == 0)
_decode_DSYNC = mnem("DSYNC", "RRR", lambda insn: insn.s == 0)
_decode_EXCW = mnem("EXCW", "RRR", lambda insn: insn.s == 0)
_decode_MEMW = mnem("MEMW", "RRR", lambda insn: insn.s == 0)
_decode_EXTW = mnem("EXTW", "RRR", lambda insn: insn.s == 0)
_decode_NOP = mnem("NOP", "RRR", lambda insn: insn.s == 0)

_rfei_map = [
    "RFET", "RFI", "RFME", None, # None is reserved
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_RFEI(cls, insn, insn_bytes):
    # Format RRR (s varies)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._rfei_map)

_decode_RFI = mnem("RFI", "RRR")
_decode_RFME = mnem("RFME", "RRR", lambda insn: insn.s == 0)

_rfet_map = [
    "RFE", "RFUI", "RFDE", None, # None is reserved
    "RFWO", "RFWU", None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_RFET(cls, insn, insn_bytes):
    # Format RRR (no bits vary)
    return cls._do_tbl_layer(insn, insn_bytes, "s", cls._rfet_map)

_decode_RFE = mnem("RFE", "RRR")
_decode_RFUI = mnem("RFUI", "RRR")
_decode_RFDE = mnem("RFDE", "RRR")
_decode_RFWO = mnem("RFWO", "RRR")
_decode_RFWU = mnem("RFWU", "RRR")

_st1_map = [
    "SSR", "SSL", "SSA8L", "SSA8B",
    "SSAI", None, "RER", "WER", # None is reserved
    "ROTW", None, None, None, # None is reserved
    None, None, "NSA", "NSAU",
]
@classmethod
def _decode_ST1(cls, insn, insn_bytes):
    # Format RRR (t, s vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st1_map)

_decode_SSR = mnem("SSR", "RRR", lambda insn: insn.t == 0)
_decode_SSL = mnem("SSL", "RRR", lambda insn: insn.t == 0)
_decode_SSA8L = mnem("SSA8L", "RRR", lambda insn: insn.t == 0)
_decode_SSA8B = mnem("SSA8B", "RRR", lambda insn: insn.t == 0)
# Bit 0 of t carries bit 4 of the shift amount (see inline0), so only the
# upper three bits of t are required to be zero.
_decode_SSAI = mnem("SSAI", "RRR", lambda insn: (insn.t & 0xE) == 0,
                    inline0=lambda insn, _: insn.s + ((insn.t & 1) << 4) )
_decode_RER = mnem("RER", "RRR")
# Remaining ST1 leaves (the table itself is declared just before this point).
_decode_WER = mnem("WER", "RRR")
_decode_ROTW = mnem("ROTW", "RRR", lambda insn: insn.s == 0)
_decode_NSA = mnem("NSA", "RRR")
_decode_NSAU = mnem("NSAU", "RRR")

# TLB sub-table, indexed by the r field.
_tlb_map = [
    None, None, None, "RITLB0", # None is reserved
    "IITLB", "PITLB", "WITLB", "RITLB1",
    None, None, None, "RDTLB0",
    "IDTLB", "PDTLB", "WDTLB", "RDTLB1",
]
@classmethod
def _decode_TLB(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_tlb_map``."""
    # Format RRR (t, s vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._tlb_map)

_decode_RITLB0 = mnem("RITLB0", "RRR")
_decode_IITLB = mnem("IITLB", "RRR", lambda insn: insn.t == 0)
_decode_PITLB = mnem("PITLB", "RRR")
_decode_WITLB = mnem("WITLB", "RRR")
_decode_RITLB1 = mnem("RITLB1", "RRR")
_decode_RDTLB0 = mnem("RDTLB0", "RRR")
_decode_IDTLB = mnem("IDTLB", "RRR", lambda insn: insn.t == 0)
_decode_PDTLB = mnem("PDTLB", "RRR")
_decode_WDTLB = mnem("WDTLB", "RRR")
_decode_RDTLB1 = mnem("RDTLB1", "RRR")

# RT0 sub-table, indexed by the s field.
_rt0_map = [
    "NEG", "ABS", None, None,
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_RT0(cls, insn, insn_bytes):
    """Dispatch on the ``s`` field through ``_rt0_map``."""
    # Format RRR (t, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "s", cls._rt0_map)

_decode_NEG = mnem("NEG", "RRR")
_decode_ABS = mnem("ABS", "RRR")

# RST1 sub-table, indexed by op2. SLLI and SRAI each occupy two slots because
# the low bit of op2 is part of their shift amount (see their inline0).
_rst1_map = [
    "SLLI", "SLLI", "SRAI", "SRAI",
    "SRLI", None, "XSR", "ACCER",
    "SRC", "SRL", "SLL", "SRA",
    "MUL16U", "MUL16S", None, "IMP"
]
@classmethod
def _decode_RST1(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_rst1_map``."""
    # Format RRR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst1_map)

# inline0 for SLLI: 32 minus the 5-bit value formed from t (low four bits)
# and bit 0 of op2 (bit 4).
_decode_SLLI = mnem("SLLI", "RRR",
                    inline0=lambda insn, _: 32 - ( insn.t + ((insn.op2 & 1) << 4) ))
# inline0 for SRAI: 5-bit value formed from s (low four bits) and bit 0 of
# op2 (bit 4).
_decode_SRAI = mnem("SRAI", "RRR",
                    inline0=lambda insn, _: insn.s + ((insn.op2 & 1) << 4))
_decode_SRLI = mnem("SRLI", "RRR")
_decode_XSR = mnem("XSR", "RSR")
_decode_SRC = mnem("SRC", "RRR")
_decode_SRL = mnem("SRL", "RRR", lambda insn: insn.s == 0)
_decode_SLL = mnem("SLL", "RRR", lambda insn: insn.t == 0)
_decode_SRA = mnem("SRA", "RRR", lambda insn: insn.s == 0)
_decode_MUL16U = mnem("MUL16U", "RRR")
_decode_MUL16S = mnem("MUL16S", "RRR")

_accer_map = [
    None, None, None, None,
    None, None, "RER", "WER",
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_ACCER(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_accer_map``."""
    # Format RRR (t, s vary)
    # There's a bug in the manual here: it says to filter on op2, however we
    # filtered on op2 to get here. Inspection suggests that we should in
    # fact filter on the following values for r:
    # RER = 0110
    # WER = 0111
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._accer_map)

# NOTE(review): RER/WER are also bound in the ST1 section; these later
# assignments rebind the same names to equivalent mnem() results.
_decode_RER = mnem("RER", "RRR")
_decode_WER = mnem("WER", "RRR")

# IMP sub-table, indexed by the r field.
_imp_map = [
    "LICT", "SICT", "LICW", "SICW",
    None, None, None, None, # None is reserved
    "LDCT", "SDCT", None, None,
    None, None, "RFDX", None,
]
@classmethod
def _decode_IMP(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_imp_map``."""
    # Format RRR (t, s vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._imp_map)

_decode_LICT = mnem("LICT", "RRR")
_decode_SICT = mnem("SICT", "RRR")
_decode_LICW = mnem("LICW", "RRR")
_decode_SICW = mnem("SICW", "RRR")
_decode_LDCT = mnem("LDCT", "RRR")
_decode_SDCT = mnem("SDCT", "RRR")

# RFDX sub-table, indexed by the t field.
_rfdx_map = [
    "RFDO", "RFDD", None, None, # None is reserved
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_RFDX(cls, insn, insn_bytes):
    """Dispatch on the ``t`` field through ``_rfdx_map``."""
    # Format RRR (s varies)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._rfdx_map)

_decode_RFDO = mnem("RFDO", "RRR", lambda insn: insn.s == 0)
_decode_RFDD = mnem("RFDD", "RRR", lambda insn: insn.s in [0, 1])

# RST2 sub-table, indexed by op2.
_rst2_map = [
    "ANDB", "ANDBC", "ORB", "ORBC",
    "XORB", None, None, None,
    "MULL", None, "MULUH", "MULSH",
    "QUOU", "QUOS", "REMU", "REMS",
]
@classmethod
def _decode_RST2(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_rst2_map``."""
    # Format RRR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst2_map)

_decode_ANDB = mnem("ANDB", "RRR")
_decode_ANDBC = mnem("ANDBC", "RRR")
_decode_ORB = mnem("ORB", "RRR")
_decode_ORBC = mnem("ORBC", "RRR")
_decode_XORB = mnem("XORB", "RRR")
_decode_MULL = mnem("MULL", "RRR")
_decode_MULUH = mnem("MULUH", "RRR")
_decode_MULSH = mnem("MULSH", "RRR")
_decode_QUOU = mnem("QUOU", "RRR")
_decode_QUOS = mnem("QUOS", "RRR")
_decode_REMU = mnem("REMU", "RRR")
_decode_REMS = mnem("REMS", "RRR")

# RST3 sub-table, indexed by op2.
_rst3_map = [
    "RSR", "WSR", "SEXT", "CLAMPS",
    "MIN", "MAX", "MINU", "MAXU",
    "MOVEQZ", "MOVNEZ", "MOVLTZ", "MOVGEZ",
    "MOVF", "MOVT", "RUR", "WUR",
]
@classmethod
def _decode_RST3(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_rst3_map``."""
    # Formats RRR and RSR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst3_map)

_decode_RSR = mnem("RSR", "RSR")
_decode_WSR = mnem("WSR", "RSR")
_decode_SEXT = mnem("SEXT", "RRR")
_decode_CLAMPS = mnem("CLAMPS", "RRR")
_decode_MIN = mnem("MIN", "RRR")
_decode_MAX = mnem("MAX", "RRR")
_decode_MINU = mnem("MINU", "RRR")
_decode_MAXU = mnem("MAXU", "RRR")
_decode_MOVEQZ = mnem("MOVEQZ", "RRR")
_decode_MOVNEZ = mnem("MOVNEZ", "RRR")
_decode_MOVLTZ = mnem("MOVLTZ", "RRR")
_decode_MOVGEZ = mnem("MOVGEZ", "RRR")
_decode_MOVF = mnem("MOVF", "RRR")
_decode_MOVT = mnem("MOVT", "RRR")
# Last two RST3 leaves (the table itself is declared just before this point).
_decode_RUR = mnem("RUR", "RRR") # lol, could probably treat as RSR
_decode_WUR = mnem("WUR", "RSR")

# LSCX sub-table, indexed by op2.
_lscx_map = [
    "LSX", "LSXU", None, None, # None is reserved
    "SSX", "SSXU", None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_LSCX(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_lscx_map``."""
    # Format RRR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._lscx_map)

_decode_LSX = mnem("LSX", "RRR")
_decode_LSXU = mnem("LSXU", "RRR")
_decode_SSX = mnem("SSX", "RRR")
_decode_SSXU = mnem("SSXU", "RRR")

# LSC4 sub-table, indexed by op2.
_lsc4_map = [
    "L32E", None, None, None,
    "S32E", None, None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_LSC4(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_lsc4_map``."""
    # Format RRI4 (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._lsc4_map)

_decode_L32E = mnem("L32E", "RRI4")
_decode_S32E = mnem("S32E", "RRI4")

# FP0 sub-table, indexed by op2. Table entries use "_" because they are
# spliced into attribute names (``_decode_<NAME>``).
_fp0_map = [
    "ADD_S", "SUB_S", "MUL_S", None, # None is reserved
    "MADD_S", "MSUB_S", None, None,
    "ROUND_S", "TRUNC_S", "FLOOR_S", "CEIL_S",
    "FLOAT_S", "UFLOAT_S", "UTRUNC_S", "FP1OP",
]
@classmethod
def _decode_FP0(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_fp0_map``."""
    # Format RRR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._fp0_map)

# The mnemonic text uses the dotted spelling, matching every other dotted
# mnemonic in this file ("MOV.S", "UN.S", "L32I.N", ...). Only the attribute
# name and the table key use "_".
_decode_ADD_S = mnem("ADD.S", "RRR")
_decode_SUB_S = mnem("SUB.S", "RRR")
_decode_MUL_S = mnem("MUL.S", "RRR")
_decode_MADD_S = mnem("MADD.S", "RRR")
_decode_MSUB_S = mnem("MSUB.S", "RRR")
_decode_ROUND_S = mnem("ROUND.S", "RRR")
_decode_TRUNC_S = mnem("TRUNC.S", "RRR")
_decode_FLOOR_S = mnem("FLOOR.S", "RRR")
_decode_CEIL_S = mnem("CEIL.S", "RRR")
_decode_FLOAT_S = mnem("FLOAT.S", "RRR")
_decode_UFLOAT_S = mnem("UFLOAT.S", "RRR")
_decode_UTRUNC_S = mnem("UTRUNC.S", "RRR")
# FP1OP sub-table, indexed by the t field.
_fp1op_map = [
    "MOV_S", "ABS_S", None, None, # None is reserved
    "RFR", "WFR", "NEG_S", None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_FP1OP(cls, insn, insn_bytes):
    """Dispatch on the ``t`` field through ``_fp1op_map``."""
    # Format RRR (s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._fp1op_map)

# Attribute names use "_" while the mnemonic text uses ".".
_decode_MOV_S = mnem("MOV.S", "RRR")
_decode_ABS_S = mnem("ABS.S", "RRR")
_decode_RFR = mnem("RFR", "RRR")
_decode_WFR = mnem("WFR", "RRR")
_decode_NEG_S = mnem("NEG.S", "RRR")

# FP1 sub-table, indexed by op2.
_fp1_map = [
    None, "UN_S", "OEQ_S", "UEQ_S", # None is reserved
    "OLT_S", "ULT_S", "OLE_S", "ULE_S",
    "MOVEQZ_S", "MOVNEZ_S", "MOVLTZ_S", "MOVGEZ_S",
    "MOVF_S", "MOVT_S", None, None,
]
@classmethod
def _decode_FP1(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_fp1_map``."""
    # Format RRR (t, s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._fp1_map)

_decode_UN_S = mnem("UN.S", "RRR")
_decode_OEQ_S = mnem("OEQ.S", "RRR")
_decode_UEQ_S = mnem("UEQ.S", "RRR")
_decode_OLT_S = mnem("OLT.S", "RRR")
_decode_ULT_S = mnem("ULT.S", "RRR")
_decode_OLE_S = mnem("OLE.S", "RRR")
_decode_ULE_S = mnem("ULE.S", "RRR")
_decode_MOVEQZ_S = mnem("MOVEQZ.S", "RRR")
_decode_MOVNEZ_S = mnem("MOVNEZ.S", "RRR")
_decode_MOVLTZ_S = mnem("MOVLTZ.S", "RRR")
_decode_MOVGEZ_S = mnem("MOVGEZ.S", "RRR")
_decode_MOVF_S = mnem("MOVF.S", "RRR")
_decode_MOVT_S = mnem("MOVT.S", "RRR")

# LSAI sub-table, indexed by the r field.
_lsai_map = [
    "L8UI", "L16UI", "L32I", None, # None is reserved
    "S8I", "S16I", "S32I", "CACHE",
    None, "L16SI", "MOVI", "L32AI",
    "ADDI", "ADDMI", "S32C1I", "S32RI",
]
@classmethod
def _decode_LSAI(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_lsai_map``."""
    # Formats RRI8 and RRI4 (t, s, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._lsai_map)

_decode_L8UI = mnem("L8UI", "RRI8")
# Remaining LSAI leaves. Where present, inline0 is imm8 shifted left by 1
# (16-bit accesses) or by 2 (32-bit accesses).
_decode_L16UI = mnem("L16UI", "RRI8",
                     inline0=lambda insn, _: insn.imm8 << 1)
_decode_L32I = mnem("L32I", "RRI8",
                    inline0=lambda insn, _: insn.imm8 << 2)
_decode_S8I = mnem("S8I", "RRI8")
_decode_S16I = mnem("S16I", "RRI8",
                    inline0=lambda insn, _: insn.imm8 << 1)
_decode_S32I = mnem("S32I", "RRI8",
                    inline0=lambda insn, _: insn.imm8 << 2)
_decode_L16SI = mnem("L16SI", "RRI8",
                     inline0=lambda insn, _: insn.imm8 << 1)
# MOVI builds a 12-bit value from s (high four bits) and imm8 (low eight
# bits) and passes it to sign_extend(..., 12).
_decode_MOVI = mnem("MOVI", "RRI8",
                    inline0=lambda insn, _:
                        sign_extend((insn.s << 8) + insn.imm8, 12)
                    )
_decode_L32AI = mnem("L32AI", "RRI8",
                     inline0=lambda insn, _: insn.imm8 << 2)
_decode_ADDI = mnem("ADDI", "RRI8")
_decode_ADDMI = mnem("ADDMI", "RRI8")
_decode_S32C1I = mnem("S32C1I", "RRI8")
_decode_S32RI = mnem("S32RI", "RRI8",
                     inline0=lambda insn, _: insn.imm8 << 2)

# CACHE sub-table, indexed by the t field.
_cache_map = [
    "DPFR", "DPFW", "DPFRO", "DPFWO",
    "DHWB", "DHWBI", "DHI", "DII",
    "DCE", None, None, None, # None is reserved
    "IPF", "ICE", "IHI", "III",
]
@classmethod
def _decode_CACHE(cls, insn, insn_bytes):
    """Dispatch on the ``t`` field through ``_cache_map``."""
    # Formats RRI8 and RRI4 (s, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._cache_map)

_decode_DPFR = mnem("DPFR", "RRI8")
_decode_DPFW = mnem("DPFW", "RRI8")
_decode_DPFRO = mnem("DPFRO", "RRI8")
_decode_DPFWO = mnem("DPFWO", "RRI8")
_decode_DHWB = mnem("DHWB", "RRI8")
_decode_DHWBI = mnem("DHWBI", "RRI8")
_decode_DHI = mnem("DHI", "RRI8")
_decode_DII = mnem("DII", "RRI8")
_decode_IPF = mnem("IPF", "RRI8")
_decode_IHI = mnem("IHI", "RRI8")
_decode_III = mnem("III", "RRI8")

# DCE sub-table, indexed by op1.
_dce_map = [
    "DPFL", None, "DHU", "DIU", # None is reserved
    "DIWB", "DIWBI", None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_DCE(cls, insn, insn_bytes):
    """Dispatch on ``op1`` through ``_dce_map``."""
    # Format RRI4 (s, imm4 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._dce_map)

_decode_DPFL = mnem("DPFL", "RRI4")
_decode_DHU = mnem("DHU", "RRI4")
_decode_DIU = mnem("DIU", "RRI4")
_decode_DIWB = mnem("DIWB", "RRI4")
_decode_DIWBI = mnem("DIWBI", "RRI4")

# ICE sub-table, indexed by op1.
_ice_map = [
    "IPFL", None, "IHU", "IIU", # None is reserved
    None, None, None, None,
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_ICE(cls, insn, insn_bytes):
    """Dispatch on ``op1`` through ``_ice_map``."""
    # Format RRI4 (s, imm4 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._ice_map)

_decode_IPFL = mnem("IPFL", "RRI4")
_decode_IHU = mnem("IHU", "RRI4")
_decode_IIU = mnem("IIU", "RRI4")

# LSCI sub-table, indexed by the r field.
_lsci_map = [
    "LSI", None, None, None, # None is reserved
    "SSI", None, None, None,
    "LSIU", None, None, None,
    "SSIU", None, None, None,
]
@classmethod
def _decode_LSCI(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_lsci_map``."""
    # format RRI8 (t, s, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._lsci_map)

_decode_LSI = mnem("LSI", "RRI8")
_decode_SSI = mnem("SSI", "RRI8")
_decode_LSIU = mnem("LSIU", "RRI8")
_decode_SSIU = mnem("SSIU", "RRI8")

# MAC16 sub-table, indexed by op2. No _decode_MAC* handlers are defined in
# this section, so these entries have no leaf decoder here.
_mac16_map = [
    "MACID", "MACCD", "MACDD", "MACAD",
    "MACIA", "MACCA", "MACDA", "MACAA",
    "MACI", "MACC", None, None, # None is reserved
    None, None, None, None,
]
@classmethod
def _decode_MAC16(cls, insn, insn_bytes):
    """Dispatch on ``op2`` through ``_mac16_map``."""
    # format RRR (t, s, r, op1 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._mac16_map)

# TODO: Skipping this MAC stuff, seems like a vector processor, that I doubt
# the ESP8266 has...

# CALLN sub-table, indexed by the n field.
_calln_map = [
    "CALL0", "CALL4", "CALL8", "CALL12",
]
@classmethod
def _decode_CALLN(cls, insn, insn_bytes):
    """Dispatch on the ``n`` field through ``_calln_map``."""
    # Format CALL (offset varies)
    return cls._do_tbl_layer(insn, insn_bytes, "n", cls._calln_map)

_decode_CALL0 = mnem("CALL0", "CALL")
_decode_CALL4 = mnem("CALL4", "CALL")
_decode_CALL8 = mnem("CALL8", "CALL")
_decode_CALL12 = mnem("CALL12", "CALL")

# SI sub-table, indexed by the n field.
_si_map = [
    "J", "BZ", "BI0", "BI1",
]
@classmethod
def _decode_SI(cls, insn, insn_bytes):
    """Dispatch on the ``n`` field through ``_si_map``."""
    # Formats CALL, BRI8 and BRI12 (offset varies)
    return cls._do_tbl_layer(insn, insn_bytes, "n", cls._si_map)

_decode_J = mnem("J", "CALL")

# BZ sub-table, indexed by the m field.
_bz_map = [
    "BEQZ", "BNEZ", "BLTZ", "BGEZ",
]
@classmethod
def _decode_BZ(cls, insn, insn_bytes):
    """Dispatch on the ``m`` field through ``_bz_map``."""
    # Format BRI12 (s, imm12 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bz_map)

_decode_BEQZ = mnem("BEQZ", "BRI12")
_decode_BNEZ = mnem("BNEZ", "BRI12")
_decode_BLTZ = mnem("BLTZ", "BRI12")
_decode_BGEZ = mnem("BGEZ", "BRI12")

# BI0 sub-table, indexed by the m field.
_bi0_map = [
    "BEQI", "BNEI", "BLTI", "BGEI",
]
@classmethod
def _decode_BI0(cls, insn, insn_bytes):
    """Dispatch on the ``m`` field through ``_bi0_map``."""
    # Format BRI8 (s, r, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bi0_map)

_decode_BEQI = mnem("BEQI", "BRI8")
_decode_BNEI = mnem("BNEI", "BRI8")
_decode_BLTI = mnem("BLTI", "BRI8")
_decode_BGEI = mnem("BGEI", "BRI8")

# BI1 sub-table, indexed by the m field.
_bi1_map = [
    "ENTRY",
    "B1",
    "BLTUI",
    "BGEUI",
]
@classmethod
def _decode_BI1(cls, insn, insn_bytes):
    """Dispatch on the ``m`` field through ``_bi1_map``."""
    # Formats BRI8 and BRI12 (s, r, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bi1_map)

# inline0 for ENTRY is imm12 shifted left by 3.
_decode_ENTRY = mnem("ENTRY", "BRI12",
                     inline0=lambda insn, _: insn.imm12 << 3)
# Remaining BI1 leaves (the table itself is declared just before this point).
_decode_BLTUI = mnem("BLTUI", "BRI8")
_decode_BGEUI = mnem("BGEUI", "BRI8")

# B1 sub-table, indexed by the r field.
_b1_map = [
    "BF", "BT", None, None, # None is reserved
    None, None, None, None,
    "LOOP", "LOOPNEZ", "LOOPGTZ", None,
    None, None, None, None,
]
@classmethod
def _decode_B1(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_b1_map``."""
    # Format BRI8 (s, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._b1_map)

_decode_BF = mnem("BF", "BRI8")
_decode_BT = mnem("BT", "BRI8")
_decode_LOOP = mnem("LOOP", "BRI8")
_decode_LOOPNEZ = mnem("LOOPNEZ", "BRI8")
_decode_LOOPGTZ = mnem("LOOPGTZ", "BRI8")

# B sub-table, indexed by the r field. BBCI and BBSI each occupy two slots
# because the low bit of r is part of their bit index (see their inline0).
_b_map = [
    "BNONE", "BEQ", "BLT", "BLTU",
    "BALL", "BBC", "BBCI", "BBCI",
    "BANY", "BNE", "BGE", "BGEU",
    "BNALL", "BBS", "BBSI", "BBSI"
]
@classmethod
def _decode_B(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_b_map``."""
    # Format RRI8 (t, s, imm8 vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._b_map)

_decode_BNONE = mnem("BNONE", "RRI8")
_decode_BEQ = mnem("BEQ", "RRI8")
_decode_BLT = mnem("BLT", "RRI8")
_decode_BLTU = mnem("BLTU", "RRI8")
_decode_BALL = mnem("BALL", "RRI8")
_decode_BBC = mnem("BBC", "RRI8")
# inline0: 5-bit value formed from t (low four bits) and bit 0 of r (bit 4).
_decode_BBCI = mnem("BBCI", "RRI8",
                    inline0=lambda insn, _: insn.t + ((insn.r & 1) << 4))
_decode_BANY = mnem("BANY", "RRI8")
_decode_BNE = mnem("BNE", "RRI8")
_decode_BGE = mnem("BGE", "RRI8")
_decode_BGEU = mnem("BGEU", "RRI8")
_decode_BNALL = mnem("BNALL", "RRI8")
_decode_BBS = mnem("BBS", "RRI8")
# inline0: same construction as BBCI.
_decode_BBSI = mnem("BBSI", "RRI8",
                    inline0=lambda insn, _: insn.t + ((insn.r & 1) << 4))

# ST2 sub-table, indexed by the t field. Several t values map to the same
# instruction.
_st2_map = [
    "MOVI_N", "MOVI_N", "MOVI_N", "MOVI_N",
    "MOVI_N", "MOVI_N", "MOVI_N", "MOVI_N",
    "BEQZ_N", "BEQZ_N", "BEQZ_N", "BEQZ_N",
    "BNEZ_N", "BNEZ_N", "BNEZ_N", "BNEZ_N",
]
@classmethod
def _decode_ST2(cls, insn, insn_bytes):
    """Dispatch on the ``t`` field through ``_st2_map``."""
    # Formats RI7 and RI6 (s, r vary)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._st2_map)

_decode_MOVI_N = mnem("MOVI.N", "RI7",
                      inline0=lambda insn, _:
                          sign_extend(insn.imm7, 7) if
                          # Sign-extending the 7-bit value with the logical
                          # and of its two most significant bits
                          ((insn.imm7 >> 5) == 3) else
                          insn.imm7
                      )
_decode_BEQZ_N = mnem("BEQZ.N", "RI6")
_decode_BNEZ_N = mnem("BNEZ.N", "RI6")

# ST3 sub-table, indexed by the r field.
_st3_map = [
    "MOV_N", None, None, None, # None is reserved
    None, None, None, None,
    None, None, None, None,
    None, None, None, "S3",
]
@classmethod
def _decode_ST3(cls, insn, insn_bytes):
    """Dispatch on the ``r`` field through ``_st3_map``."""
    # Format RRRN (t, s vary)
    return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st3_map)

_decode_MOV_N = mnem("MOV.N", "RRRN")

# S3 sub-table, indexed by the t field.
_s3_map = [
    "RET_N", "RETW_N", "BREAK_N", "NOP_N",
    None, None, "ILL_N", None, # None is reserved
    None, None, None, None,
    None, None, None, None,
]
@classmethod
def _decode_S3(cls, insn, insn_bytes):
    """Dispatch on the ``t`` field through ``_s3_map``."""
    # Format RRRN (no fields vary)
    return cls._do_tbl_layer(insn, insn_bytes, "t", cls._s3_map)

_decode_RET_N = mnem("RET.N", "RRRN")
_decode_RETW_N = mnem("RETW.N", "RRRN")
_decode_BREAK_N = mnem("BREAK.N", "RRRN")
_decode_NOP_N = mnem("NOP.N", "RRRN")
_decode_ILL_N = mnem("ILL.N", "RRRN")

# Here's where we do the per-format decoding. This isn't quite as useful as
# I thought it would be, since Xtensa's instruction formats are not at all
# rigid (they sneak immediates into whatever bits are available, as they
# should).

# We actually don't keep the instruction bytes around for the disassembly
# stage, so everything has to be parsed out somewhere in the decoding stage.
@classmethod
def _decode_fmt_RRR(cls, insn, insn_bytes):
    """Tag insn as RRR with length 3, store op2, and decode t, s, r."""
    insn.instruction_type = InstructionType.RRR
    insn.length = 3
    # EXTUI encodes part of its operation in op2, so it is parsed here.
    insn.op2 = decode_op2(insn_bytes)
    fields = ["t", "s", "r"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RSR(cls, insn, insn_bytes):
    """Tag insn as RSR with length 3 and decode t, sr."""
    insn.length = 3
    insn.instruction_type = InstructionType.RSR
    fields = ["t", "sr"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_CALLX(cls, insn, insn_bytes):
    """Tag insn as CALLX with length 3 and decode n, m, s, r."""
    insn.instruction_type = InstructionType.CALLX
    insn.length = 3
    fields = ["n", "m", "s", "r"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RRI4(cls, insn, insn_bytes):
    """Tag insn as RRI4 with length 3 and decode r, s, t, imm4."""
    insn.instruction_type = InstructionType.RRI4
    insn.length = 3
    fields = ["r", "s", "t", "imm4"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RRI8(cls, insn, insn_bytes):
    """Tag insn as RRI8 with length 3 and decode r, s, t, imm8."""
    insn.instruction_type = InstructionType.RRI8
    insn.length = 3
    fields = ["r", "s", "t", "imm8"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RI16(cls, insn, insn_bytes):
    """Tag insn as RI16 with length 3 and decode t, imm16."""
    insn.instruction_type = InstructionType.RI16
    insn.length = 3
    fields = ["t", "imm16"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_CALL(cls, insn, insn_bytes):
    """Tag insn as CALL with length 3 and decode n, offset."""
    insn.instruction_type = InstructionType.CALL
    insn.length = 3
    fields = ["n", "offset"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_BRI8(cls, insn, insn_bytes):
    """Tag insn as BRI8 with length 3 and decode r, s, m, n, imm8."""
    insn.instruction_type = InstructionType.BRI8
    insn.length = 3
    fields = ["r", "s", "m", "n", "imm8"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_BRI12(cls, insn, insn_bytes):
    """Tag insn as BRI12 with length 3 and decode s, m, n, imm12."""
    insn.instruction_type = InstructionType.BRI12
    insn.length = 3
    fields = ["s", "m", "n", "imm12"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RRRN(cls, insn, insn_bytes):
    """Tag insn as RRRN with length 2 and decode r, s, t."""
    insn.instruction_type = InstructionType.RRRN
    insn.length = 2
    fields = ["r", "s", "t"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RI7(cls, insn, insn_bytes):
    """Tag insn as RI7 with length 2 and decode s, i, imm7."""
    insn.instruction_type = InstructionType.RI7
    insn.length = 2
    fields = ["s", "i", "imm7"]
    _decode_components(insn, insn_bytes, fields)


@classmethod
def _decode_fmt_RI6(cls, insn, insn_bytes):
    """Tag insn as RI6 with length 2 and decode s, i, z, imm6."""
    insn.instruction_type = InstructionType.RI6
    insn.length = 2
    fields = ["s", "i", "z", "imm6"]
    _decode_components(insn, insn_bytes, fields)