├── __init__.py
├── .gitignore
├── examples
    ├── firmware.bin
    ├── firmware.elf
    ├── main.cpp
    └── README.md
├── screenshots
    ├── hero.png
    └── open-with-options.png
├── binja_xtensa
    ├── test_mnemonics.csv.bz2
    ├── torture_test.dump.bz2
    ├── esp32_torture_test.dump.bz2
    ├── test_mnemonic_text.dump.bz2
    ├── fix_data.py
    ├── parse_rom_ld.py
    ├── binaryview.py
    ├── __init__.py
    ├── test_instruction.py
    ├── firmware_parser.py
    ├── known_symbols.py
    ├── disassembly.py
    ├── lifter.py
    └── instruction.py
├── LICENSE
├── plugin.json
└── README.md


/__init__.py:
--------------------------------------------------------------------------------
1 | from . import binja_xtensa
2 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__
2 | *.pyc
3 | .cache
4 | 


--------------------------------------------------------------------------------
/examples/firmware.bin:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/examples/firmware.bin


--------------------------------------------------------------------------------
/examples/firmware.elf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/examples/firmware.elf


--------------------------------------------------------------------------------
/screenshots/hero.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/screenshots/hero.png


--------------------------------------------------------------------------------
/screenshots/open-with-options.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/screenshots/open-with-options.png


--------------------------------------------------------------------------------
/binja_xtensa/test_mnemonics.csv.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/test_mnemonics.csv.bz2


--------------------------------------------------------------------------------
/binja_xtensa/torture_test.dump.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/torture_test.dump.bz2


--------------------------------------------------------------------------------
/binja_xtensa/esp32_torture_test.dump.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/esp32_torture_test.dump.bz2


--------------------------------------------------------------------------------
/binja_xtensa/test_mnemonic_text.dump.bz2:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/zackorndorff/binja-xtensa/HEAD/binja_xtensa/test_mnemonic_text.dump.bz2


--------------------------------------------------------------------------------
/examples/main.cpp:
--------------------------------------------------------------------------------
 1 | #include <Arduino.h>
 2 | 
 3 | void setup() {
 4 |   // put your setup code here, to run once:
 5 |   Serial.begin(115200);
 6 | }
 7 | 
 8 | void loop() {
 9 |   // put your main code here, to run repeatedly:
10 |   delay(1000);
11 |   Serial.println("Hello world");
12 | }
13 | 


--------------------------------------------------------------------------------
/examples/README.md:
--------------------------------------------------------------------------------
 1 | # Examples
 2 | 
 3 | Want to try out the plugin, but don't have a firmware image lying around? Never
 4 | fear, I've included built versions of a Hello World Arduino program for your
 5 | testing.
 6 | 
 7 | ## Files
 8 | 
 9 | * `firmware.bin`: firmware image
10 | * `firmware.elf`: built elf
11 | * `main.cpp`: source code
12 | 


--------------------------------------------------------------------------------
/binja_xtensa/fix_data.py:
--------------------------------------------------------------------------------
 1 | import binascii
 2 | import csv
 3 | 
 4 | 
 5 | with open("fixed.csv", "w") as wfile:
 6 |     with open("test_mnemonics.csv", "r") as file:
 7 |         reader = csv.reader(file)
 8 |         for row in reader:
 9 |             opcode, mnem = row
10 |             # Need to byte-swap opcode
11 |             data = binascii.unhexlify(opcode)
12 |             reverse_data = bytearray(data)
13 |             reverse_data.reverse()
14 |             wfile.write(f"{binascii.hexlify(reverse_data).decode('utf-8')},{mnem}\n")
15 | 


--------------------------------------------------------------------------------
/binja_xtensa/parse_rom_ld.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | """
 3 | Script to parse ESP SDK linker script and save symbols for the ROM
 4 | 
 5 | File you want from the SDK is called eagle.rom.addr.v6.ld
 6 | 
 7 | This script will produce a known_symbols.py from it
 8 | """
 9 | 
10 | import json
11 | import re
12 | 
ROM_RE = re.compile(
    r'^\s*PROVIDE\s+\(\s*([a-zA-Z0-9_]+)\s*=\s*(0x[0-9a-fA-F]+)\s*\);$'
)

# The generated file holds a Python assignment (`known_symbols = {...}`) and
# is imported as the `known_symbols` module, so it must be a .py file.
OUTPUT_FILE = "known_symbols.py"


def parse_rom_symbols(lines):
    """Collect the symbols declared by PROVIDE statements.

    lines: iterable of linker-script lines (an open text file works).

    Returns a dict mapping each address (int) to its symbol name. Lines that
    do not match ROM_RE are ignored. If an address appears more than once,
    the last symbol seen wins.
    """
    symbols = {}
    for line in lines:
        m = ROM_RE.match(line)
        if m:
            symbol, addr = m.groups()
            symbols[int(addr, 0)] = symbol
    return symbols


def main():
    """Read eagle.rom.addr.v6.ld and write the symbol table to OUTPUT_FILE."""
    with open("eagle.rom.addr.v6.ld", "r") as f:
        symbols = parse_rom_symbols(f)

    with open(OUTPUT_FILE, "w") as f:
        # json.dumps turns the int addresses into string keys; the result is
        # still a valid Python dict literal.
        f.write("known_symbols = ")
        f.write(json.dumps(symbols))
        f.write("\n")


if __name__ == "__main__":
    main()
33 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | Copyright 2020-2021 Zack Orndorff
 2 | 
 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of
 4 | this software and associated documentation files (the "Software"), to deal in
 5 | the Software without restriction, including without limitation the rights to
 6 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
 7 | the Software, and to permit persons to whom the Software is furnished to do so,
 8 | subject to the following conditions:
 9 | 
10 | The above copyright notice and this permission notice shall be included in all
11 | copies or substantial portions of the Software.
12 | 
13 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
14 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
15 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
16 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
17 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
18 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
19 | 


--------------------------------------------------------------------------------
/plugin.json:
--------------------------------------------------------------------------------
 1 | {
 2 |    "pluginmetadataversion": 2,
 3 |    "name": "Xtensa Architecture and ESP8266 Loader",
 4 |    "author": "Zack Orndorff",
 5 |    "type": [
 6 |       "binaryview",
 7 |       "architecture"
 8 |    ],
 9 |    "api": [
10 |       "python3"
11 |    ],
12 |    "description": "Xtensa Architecture and ESP8266 Image Loader",
13 |    "longdescription": "",
14 |    "license": {
15 |       "name": "MIT",
16 |       "text": "Copyright 2020-2021 Zack Orndorff\n\nPermission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the \"Software\"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions:\n\nThe above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software.\n\nTHE SOFTWARE IS PROVIDED \"AS IS\", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE."
17 |    },
18 |    "platforms": [
19 |       "Darwin",
20 |       "Windows",
21 |       "Linux"
22 |    ],
23 |    "installinstructions": {
24 |        "Darwin": "",
25 |        "Linux": "",
26 |        "Windows": ""
27 |    },
28 |    "version": "0.5.1",
29 |    "minimumbinaryninjaversion": 2846
30 | }
31 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # binja-xtensa: Architecture Plugin and ESP8266 Loader
 2 | 
 3 | Tensilica Xtensa Architecture Plugin and ESP8266 Firmware Loader for Binary
 4 | Ninja.
 5 | 
 6 | ![screenshot of Binary Ninja showing setup and loop of a decompiled ESP8266
 7 | Arduino project](https://raw.githubusercontent.com/zackorndorff/binja-xtensa/0.5/screenshots/hero.png)
 8 | 
 9 | ## Features
10 | 
11 | * Disassembly of nearly all Xtensa instructions
12 | * Lifting for most Xtensa instructions you'll see in ESP8266 Firmware
13 | * Support for Xtensa ELF files so they will be automatically recognized
14 | * Loader for ESP8266 raw firmware dumps. This support is a little finicky to
15 |   use, as there are multiple partitions in the firmware dumps. By default it uses
16 |   the last one with a detected header; you can adjust this via Open With
17 |   Options
18 |     * At the moment it doesn't completely map the sections properly, but it's a
19 |       start :)
20 | 
21 | ## What it doesn't do
22 | 
23 | * It was written mostly as an exercise for the author. It's useful enough to
24 |   share, but no promises it's useful for your project :)
25 | * Lift register windowing instructions (it disassembles most of them)
26 |     * You need this for ESP32 support. It shouldn't be too bad to add, as long
27 |       as you can figure out how to lift the windowed registers
28 | * Anything with the optional vector unit
29 | * Disassemble and lift most of the boolean instructions
30 | * Lift most floating point instructions
31 | * Deal with special registers (I figure you might as well look at the asm
32 |   for that anyway)
33 | * Anything quickly. This is Python, and not particularly well optimized Python
34 |   at that. If you're using this seriously, I recommend rewriting in C++
35 | * Find `main` in a raw binary for you
36 | 
37 | ## Installation
38 | 
39 | Install via the Binary Ninja plugin manager. Alternatively, clone this
40 | repository into your Binary Ninja plugins directory. See the [official Binary
41 | Ninja documentation](https://docs.binary.ninja/guide/plugins.html) for more
42 | details.
43 | 
44 | ## Using the ESP8266 Firmware Loader
45 | 
46 | The default of picking the last usable partition works decently, but if you want
47 | more control, use Open With Options and change `Loader > Which Firmware` to the
48 | option corresponding to the address you want to load.
49 | 
50 | I attempt to load in symbols from the SDK's linker script so some of the
51 | ROM-implemented functions are less mysterious. See
52 | [parse_rom_ld.py](binja_xtensa/parse_rom_ld.py) for the parsing code,
53 | [known_symbols.py](binja_xtensa/known_symbols.py) for the database it'll apply,
54 | and function `setup_esp8266_map` in
55 | [binaryview.py](binja_xtensa/binaryview.py#L17) for the code that applies it.
56 | This should probably be a load time option... but it's not at the moment :/
57 | 
58 | ![screenshot of Binary Ninja's Open With Options showing the Loader Which
59 | Firmware option](https://raw.githubusercontent.com/zackorndorff/binja-xtensa/0.5/screenshots/open-with-options.png)
60 | 
61 | ## Future Work
62 | 
63 | * Support register windowing instructions to support ESP32 firmware
64 | * Improve the raw firmware loader
65 | * Rewrite to be faster
66 | 
67 | ## Why did you write this?
68 | 
69 | 1. I was goofing around with ESP8266 and Arduino and was annoyed I didn't have
70 |    an easy way to disassemble the built binaries
71 | 2. I hadn't written a full architecture plugin and I thought it'd be a good
72 |    exercise
73 | 3. I got bored over COVID-19 lockdown in 2020 and needed something to do
74 | 
75 | ## Testing
76 | 
77 | There are some simple tests in
78 | [test_instruction.py](binja_xtensa/test_instruction.py), which are mostly just
79 | taking uniq'd output from objdump on some binaries I had laying around and
80 | making sure the output matches. They can be run with `python -m pytest` from the
81 | root of the project.
82 | 
83 | ## License
84 | 
85 | This project copyright Zack Orndorff (@zackorndorff) and is available under the
86 | MIT license. See [LICENSE](LICENSE).
87 | 


--------------------------------------------------------------------------------
/binja_xtensa/binaryview.py:
--------------------------------------------------------------------------------
  1 | """
  2 | ESP8266 Firmware .bin BinaryView
  3 | 
  4 | Using `firmware_parser.py`, we attempt to find binaries in the dump. By default
  5 | we'll pick an interesting one (currently the last one with a detected header),
  6 | but we present a load option to the user to allow picking a different one.
  7 | """
  8 | import json
  9 | import struct
 10 | 
 11 | from binaryninja import Architecture, BinaryView, Settings, Symbol
 12 | from binaryninja.enums import SectionSemantics, SegmentFlag, SymbolType
 13 | 
 14 | from .firmware_parser import parse_firmware
 15 | from .known_symbols import known_symbols
 16 | 
 17 | def setup_esp8266_map(bv):
 18 |     """Define symbols for the ESP8266 ROM"""
 19 |     for addr, symbol in known_symbols.items():
 20 |         addr = int(addr, 0)
 21 | 
 22 |         # https://github.com/esp8266/esp8266-wiki/wiki/Memory-Map
 23 |         rom_start = 0x40000000
 24 |         rom_end = 0x40010000
 25 | 
 26 |         bv.add_auto_segment(rom_start, rom_end - rom_start, 0, 0,
 27 |                             SegmentFlag.SegmentContainsCode |
 28 |                             SegmentFlag.SegmentContainsData |
 29 |                             SegmentFlag.SegmentReadable     |
 30 |                             SegmentFlag.SegmentExecutable)
 31 | 
 32 |         bv.add_auto_section("esp8266_ROM", rom_start, rom_end - rom_start,
 33 |                             SectionSemantics.ExternalSectionSemantics)
 34 | 
 35 |         if rom_start <= addr <= rom_end:
 36 |             sym_type = SymbolType.ImportedFunctionSymbol
 37 |         else:
 38 |             sym_type = SymbolType.ImportedDataSymbol
 39 | 
 40 |         bv.define_auto_symbol(Symbol(
 41 |             sym_type,
 42 |             addr, symbol))
 43 | 
 44 | 
 45 | class ESPFirmware(BinaryView):
 46 |     name = "ESPFirmware"
 47 |     long_name = "ESP Firmware"
 48 | 
 49 |     def __init__(self, data):
 50 |         BinaryView.__init__(self, file_metadata=data.file, parent_view=data)
 51 |         self.raw = data
 52 | 
 53 |     @classmethod
 54 |     def is_valid_for_data(cls, data):
 55 |         # These happen to be the two magic bytes used by firmware_parser.py
 56 |         if data.read(0, 1) in [b'\xe9', b'\xea']:
 57 |             return True
 58 |         return False
 59 | 
 60 |     @classmethod
 61 |     def _pick_default_firmware(cls, firmware_options):
 62 |         """Rudimentary heuristic for "interesting" binaries"""
 63 |         for idx, firm in reversed(list(enumerate(firmware_options))):
 64 |             if firm.name != "AppendedData":
 65 |                 return idx, firm
 66 | 
 67 |         return 0, firmware_options[0]
 68 | 
 69 |     @classmethod
 70 |     def get_load_settings_for_data(cls, data):
 71 |         # This example was crucial in figuring out how to present load options
 72 |         # https://github.com/Vector35/binaryninja-api/blob/dev/python/examples/mappedview.py
 73 |         # It's also helpful to call Settings().serialize_schema() from the
 74 |         # Python console and examine the results.
 75 | 
 76 |         firmware_options = parse_firmware(data)
 77 |         default_firmware_idx, _ = cls._pick_default_firmware(firmware_options)
 78 | 
 79 |         ourEnum = ["option" + str(i) for i in range(len(firmware_options))]
 80 |         ourEnumDescriptions = [
 81 |             f"{i.name} at {hex(i.bv_offset)}"
 82 |             for i in firmware_options]
 83 | 
 84 |         # TODO: actually JSON serialize this
 85 |         setting =  f"""{{
 86 |             "title": "Which Firmware",
 87 |             "type": "string",
 88 |             "description": "Which of the binaries in this file do you want?",
 89 |             "enum": {json.dumps(ourEnum)},
 90 |             "enumDescriptions": {json.dumps(ourEnumDescriptions)},
 91 |             "default": {json.dumps(ourEnum[default_firmware_idx])}
 92 |             }}
 93 |             """
 94 | 
 95 |         print(setting)
 96 | 
 97 |         load_settings = Settings("esp_bv_settings")
 98 |         assert load_settings.register_group("loader", "Loader")
 99 |         assert load_settings.register_setting("loader.esp.whichFirmware",
100 |                                               setting)
101 |         return load_settings
102 | 
103 |     def perform_is_executable(self):
104 |         return True
105 | 
106 |     def perform_get_entry_point(self):
107 |         # This should be set by the the_firmware.load() if there is an entry
108 |         # point.
109 |         # Otherwise, for lack of a better choice, we end up with 0
110 |         return self.entry_addr
111 | 
112 |     def perform_get_address_size(self):
113 |         return 4
114 | 
115 |     def init(self):
116 | 
117 |         try:
118 |             load_settings = self.get_load_settings(self.name)
119 |             which_firmware = load_settings.get_string("loader.esp.whichFirmware", self)
120 |         except:
121 |             which_firmware = None
122 | 
123 |         firmware_options = parse_firmware(self.parent_view)
124 | 
125 |         try:
126 |             prefix = "option"
127 | 
128 |             if which_firmware is None:
129 |                 try:
130 |                     which_firmware_idx, _ = self._pick_default_firmware(firmware_options)
131 |                 except:
132 |                     import traceback
133 |                     traceback.print_exc()
134 |                     raise
135 |                 which_firmware = prefix + str(which_firmware_idx)
136 | 
137 |             if not which_firmware.startswith(prefix):
138 |                 raise Exception("You didn't choose one of the firmware options")
139 |             which_firmware = int(which_firmware[len(prefix):])
140 |         except:
141 |             print("You didn't choose one of the firmware options")
142 |             return False
143 | 
144 |         try:
145 |             print("Using firmware index", which_firmware)
146 |             the_firmware = firmware_options[which_firmware]
147 |         except:
148 |             print("You didn't choose one of the firmware options")
149 |             return False
150 | 
151 |         self.platform = Architecture['xtensa'].standalone_platform
152 |         self.arch = Architecture['xtensa']
153 |         self.entry_addr = 0
154 | 
155 |         # Will create segments and set entry_addr as needed.
156 |         the_firmware.load(self, self.parent_view)
157 | 
158 |         if self.entry_addr != 0:
159 |             for seg in self.segments:
160 |                 if (seg.start <= self.entry_addr <= seg.end) and seg.executable:
161 |                     #self.add_auto_segment(seg.start, seg.data_length,
162 |                     #                      seg.data_offset, seg.data_length,
163 |                     #                      SegmentFlag.SegmentContainsCode |
164 |                     #                      SegmentFlag.SegmentReadable |
165 |                     #                      SegmentFlag.SegmentExecutable)
166 |                     # It seems the ReadOnlyCodeSectionSemantics kicks off the
167 |                     # autoanalysis
168 |                     self.add_auto_section('entry_section', seg.start,
169 |                                           seg.end - seg.start,
170 |                                           SectionSemantics.ReadOnlyCodeSectionSemantics
171 |                                           )
172 |             # I want to be able to find the entry point in the UI
173 |             # I couldn't find a create_auto_function... maybe I didn't look hard
174 |             # enough
175 |             self.create_user_function(self.entry_addr)
176 |             self.define_auto_symbol(Symbol(
177 |                 SymbolType.FunctionSymbol,
178 |                 self.entry_addr,
179 |                 "entry"))
180 | 
181 |         setup_esp8266_map(self)
182 | 
183 |         return True
184 | 


--------------------------------------------------------------------------------
/binja_xtensa/__init__.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Binary Ninja Xtensa (and ESP8266) support
  3 | 
  4 | This package offers several features:
  5 | 
  6 | Xtensa (little-endian):
  7 |     * correct length decoding for nearly all instructions
  8 |     * correct mnemonic decoding for nearly all instructions
  9 |     * fully correct disassembly for most instructions you'll see. In some places
 10 |       there are comments documenting skipped features. The disassembler took over
 11 |       10 hours to write, and the firmware I'm looking at didn't have a lot of
 12 |       floating point instructions :)
 13 |     * objdump-equivalent disassembly for all the instructions in the firmware I
 14 |       had lying around (complete with a pytest scaffolding to test this)
 15 |     * lifting to BNIL for 80+ of the most common instructions. This is enough
 16 |       for useful decompilation, courtesy of the Binary Ninja core.
 17 |     * a CallingConvention that matches what the documentation says GCC does.
 18 |       After adding this and setting it default, Binary Ninja started recognizing
 19 |       a good amount of function arguments.
 20 |     * declared support for whatever ELF architecture the ESP8266 tools in
 21 |       platformio generate... so if you open up an elf it builds you, it should
 22 |       recognize the architecture.
 23 | 
 24 | ESP8266-specific:
 25 |     * a BinaryView implementation that attempts to detect and load ESP8266
 26 |       firmware dumps. The current implementation is hacky, but it's able to
 27 |       examine multiple .bin dumps I've obtained through different methods.
 28 | 
 29 |       * These dumps sometimes contain multiple binaries (namely, a bootloader
 30 |         plus user code). I've added an "Open with Options" option to choose which
 31 |         of the binaries you want to look at. By default, it tries to pick the
 32 |         last binary containing parsed segments.
 33 | 
 34 | What it doesn't yet do:
 35 |     * anything with windowed registers
 36 |     * anything with the optional vector unit
 37 |     * disassemble and lift most of the boolean instructions
 38 |     * lift most floating point instructions
 39 |     * deal with special registers (I figure you might as well look at the asm
 40 |       for that anyway)
 41 |     * help you find `main` in a raw binary :)
 42 | 
 43 | I've abused Python metaprogramming throughout, so it should be possible to fix
 44 | each of the above inaccuracies on a per-instruction basis if the need arises.
 45 | I've attempted to document how that works.
 46 | """
 47 | 
 48 | from binaryninja import (Architecture, BinaryViewType, CallingConvention,
 49 |                          IntrinsicInfo, InstructionInfo, InstructionTextToken,
 50 |                          RegisterInfo, log)
 51 | from binaryninja.enums import (BranchType, Endianness, FlagRole,
 52 |                                LowLevelILFlagCondition)
 53 | 
 54 | from .instruction import Instruction
 55 | from .disassembly import disassemble_instruction
 56 | from .lifter import lift
 57 | from .binaryview import ESPFirmware
 58 | 
 59 | 
 60 | __all__ = ['XtensaLE']
 61 | 
 62 | 
 63 | class XtensaLE(Architecture):
 64 |     name = 'xtensa'
 65 |     endianness = Endianness.LittleEndian
 66 | 
 67 |     default_int_size = 4
 68 |     address_size = 4
 69 |     max_instr_length = 3
 70 | 
 71 |     # Uses for regs are from "CALL0 Register Usage and Stack Layout (8.1.2)"
 72 |     link_reg = 'a0'
 73 |     stack_pointer = 'a1'
 74 |     regs = {
 75 |         'a0': RegisterInfo("a0", 4, 0), # ret addr
 76 |         'a1': RegisterInfo("a1", 4, 0), # sp (callee-saved)
 77 |         'a2': RegisterInfo("a2", 4, 0), # arg1
 78 |         'a3': RegisterInfo("a3", 4, 0), # arg2
 79 |         'a4': RegisterInfo("a4", 4, 0), # arg3
 80 |         'a5': RegisterInfo("a5", 4, 0), # arg4
 81 |         'a6': RegisterInfo("a6", 4, 0), # arg5
 82 |         'a7': RegisterInfo("a7", 4, 0), # arg6
 83 |         'a8': RegisterInfo("a8", 4, 0), # static chain (see section 8.1.8)
 84 |         'a9': RegisterInfo("a9", 4, 0),
 85 |         'a10': RegisterInfo("a10", 4, 0),
 86 |         'a11': RegisterInfo("a11", 4, 0),
 87 |         'a12': RegisterInfo("a12", 4, 0), # callee-saved
 88 |         'a13': RegisterInfo("a13", 4, 0), # callee-saved
 89 |         'a14': RegisterInfo("a14", 4, 0), # callee-saved
 90 |         'a15': RegisterInfo("a15", 4, 0), # optional stack-frame pointer
 91 |         'sar': RegisterInfo("sar", 1, 0), # Shift Address Register: Not a GPR
 92 |     }
 93 | 
 94 |     # Do we have flags?
 95 |     flags = {}
 96 |     flag_roles = {}
 97 |     flag_write_types = {}
 98 |     flags_written_by_flag_write_type = {}
 99 |     flags_required_for_flag_condition = {}
100 | 
101 |     intrinsics = {
102 |         "memw": IntrinsicInfo([], []),
103 |         "isync": IntrinsicInfo([], []),
104 |     }
105 | 
106 |     def _decode_instruction(self, data, addr):
107 |         insn = None
108 |         try:
109 |             insn = Instruction.decode(data)
110 |         except:
111 |             pass
112 |         return insn
113 | 
114 |     def get_instruction_info(self, data, addr):
115 |         insn = self._decode_instruction(data, addr)
116 |         if not insn:
117 |             return None
118 |         result = InstructionInfo()
119 |         result.length = insn.length
120 |         if insn.length > 3 or insn.length < 0:
121 |             raise Exception("Somehow we got here without setting length")
122 | 
123 |         # Add branches
124 |         if insn.mnem in ["RET", "RET.N"]:
125 |             result.add_branch(BranchType.FunctionReturn)
126 | 
127 |         # Section 3.8.4 "Jump and Call Instructions
128 |         elif insn.mnem in ["J"]:
129 |             result.add_branch(BranchType.UnconditionalBranch,
130 |                               insn.target_offset(addr))
131 |         elif insn.mnem in ["JX"]:
132 |             result.add_branch(BranchType.IndirectBranch)
133 | 
134 |         elif insn.mnem in ["CALL0", "CALL4", "CALL8", "CALL12"]:
135 |             result.add_branch(BranchType.CallDestination,
136 |                               insn.target_offset(addr))
137 |         elif insn.mnem in ["CALLX0", "CALLX4", "CALLX8", "CALLX12"]:
138 |             pass
139 |             #result.add_branch(BranchType.IndirectBranch)
140 | 
141 |         elif insn.mnem in ["SYSCALL"]:
142 |             result.add_branch(BranchType.SystemCall)
143 | 
144 |         elif insn.mnem.replace(".", "_") in [k for k in Instruction._target_offset_map.keys() if
145 |                            k.startswith("B")]: # lol
146 |             result.add_branch(BranchType.TrueBranch, insn.target_offset(addr))
147 |             result.add_branch(BranchType.FalseBranch, addr + insn.length)
148 | 
149 |         return result
150 | 
151 |     def get_instruction_text(self, data, addr):
152 |         insn = self._decode_instruction(data, addr)
153 |         if not insn:
154 |             return None
155 |         text = disassemble_instruction(insn, addr)
156 |         return text, insn.length
157 | 
158 |     def get_instruction_low_level_il(self, data, addr, il):
159 |         insn = self._decode_instruction(data, addr)
160 |         if not insn:
161 |             return None
162 |         return lift(insn, addr, il)
163 | 
164 | 
class XtensaCall0CallingConvention(CallingConvention):
    """Register roles declared for the CALL0 calling convention."""
    int_arg_regs = ["a2", "a3", "a4", "a5", "a6", "a7"]
    int_return_reg = "a2"
    high_int_return_reg = "a3"

    # a0 is dubiously caller saved... it's the ret addr / link register
    caller_saved_regs = [
        "a0",
        "a2", "a3", "a4", "a5", "a6", "a7",
        "a8", "a9", "a10", "a11",
    ]
172 | 
173 | 
def register_stuff():
    """Register the architecture, its ELF mapping, its calling convention and
    the ESP firmware view."""
    XtensaLE.register()
    xtensa = Architecture['xtensa']

    # Register ourselves with the ELF loader
    elf_view_type = BinaryViewType['ELF']
    elf_view_type.register_arch(94, Endianness.LittleEndian, xtensa)

    call0 = XtensaCall0CallingConvention(xtensa, "default")
    xtensa.register_calling_convention(call0)

    # If we register on the Architecture's standalone platform, it seems to use our
    # calling convention without showing __convention("default") on every function
    standalone = xtensa.standalone_platform
    standalone.default_calling_convention = xtensa.calling_conventions['default']

    ESPFirmware.register()
189 | 
190 | 
191 | register_stuff()
192 | 


--------------------------------------------------------------------------------
/binja_xtensa/test_instruction.py:
--------------------------------------------------------------------------------
  1 | import binascii
  2 | import bz2
  3 | from collections import namedtuple
  4 | import csv
  5 | import os
  6 | import re
  7 | 
  8 | import pytest
  9 | 
 10 | from .instruction import Instruction, InstructionType, sign_extend
 11 | from .disassembly import disassemble_instruction, tokens_to_text
 12 | 
 13 | DIR = os.path.dirname(__file__)
 14 | 
 15 | def test_decode_abs():
 16 |     # RRR type
 17 |     # ABS ar, at
 18 |     # 0110 0000 rrrr 0001 tttt 0000
 19 |     # 60        r1        t0
 20 |     # ABS a7, a9
 21 |     # 60 71 90 => 907160
 22 |     INSN_ABS = binascii.unhexlify("907160")
 23 |     insn = Instruction.decode(INSN_ABS)
 24 |     assert insn.op0 == 0
 25 |     assert insn.op1 == 0
 26 |     assert insn.op2 == 6
 27 |     assert insn.r == 7
 28 |     assert insn.t == 9
 29 |     assert insn.s == 1
 30 |     assert insn.length == 3
 31 |     assert insn.mnem == "ABS"
 32 |     assert insn.instruction_type == InstructionType.RRR
 33 | 
 34 | def test_decode_add():
 35 |     """
 36 |     ADD ar, as, at
 37 |     ADD a3, a2, a1
 38 | 
 39 |     * bit 23
 40 |     * 1000 # op2
 41 |     * 0000 # op1
 42 |     * 0011 # a3 is r
 43 |     * 0010 # a2 is s
 44 |     * 0001 # a1 is t
 45 |     * 0000 # op0
 46 |     * bit 0
 47 | 
 48 |     Thus our insn is 80 32 10, which must be byte swapped to 10 32 80
 49 |     """
 50 |     #EveryInstR Group
 51 |     insn = Instruction.decode(binascii.unhexlify("103280"))
 52 |     assert insn.op0 == 0
 53 |     assert insn.op1 == 0
 54 |     assert insn.op2 == 8
 55 |     assert insn.r == 3
 56 |     assert insn.s == 2
 57 |     assert insn.t == 1
 58 |     assert insn.length == 3
 59 |     assert insn.mnem == "ADD"
 60 |     assert insn.instruction_type == InstructionType.RRR
 61 | 
 62 | def test_add_narrow():
 63 |     """
 64 |     ADD.N ar, as, at
 65 |     * bit 15
 66 |     * rrrr
 67 |     * ssss
 68 |     * tttt
 69 |     * 1010 # op0
 70 |     * bit 0
 71 |     Requires Code Density Option
 72 | 
 73 |     ADD.N a9, a5, a3
 74 |     is then 1001 0101 0011 1010, or 953a, reversed to 3a95
 75 |     """
 76 |     INSN_ADD_N = binascii.unhexlify("3a95")
 77 |     insn = Instruction.decode(INSN_ADD_N)
 78 |     assert insn.op0 == 0b1010
 79 |     assert insn.t == 3
 80 |     assert insn.s == 5
 81 |     assert insn.r == 9
 82 |     assert insn.length == 2
 83 |     assert insn.mnem == "ADD.N"
 84 | 
 85 | def test_addi():
 86 |     """
 87 |     RRI8 type
 88 |     ADDI at, as, -128..127
 89 |     * bit 23
 90 |     * imm8 # check encoding of this
 91 |     * 1100
 92 |     * s
 93 |     * t
 94 |     * 0010
 95 |     * bit 0
 96 |     
 97 |     ADDI a11, a1, -2
 98 |     is then
 99 |     1111 1110 1100 0001 1011 0010, or fe c1 b2, reversed to b2c1fe
100 |     """
101 |     insn = Instruction.decode(binascii.unhexlify("b2c1fe"))
102 |     assert insn.op0 == 0b0010
103 |     assert insn.r == 0b1100
104 |     assert insn.s == 1
105 |     assert insn.t == 11
106 |     # TODO: handle and test negative handling. I'd argue it should be a separate
107 |     # value, as the decoded imm8 doesn't seem like a signed value
108 |     #assert insn.imm8 == -2
109 |     assert insn.imm8 == 0b11111110
110 |     assert insn.length == 3
111 |     assert insn.mnem == "ADDI"
112 |     assert insn.instruction_type == InstructionType.RRI8
113 | 
114 | 
# Fixture rows: column 0 is the hex-encoded opcode, column 1 the mnemonic.
# Each entry becomes (opcode bytes, mnemonic with surrounding blanks removed).
with bz2.open(os.path.join(DIR, "test_mnemonics.csv.bz2"), "rt") as fp:
    test_mnemonics_data = [
        (binascii.unhexlify(csv_row[0]), csv_row[1].strip())
        for csv_row in csv.reader(fp)
    ]
123 | 
124 | 
def test_mnemonics_data_is_valid():
    """The CSV fixture is non-empty and its entries are 2-tuples."""
    assert test_mnemonics_data
    first_entry = test_mnemonics_data[0]
    assert len(first_entry) == 2
128 | 
def compare_mnem(one, two):
    """Return True when two mnemonics name the same instruction.

    The comparison ignores case and surrounding whitespace. Mnemonics of the
    form ``rsr.<reg>``, ``wsr.<reg>`` and ``xsr.<reg>`` are cut down to the
    bare ``rsr``/``wsr``/``xsr`` to work around not having the register names
    for special regs.

    Args:
        one: first mnemonic string.
        two: second mnemonic string.

    Returns:
        bool: whether the normalised mnemonics are equal.
    """
    def _canonical(mnem):
        # Normalise case and whitespace *before* looking at the prefix, so
        # that e.g. "RSR.SAR" or " rsr.sar" also get the special-register
        # workaround.
        mnem = mnem.strip().lower()
        if mnem.startswith(("rsr.", "wsr.", "xsr.")):
            return mnem[:3]
        return mnem

    return _canonical(one) == _canonical(two)
141 | 
142 | 
@pytest.mark.parametrize("opbytes,mnem_expected", test_mnemonics_data)
def test_mnem_from_file(opbytes, mnem_expected):
    """Every fixture opcode decodes to its full length and listed mnemonic."""
    decoded = Instruction.decode(opbytes)
    assert decoded.length == len(opbytes)
    assert compare_mnem(decoded.mnem, mnem_expected)
148 | 
# One disassembly dump line: "<addr>: <opcode hex> <mnemonic> <operands...>"
mtd_re = r'([0-9a-f]+):\s+([0-9a-f]+)\s+([a-z0-9.]+)\s+(.*)$'
mtd_rec = re.compile(mtd_re)
with bz2.open(os.path.join(DIR, "test_mnemonic_text.dump.bz2"), "rt") as fp:
    mnem_text_dump = list(fp)
153 | 
def bswap_opcode_string(opstr):
    """Return the hex string ``opstr`` with its bytes in reverse order.

    The result is always lower-case hex.
    """
    swapped = binascii.unhexlify(opstr)[::-1]
    return binascii.hexlify(swapped).decode('utf-8')
159 | 
# One parsed dump line; every field is kept as the string found in the dump,
# except ``opcode``, which is byte-swapped.
DisassLine = namedtuple('DisassLine', 'addr opcode mnem rest')

def parse_test_data(data_lines):
    """Parse disassembly dump lines into ``DisassLine`` records.

    Every line must match ``mtd_rec``; a non-matching line trips an
    assertion. The opcode column is byte-swapped with
    ``bswap_opcode_string`` before being stored.

    Args:
        data_lines: iterable of text lines from a dump file.

    Returns:
        list of ``DisassLine``, in input order.
    """
    records = []
    for raw_line in data_lines:
        found = mtd_rec.match(raw_line)
        assert found
        addr, opcode_hex, mnem, rest = found.groups()
        opcode_hex = bswap_opcode_string(opcode_hex)
        assert len(addr)
        assert len(opcode_hex)
        assert len(mnem)
        records.append(DisassLine(addr, opcode_hex, mnem, rest))
    return records
174 | 
def test_mtd_re():
    """The mnemonic dump parses into a non-empty list of 4-field records."""
    parsed = parse_test_data(mnem_text_dump)
    assert parsed
    first_record = parsed[0]
    assert len(first_record) == 4
179 | 
180 | def _normalize_insn(it):
181 |     it = it.replace("\t", "").lower()
182 |     tokens = []
183 |     for tok in it.split():
184 |         tok = tok.replace(",", "")
185 |         if tok.startswith("0x"):
186 |             tokens.append(str(sign_extend(int(tok, 0), 32)))
187 |         else:
188 |             tokens.append(tok)
189 |     return ''.join(tokens)
190 | 
def compare_insn(one, two):
    """Return True when both disassembly strings normalise to the same text."""
    return _normalize_insn(one) == _normalize_insn(two)
196 | 
def test_tokens_to_text():
    """Rendered ABS text matches the expected text regardless of case/spacing."""
    decoded = Instruction.decode(bytes.fromhex("907160"))
    rendered = tokens_to_text(disassemble_instruction(decoded, 0))
    for expected in ("ABS    a7, a9", "abs a7, a9"):
        assert compare_insn(rendered, expected)
203 | 
# mnem_text_dump is a bunch of dumped disassembly, uniq'd on the mnem for
# brevity; each parsed line becomes one parametrized case.
mtd_data = parse_test_data(mnem_text_dump)

@pytest.mark.parametrize("parsed_line", mtd_data)
def test_mnem_text_dump(parsed_line):
    """Decode one dump line and compare mnemonic and rendered text."""
    raw_opcode = binascii.unhexlify(parsed_line.opcode)
    decoded = Instruction.decode(raw_opcode)
    assert compare_mnem(decoded.mnem, parsed_line.mnem)

    rendered = tokens_to_text(
        disassemble_instruction(decoded, int(parsed_line.addr, 16)))
    expected = f"{parsed_line.mnem} {parsed_line.rest}".strip()

    assert compare_insn(expected, rendered)
218 | 
# Every line of torture_test.dump.bz2 becomes one parametrized case.
with bz2.open(os.path.join(DIR, "torture_test.dump.bz2"), "rt") as fp:
    lots_text_dump = list(fp)
lots_data = parse_test_data(lots_text_dump)

@pytest.mark.parametrize("parsed_line", lots_data)
def test_lots_text_dump(parsed_line):
    """Decode one torture-dump line and compare mnemonic and rendered text."""
    raw_opcode = binascii.unhexlify(parsed_line.opcode)
    decoded = Instruction.decode(raw_opcode)
    assert compare_mnem(decoded.mnem, parsed_line.mnem)

    rendered = tokens_to_text(
        disassemble_instruction(decoded, int(parsed_line.addr, 16)))
    expected = f"{parsed_line.mnem} {parsed_line.rest}".strip()

    assert compare_insn(expected, rendered)
235 | 
# Every line of esp32_torture_test.dump.bz2 becomes one parametrized case.
with bz2.open(os.path.join(DIR, "esp32_torture_test.dump.bz2"), "rt") as fp:
    esp32_lots_text_dump = fp.readlines()
esp32_lots_data = parse_test_data(esp32_lots_text_dump)

# This test needs a module-unique name: pytest collects tests by module
# attribute, so reusing another test's name would rebind it and the earlier
# test would silently never run.
@pytest.mark.parametrize("esp32_parsed_line", esp32_lots_data)
def test_esp32_lots_text_dump(esp32_parsed_line):
    """Decode one ESP32 torture-dump line and compare mnemonic and text.

    Lines whose mnemonic is ``rer`` or ``wer`` are marked as expected
    failures before any decoding is attempted.
    """
    if esp32_parsed_line.mnem in ('rer', 'wer'):
        # I disagree with objdump here; the manual states that these insns
        # take arguments; objdump doesn't appear to think so? Also possible
        # my cleanup of the output broke the objdump results?
        pytest.xfail("objdump dump shows rer/wer without operands")
    insn = Instruction.decode(binascii.unhexlify(esp32_parsed_line.opcode))
    assert compare_mnem(insn.mnem, esp32_parsed_line.mnem)

    addr = int(esp32_parsed_line.addr, 16)
    disass_text = tokens_to_text(disassemble_instruction(insn, addr))

    expected_insn_text = (esp32_parsed_line.mnem + " " +
                          esp32_parsed_line.rest).strip()

    assert compare_insn(expected_insn_text, disass_text)
258 | 
def test_rotw_positive():
    """ROTW with a positive immediate decodes to 2 and renders as ROTW 2."""
    decoded = Instruction.decode(bytes.fromhex("208040"))  # ROTW 2
    assert compare_mnem(decoded.mnem, "ROTW")
    assert decoded.rotw_simm4() == 2
    rendered = tokens_to_text(disassemble_instruction(decoded, 0x1000))
    assert compare_insn(rendered, "ROTW 2")
266 | 
def test_rotw_negative():
    """ROTW with a negative immediate decodes to -1 and renders as ROTW -1."""
    decoded = Instruction.decode(bytes.fromhex("f08040"))  # ROTW -1
    assert compare_mnem(decoded.mnem, "ROTW")
    assert decoded.rotw_simm4() == -1
    rendered = tokens_to_text(disassemble_instruction(decoded, 0x1000))
    assert compare_insn(rendered, "ROTW -1")
274 | 
# We didn't have any tests for the FPU, which led to an undetected typo
def test_mov_s_fpu():
    """The FPU MOV.S opcode decodes to the MOV.S mnemonic."""
    decoded = Instruction.decode(bytes.fromhex("0012fa"))
    assert compare_mnem(decoded.mnem, "MOV.S")
    # Disassembly support does not yet exist
    #disass_text = tokens_to_text(disassemble_instruction(decoded, 0x1000))
    #assert compare_insn(disass_text, "MOV.S f1, f2")
283 | 
284 | 


--------------------------------------------------------------------------------
/binja_xtensa/firmware_parser.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | """
  3 | ESP8266 firmware parser
  4 | 
  5 | Very hacky at the moment. This logic is based on a quick reading of the
  6 | following sources:
  7 |     * https://github.com/espressif/esptool/wiki/Firmware-Image-Format
  8 |     * https://richard.burtons.org/2015/05/17/decompiling-the-esp8266-boot-loader-v1-3b3/
  9 |     * https://boredpentester.com/reversing-esp8266-firmware-part-3/ (that whole
 10 |       series really)
 11 | 
 12 | These firmware dumps seem to contain multiple binaries. So we have a rudimentary
 13 | heuristic to find a couple binaries, which we pass back in a list to the
 14 | binaryview to present to the user as options.
 15 | """
 16 | 
 17 | import binascii
 18 | import struct
 19 | 
 20 | from binaryninja import BinaryViewType
 21 | from binaryninja.enums import SegmentFlag
 22 | 
 23 | class InvalidFormat(Exception):
 24 |     pass
 25 | 
 26 | class ESPSegment:
 27 |     header_fmt = "<II"
 28 | 
 29 |     def __init__(self, load_address, size, outer_size, data_bv_offset):
 30 |         self.outer_size = outer_size
 31 |         self.load_address = load_address
 32 |         self.size = size
 33 |         self.data_bv_offset = data_bv_offset
 34 | 
 35 |     def __repr__(self):
 36 |         return f"""ESPSegment(outer_size={hex(self.outer_size)},
 37 | load_address={hex(self.load_address)},
 38 | size={hex(self.size)},
 39 | data_bv_offset={hex(self.data_bv_offset)})
 40 | """
 41 |     def load(self, bv, parent_bv, outer_entry_point=None):
 42 |         if outer_entry_point is None or not (
 43 |                 self.load_address <=
 44 |                 outer_entry_point <=
 45 |                 self.load_address + self.size):
 46 |             permissions = (SegmentFlag.SegmentContainsCode |
 47 |                            SegmentFlag.SegmentContainsData |
 48 |                            SegmentFlag.SegmentReadable     |
 49 |                            SegmentFlag.SegmentWritable     |
 50 |                            SegmentFlag.SegmentExecutable)
 51 |         else:
 52 |             permissions = (SegmentFlag.SegmentContainsCode |
 53 |                            SegmentFlag.SegmentContainsData |
 54 |                            SegmentFlag.SegmentReadable     |
 55 |                            SegmentFlag.SegmentExecutable)
 56 | 
 57 |         bv.add_auto_segment(self.load_address, self.size,
 58 |                             self.data_bv_offset, self.size,
 59 |                             permissions)
 60 | 
 61 |     @classmethod
 62 |     def parse(cls, bv, bv_offset):
 63 |         header_size = struct.calcsize(cls.header_fmt)
 64 |         header = bv.read(bv_offset + 0, header_size)
 65 |         if len(header) < header_size:
 66 |             raise InvalidFormat("Could not read Segment Header")
 67 |         load_address, seg_size = struct.unpack(cls.header_fmt, header)
 68 | 
 69 |         return cls(
 70 |             outer_size=header_size + seg_size,
 71 |             load_address=load_address,
 72 |             size=seg_size,
 73 |             data_bv_offset=bv_offset + header_size)
 74 | 
 75 | class E9File:
 76 |     name = "Raw(E9)"
 77 |     header_fmt = "<BBBBI"
 78 |     def __init__(self, bv_offset, magic, segment_count, flash_interface,
 79 |                  flash_cfg, entry_point, data_bv_offset, outer_size):
 80 |         self.bv_offset = bv_offset
 81 |         self.magic = magic
 82 |         self.segment_count = segment_count
 83 |         self.flash_interface = flash_interface
 84 |         self.flash_cfg = flash_cfg
 85 |         self.entry_point = entry_point
 86 |         print("entry point:", hex(entry_point))
 87 |         self.data_bv_offset = data_bv_offset
 88 |         self.outer_size = outer_size
 89 |         self.segments = []
 90 | 
 91 |     def __repr__(self):
 92 |         return f"""E9File(bv_offset={hex(self.bv_offset)},
 93 | magic={hex(self.magic)},
 94 | segment_count={self.segment_count},
 95 | flash_interface={hex(self.flash_interface)},
 96 | flash_cfg={hex(self.flash_cfg)},
 97 | entry_point={hex(self.entry_point)},
 98 | data_bv_offset={hex(self.data_bv_offset)},
 99 | outer_size={hex(self.outer_size)},
100 | segments={repr(self.segments)})
101 | """
102 | 
103 |     def _segments_size(self):
104 |         return sum(i.outer_size for i in self.segments)
105 | 
106 |     def load(self, bv, parent_bv, outer_entry_point=None):
107 |         for seg in self.segments:
108 |             seg.load(bv, parent_bv, outer_entry_point)
109 |         bv.entry_addr = self.entry_point
110 | 
111 |     @classmethod
112 |     def parse(cls, bv, bv_offset):
113 |         header_size = struct.calcsize(cls.header_fmt)
114 |         header = bv.read(bv_offset + 0, header_size)
115 |         if len(header) < header_size:
116 |             raise InvalidFormat("Could not read E9 Header")
117 | 
118 |         (magic, seg_count, flash_interface, flash_cfg,
119 |          entry_point) = struct.unpack(cls.header_fmt, header)
120 | 
121 |         if magic != 0xe9:
122 |             raise InvalidFormat("Invalid magic")
123 | 
124 |         f = cls(bv_offset=bv_offset,
125 |                 magic=magic,
126 |                 segment_count=seg_count,
127 |                 flash_interface=flash_interface,
128 |                 flash_cfg=flash_cfg,
129 |                 entry_point=entry_point,
130 |                 data_bv_offset=bv_offset + header_size,
131 |                 outer_size=None # will fill in below
132 |                 )
133 | 
134 |         for _ in range(seg_count):
135 |             f.segments.append(ESPSegment.parse(bv, bv_offset + header_size + f._segments_size()))
136 | 
137 |         f.outer_size = header_size + f._segments_size()
138 | 
139 |         return f
140 | 
class EAFile:
    """An image introduced by the 0xEA magic byte.

    The header is followed by ``text_length`` bytes, after which an E9 image
    is expected.
    """
    name = "Bootloaded(EA)"
    header_fmt = "<BBBBIII"

    def __init__(self, bv_offset, magic1, magic2, config, entry_point,
                 text_length, data_bv_offset, outer_size):
        print("ENTRY_POINT:", entry_point)
        # Where the image header sits, and where the bytes after it begin.
        self.bv_offset = bv_offset
        self.data_bv_offset = data_bv_offset
        # Raw header fields; config holds the two bytes after the magics.
        self.magic1 = magic1
        self.magic2 = magic2
        self.config = config
        self.entry_point = entry_point
        self.text_length = text_length
        # Header + text + nested E9 image; parse() fills this in last.
        self.outer_size = outer_size
        self.e9file = None

    def __repr__(self):
        fields = [
            f"bv_offset={hex(self.bv_offset)}",
            f"magic1={hex(self.magic1)}",
            f"magic2={self.magic2}",
            f"config={hex(self.config[0])} {hex(self.config[1])}",
            f"entry_point={hex(self.entry_point)}",
            f"text_length={hex(self.text_length)}",
            f"data_bv_offset={hex(self.data_bv_offset)}",
            f"outer_size={hex(self.outer_size)}",
            f"e9file={repr(self.e9file)}",
        ]
        return "EAFile(" + ",\n".join(fields) + ")\n"

    def load(self, bv, parent_bv):
        """Map the text area, then load the nested E9 image's segments."""
        text_flags = (SegmentFlag.SegmentContainsCode |
                      SegmentFlag.SegmentContainsData |
                      SegmentFlag.SegmentDenyWrite    |
                      SegmentFlag.SegmentReadable     |
                      SegmentFlag.SegmentExecutable)
        # The text area is mapped at 0x40200000 plus its offset in the view.
        text_address = 0x40200000 + self.data_bv_offset
        bv.add_auto_segment(text_address, self.text_length,
                            self.data_bv_offset, self.text_length,
                            text_flags)
        self.e9file.load(bv, parent_bv, self.entry_point)
        bv.entry_addr = self.entry_point

    @classmethod
    def parse(cls, bv, bv_offset):
        """Parse the EA header at ``bv_offset`` and the E9 image after the text.

        Raises:
            InvalidFormat: the header is truncated or the first magic byte is
                not 0xea (parsing the nested E9 image may raise it as well).
        """
        header_size = struct.calcsize(cls.header_fmt)
        raw_header = bv.read(bv_offset, header_size)
        if len(raw_header) < header_size:
            raise InvalidFormat("Could not read EA Header")

        (magic1, magic2, config_first, config_second, entry_point, _unused,
         text_length) = struct.unpack(cls.header_fmt, raw_header)
        if magic1 != 0xea:
            raise InvalidFormat("Invalid magic")

        image = cls(bv_offset=bv_offset,
                    magic1=magic1,
                    magic2=magic2,
                    config=[config_first, config_second],
                    entry_point=entry_point,
                    text_length=text_length,
                    data_bv_offset=bv_offset + header_size,
                    outer_size=None)

        image.e9file = E9File.parse(bv, image.data_bv_offset + text_length)
        image.outer_size = header_size + text_length + image.e9file.outer_size
        return image
209 | 
class AppendedData:
    """The bytes from a given offset up to the end of the view."""
    name = "AppendedData"

    def __init__(self, length, data_bv_offset):
        self.length = length
        # bv_offset and data_bv_offset coincide: there is no header.
        self.data_bv_offset = self.bv_offset = data_bv_offset
        self.outer_size = length

    def __repr__(self):
        return f"""AppendedData(length={hex(self.length)},
data_bv_offset={hex(self.data_bv_offset)})
"""

    def load(self, bv, parent_bv):
        """Map the data at address 0 as a readable/writable/executable segment.

        Args:
            bv: view that receives the segment (``add_auto_segment``).
            parent_bv: unused here; kept for a uniform ``load`` signature.
        """
        bv.add_auto_segment(0, self.length,
                            self.data_bv_offset, self.length,
                            (SegmentFlag.SegmentContainsCode |
                             SegmentFlag.SegmentContainsData |
                             SegmentFlag.SegmentReadable     |
                             SegmentFlag.SegmentWritable     |
                             SegmentFlag.SegmentExecutable))

    @classmethod
    def parse(cls, bv, bv_offset):
        """Describe everything from ``bv_offset`` to ``bv.end``.

        Built through ``cls`` so that a subclass gets an instance of itself.
        """
        return cls(bv.end - bv_offset, bv_offset)
234 | 
235 | 
def parse_firmware(bv):
    """Heuristically locate the images contained in the view ``bv``.

    An E9 image must start at offset 0. If it ends at or before 0x1000, an EA
    image and an E9 image are each tried at offset 0x1000, and any bytes
    following the last image found are reported as ``AppendedData``.

    Returns:
        list of the parsed objects, or None when no E9 image starts at
        offset 0 or when that image extends past 0x1000.
    """
    try:
        first_image = E9File.parse(bv, 0)
    except InvalidFormat:
        print("Could not find starting E9File")
        return
    firmware_options = [first_image]

    if first_image.outer_size > 0x1000:
        # NOTE(review): this drops the image that was just parsed and yields
        # None -- confirm against the caller that this is intended.
        return

    second_stage = ((EAFile, "Could not find following EAFile"),
                    (E9File, "Could not find following E9File"))
    for image_cls, failure_message in second_stage:
        try:
            firmware_options.append(image_cls.parse(bv, 0x1000))
        except InvalidFormat:
            print(failure_message)

    last_image = firmware_options[-1]
    next_addr = last_image.bv_offset + last_image.outer_size
    if next_addr < bv.end:
        firmware_options.append(AppendedData.parse(bv, next_addr))

    return firmware_options
264 | 
def main():
    """Ad-hoc driver: open TEST_FIRMWARE as a Raw view and print what parses."""
    TEST_FIRMWARE = ""
    raw_view_type = BinaryViewType['Raw']
    bv = raw_view_type.open(TEST_FIRMWARE)
    if not bv:
        print("Could not open bv")
        return
    # Two blank lines set the result apart from anything printed earlier.
    for _ in range(2):
        print()
    print(parse_firmware(bv))

if __name__ == '__main__':
    main()
278 | 


--------------------------------------------------------------------------------
/binja_xtensa/known_symbols.py:
--------------------------------------------------------------------------------
1 | known_symbols = {"1073767424": "rom_iq_est_disable", "1073772548": "rom_phy_reset_req", "1073764360": "sip_send", "1073741840": "_DebugExceptionVector", "1073743892": "cmd_parse", "1073780760": "MD5Init", "1073756188": "UartRegReadProc", "1073741856": "_NMIExceptionVector", "1073794396": "__truncdfsf2", "1073768496": "rom_set_noise_floor", "1073780788": "MD5Update", "1073756220": "uartAttach", "1073788992": "pbkdf2_sha1", "1073774668": "rom_rfcal_rxiq", "1073741904": "_UserExceptionVector", "1073771536": "rom_pbus_enter_debugmode", "1073764452": "sip_to_host_chain_append", "1073766504": "slc_set_host_io_max_window", "1073752176": "rtc_enter_sleep", "1073768564": "rom_start_noisefloor", "1073760376": "SPIUnlock", "1073766420": "slc_pause_from_host", "1073762428": "lldesc_set_owner", "1073741952": "_ResetVector", "1073743212": "_xtos_restore_intlevel", "1073766540": "slc_init_credit", "1073764036": "sip_post_init", "1073791136": "strncpy", "1073748132": "ets_memset", "1073764520": "sip_get_ptr", "1073772572": "rom_restart_cal", "1073766572": "slc_add_credits", "1073751752": "dtm_set_intr_mask", "1073789108": "hmac_sha1_vector", "1073776824": "rom_rfcal_txiq_cover", "1073766592": "rom_abs_temp", "1073748164": "ets_memmove", "1073753804": "ets_delay_us", "1073766608": "rom_chip_v5_disable_cca", "1073748180": "ets_memcmp", "1073772764": "rom_write_rfpll_sdm", "1073790796": "strlen", "1073760492": "SPIReadModeCnfig", "1073793264": "__muldf3", "1073769172": "rom_tx_mac_enable", "1073762556": "sip_post", "1073780992": "MD5Final", "1073762688": "sip_alloc_to_host_evt", "1073761324": "SPIParamCfg", "1073766668": "rom_chip_v5_sense_backoff", "1073771224": "rom_i2c_writeReg", "1073788292": "SHA1Init", "1073770780": "rom_chip_v5_rx_init", "1073767472": "rom_iq_est_enable", "1073756452": "uart_baudrate_detect", "1073780296": "base64_decode", "1073770892": "rom_chip_v5_tx_init", "1073754456": "ets_wdt_restore", "1073766748": "rom_dc_iq_est", "1073783136": "hmac_md5_vector", 
"1073743248": "_xtos_cause3_handler", "1073772904": "rom_rfpll_set_freq", "1073751656": "rtc_set_sleep_mode", "1073754480": "ets_wdt_init", "1073752808": "ets_bzero", "1073791360": "__addsf3", "1073760644": "SPIEraseChip", "1073768844": "rom_stop_tx_tone", "1073799568": "memset", "1073751180": "ets_install_putc2", "1073748376": "mem_init", "1073753160": "ets_timer_setfn", "1073754548": "roundup2", "1073766840": "rom_en_pwdet", "1073754560": "multofup", "1073755552": "FilePacketSendReqMsgProc", "1073799244": "memmove", "1073765776": "slc_enable", "1073790372": "_xtos_ints_off", "1073748448": "mem_free", "1073751120": "ets_install_external_printf", "1073772284": "rom_pbus_xpd_tx_off", "1073752556": "rtc_intr_handler", "1073788328": "SHA1Update", "1073742932": "_xtos_set_exception_handler", "1073760768": "SPIEraseSector", "1073744388": "remove_head_space", "1073766460": "slc_resume_from_host", "1073756692": "Uart_Init", "1073799708": "__udivsi3", "1073773096": "rom_cal_tos_v50", "1073754672": "UartConnCheck", "1073762868": "sip_to_host_evt_send_done", "1073766968": "rom_get_bb_atten", "1073760692": "SPIEraseBlock", "1073752640": "ets_rtc_int_register", "1073789512": "sha1_prf", "1073760844": "SPIWrite", "1073744480": "convert_para_str", "1073769232": "rom_ana_inf_gating_en", "1073775204": "rom_rfcal_rxiq_set_reg", "1073791592": "__subsf3", "1073777264": "rom_rfcal_txiq_set_reg", "1073772648": "rom_rfpll_reset", "1073745344": "ets_set_idle_cb", "1073752712": "ets_strcpy", "1073777772": "rom_set_txbb_atten", "1073752728": "ets_strncpy", "1073771164": "rom_i2c_readReg_Mask", "1073741936": "_DoubleExceptionVector", "1073799844": "__floatunsisf", "1073752744": "ets_strcmp", "1073799852": "__floatsisf", "1073729776": "rcons", "1073752760": "ets_strncmp", "1073792312": "__adddf3", "1073763008": "sip_reclaim_tx_data_pkt", "1073752776": "ets_strlen", "1073783500": "hmac_md5", "1073779412": "aes_decrypt", "1073752792": "ets_strstr", "1073799972": "xthal_window_spill", 
"1073767132": "rom_get_fm_sar_dout", "1073745360": "ets_task", "1073779428": "aes_decrypt_deinit", "1073799912": "__floatunsidf", "1073763944": "sip_get_state", "1073783532": "sha1_vector", "1073799920": "__floatsidf", "1073752824": "ets_str2macaddr", "1073779456": "rijndaelKeySetupEnc", "1073769224": "phy_get_romfuncs", "1073771276": "rom_i2c_writeReg_Mask", "1073798384": "__umulsidi3", "1073795856": "__udivdi3", "1073761052": "SPIRead", "1073799968": "xthal_window_spill_nw", "1073744676": "conv_str_decimal", "1073799976": "_rom_store_table", "1073756976": "uart_tx_one_char", "1073789756": "wepkey_64", "1073748800": "mem_malloc", "1073799648": "strstr", "1073761092": "SPIEraseArea", "1073742988": "_xtos_l1int_handler", "1073763148": "sip_reclaim_from_host_cmd", "1073757028": "uart_rx_one_char_block", "1073754984": "UartDwnLdProc", "1073777516": "rom_rxiq_cover_mg_mp", "1073752948": "ets_char2xdigit", "1073771388": "rom_pbus_debugmode", "1073762112": "lldesc_build_chain", "1073775496": "rom_rfcal_txcap", "1073757068": "uart_rx_one_char", "1073767316": "rom_get_noisefloor", "1073767344": "rom_get_power_db", "1073757116": "uart_rx_intr_handler", "1073761360": "gpio_init", "1073771112": "rom_i2c_readReg", "1073753036": "ets_getc", "1073741988": "_ResetHandler", "1073791964": "__mulsf3", "1073798112": "_xtos_alloca_handler", "1073798116": "_xtos_syscall_handler", "1073762384": "lldesc_num2link", "1073773552": "rom_pbus_dco___SA2", "1073760424": "SPILock", "1073798136": "_xtos_p_none", "1073794232": "__fixdfsi", "1073798140": "_xtos_set_intlevel", "1073759232": "SPI_write_status", "1073779728": "aes_unwrap", "1073798168": "_xtos_set_min_intlevel", "1073765404": "slc_reattach", "1073749036": "mem_calloc", "1073757232": "UartRxString", "1073751096": "ets_install_uart_printf", "1073768628": "rom_start_tx_tone", "1073759292": "SPI_write_enable", "1073790016": "wepkey_128", "1073798212": "_xtos_unhandled_exception", "1073771592": "rom_pbus_exit_debugmode", "1073763404": 
"sip_install_rx_ctrl_cb", "1073769552": "rom_set_channel_freq", "1073749176": "eprintf_init_buf", "1073798228": "_xtos_return_from_exc", "1073749080": "mem_zalloc", "1073783652": "SHA1Transform", "1073763420": "sip_install_rx_data_cb", "1073776144": "rom_rfcal_txiq", "1073753188": "timer_insert", "1073749100": "mem_realloc", "1073748500": "mem_trim", "1073771644": "rom_pbus_force_test", "1073757312": "send_packet", "1073767556": "rom_linear_to_db", "1073798280": "__divsi3", "1073759372": "Wait_SPI_Idle", "1073751188": "est_get_printf_buf_remain_len", "1073792196": "__fixunssfsi", "1073751196": "est_reset_printf_buf_len", "1073768988": "rom_txtone_linear_pwr", "1073730816": "Td4s", "1073755844": "MemDwnLdStartMsgProc", "1073745080": "conv_str_hex", "1073759424": "Enable_QMode", "1073753284": "ets_timer_arm", "1073751244": "ets_printf", "1073761488": "gpio_output_set", "1073733136": "UartDev", "1073771736": "rom_pbus_rd", "1073761520": "gpio_input_get", "1073767976": "rom_rxiq_get_mis", "1073757428": "SendMsg", "1073779964": "base64_encode", "1073794304": "__fixunsdfsi", "1073756324": "uart_buff_switch", "1073761540": "gpio_register_set", "1073757448": "recv_packet", "1073767692": "rom_set_txclk_en", "1073749268": "eprintf", "1073790248": "_xtos_set_interrupt_handler_arg", "1073800072": "_rom_store", "1073777972": "rom_set_txiq_cal", "1073798456": "xthal_get_ccount", "1073798464": "xthal_set_ccompare", "1073751364": "ets_uart_printf", "1073749320": "eprintf_to_host", "1073771852": "rom_pbus_set_rxgain", "1073742892": "_start", "1073767760": "rom_set_rxclk_en", "1073798488": "xthal_get_intread", "1073761628": "gpio_register_get", "1073798496": "xthal_set_intclear", "1073798504": "rc4_skip", "1073757896": "uart_rx_readbuff", "1073790320": "_xtos_set_interrupt_handler", "1073743220": "_xtos_set_vpri", "1073751416": "ets_external_printf", "1073793940": "__divdf3", "1073753472": "ets_timer_done", "1073790340": "_xtos_ints_on", "1073765456": "slc_init_attach", 
"1073761672": "gpio_intr_pending", "1073766636": "rom_chip_v5_enable_cca", "1073761680": "gpio_pin_intr_state_set", "1073743256": "_xtos_c_wrapper_handler", "1073749408": "ets_write_char", "1073728752": "Te0", "1073767844": "rom_mhz2ieee", "1073741872": "_KernelExceptionVector", "1073753512": "ets_timer_handler_isr", "1073754352": "ets_wdt_disable", "1073772104": "rom_pbus_workmode", "1073765816": "slc_select_tohost_gpio_mode", "1073765824": "slc_select_tohost_gpio", "1073772448": "rom_pbus_xpd_tx_on__low_gain", "1073790408": "strcmp", "1073761740": "gpio_intr_ack", "1073778128": "rijndaelKeySetupDec", "1073751520": "rtc_get_reset_reason", "1073748148": "ets_memcpy", "1073765860": "slc_send_to_host_chain", "1073798204": "_xtos_unhandled_interrupt", "1073743344": "srand", "1073745832": "ets_isr_unmask", "1073794556": "__extendsfdf2", "1073743360": "rand", "1073729792": "Td0", "1073745412": "ets_run", "1073751980": "save_tsf_us", "1073772048": "rom_pbus_set_txgain", "1073745444": "ets_post", "1073761832": "gpio_intr_handler_register", "1073767008": "rom_get_corr_power", "1073759812": "spi_flash_attach", "1073788488": "SHA1Final", "1073751628": "software_reset", "1073751084": "ets_install_putc1", "1073743440": "__muldi3", "1073755736": "FlashDwnLdStopReqMsgProc", "1073794656": "__divdi3", "1073753704": "ets_timer_init", "1073799784": "__umodsi3", "1073759864": "Cache_Read_Enable", "1073763964": "sip_init_attach", "1073756064": "UartConnectProc", "1073798788": "bzero", "1073789480": "hmac_sha1", "1073743496": "xthal_bcopy", "1073755788": "FlashDwnLdParamCfgMsgProc", "1073761936": "gpio_pin_wakeup_enable", "1073766036": "slc_from_host_chain_recycle", "1073751708": "dtm_params_init", "1073778340": "aes_decrypt_init", "1073798824": "memcmp", "1073744156": "get_first_seg", "1073757868": "RcvMsg", "1073759176": "SPI_read_status", "1073774260": "rom_rfcal_pwrctrl", "1073743556": "xthal_memcpy", "1073790664": "strcpy", "1073772236": "rom_pbus_xpd_rx_on", "1073751760": 
"dtm_get_intr_mask", "1073762004": "gpio_pin_wakeup_disable", "1073751772": "dtm_set_params", "1073755888": "MemPacketSendReqMsgProc", "1073757940": "UartGetCmdLn", "1073762044": "gpio_intr_test", "1073749760": "ets_vprintf", "1073753860": "ets_update_cpu_frequency", "1073753868": "ets_get_cpu_frequency", "1073766160": "slc_to_host_chain_recycle", "1073727252": "flashchip", "1073766180": "slc_from_host_chain_fetch", "1073792648": "__subdf3", "1073753908": "ets_wdt_get_mode", "1073768248": "rom_sar_init", "1073772352": "rom_pbus_xpd_tx_on", "1073798984": "memcpy", "1073758028": "GetUartDevice", "1073755448": "FlashDwnLdStartMsgProc", "1073758040": "SelectSpiFunction", "1073796976": "__umoddi3", "1073753064": "ets_putc", "1073745780": "ets_intr_lock", "1073756028": "MemDwnLdStopReqMsgProc", "1073745792": "ets_intr_unlock", "1073753408": "ets_timer_disarm", "1073770372": "rom_chip_50_set_channel", "1073745800": "ets_isr_attach", "1073768332": "rom_set_ana_inf_tx_scale", "1073769112": "rom_tx_mac_disable", "1073745816": "ets_isr_mask", "1073754016": "ets_wdt_enable", "1073751972": "save_rxbcn_mactime", "1073790888": "strncmp", "1073772168": "rom_pbus_xpd_rx_off", "1073780652": "md5_vector", "1073751992": "ets_enter_sleep", "1073745852": "ets_set_user_start", "1073768392": "rom_set_loopback_gain", "1073798476": "xthal_get_ccompare", "1073743692": "xthal_copy123", "1073756076": "UartRegWriteProc", "1073745900": "main", "1073760240": "Cache_Read_Disable"}
2 | 


--------------------------------------------------------------------------------
/binja_xtensa/disassembly.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Xtensa disassembly rendering
  3 | 
  4 | The idea is instruction.py handles instruction decoding, then to get
  5 | human-readable disassembly, we call disassemble_instruction from this file.
  6 | 
  7 | The lifter should *not* need the information in this file. If it does, move that
  8 | computation into the decoder.
  9 | """
 10 | from binaryninja import InstructionTextToken
 11 | from binaryninja.enums import InstructionTextTokenType
 12 | 
 13 | from .instruction import Instruction, InstructionType, sign_extend
 14 | 
 15 | # Helpers to generate Binary Ninja InstructionTextTokens, since the names are
 16 | # so long. We also do some cosmetic transformations of the encoded immediates
 17 | # here.
 18 | _MNEM_ALIGN = 8
 19 | def _get_space(mnem_length):
 20 |     # Vertically align the first operand where possible
 21 |     spaces = 1 if mnem_length >= _MNEM_ALIGN else _MNEM_ALIGN - mnem_length
 22 |     return InstructionTextToken(InstructionTextTokenType.TextToken,
 23 |                                 " " * spaces)
 24 | 
 25 | def _get_comma():
 26 |     return InstructionTextToken(InstructionTextTokenType.OperandSeparatorToken, ", ")
 27 | 
 28 | def _get_reg_tok(reg_name):
 29 |     return InstructionTextToken(InstructionTextTokenType.RegisterToken,
 30 |                                 reg_name)
 31 | 
 32 | def _get_imm8_tok(val):
 33 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 34 |                                 str(val), val, size=1)
 35 | 
 36 | def _get_imm32_tok(val):
 37 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 38 |                                 str(val), val, size=4)
 39 | 
 40 | def _get_imm4(insn, _):
 41 |     val = insn.imm4
 42 |     return _get_imm8_tok(val)
 43 | 
 44 | def _get_imm8(insn, _):
 45 |     val = insn.imm8
 46 |     return _get_imm8_tok(val)
 47 | 
 48 | def _get_simm8(insn, _):
 49 |     val = sign_extend(insn.imm8, 8)
 50 |     return _get_imm8_tok(val)
 51 | 
 52 | def _get_simm8_s8(insn, _):
 53 |     val = sign_extend(insn.imm8, 8)
 54 |     val <<= 8
 55 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 56 |                                 str(val), val, size=4)
 57 | 
 58 | def _get_rotw_simm4(insn, _):
 59 |     return _get_imm8_tok(insn.rotw_simm4())
 60 | 
 61 | def _get_addi_n_imm(insn, _):
 62 |     val = insn.inline0(_)
 63 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 64 |                                 str(val), val, size=4)
 65 | 
 66 | def _get_possible_address_token(addr):
 67 |     return InstructionTextToken(InstructionTextTokenType.PossibleAddressToken,
 68 |                                 hex(addr)[2:], addr, size=4)
 69 | def _get_target_offset(insn, addr):
 70 |     val = insn.target_offset(addr)
 71 |     return _get_possible_address_token(val)
 72 | 
 73 | def _get_mem_offset(insn, addr):
 74 |     val = insn.mem_offset(addr)
 75 |     return _get_possible_address_token(val)
 76 | 
 77 | def _get_b4const(insn, _):
 78 |     val = insn.b4const()
 79 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 80 |                                 str(val), val, size=4)
 81 | 
 82 | def _get_b4constu(insn, _):
 83 |     val = insn.b4constu()
 84 |     return InstructionTextToken(InstructionTextTokenType.IntegerToken,
 85 |                                 str(val), val, size=4)
 86 | 
 87 | # I wanted the mechanical instruction -> disassembly process to be as easy to
 88 | # write as possible. Thus, it's structured so I can take the example instruction
 89 | # out of the manual and type it in here with slight modification, and it'll
 90 | # mostly work. Then I just have to check for nonobvious differences and move on
 91 | # to the next instruction.
 92 | 
 93 | # This table defines the logic that backs up each of those things from the
 94 | # manual.
 95 | 
 96 | # each of these should return a binja InstructionTextToken
 97 | _disassembly_fmts = {
 98 |     "ar": lambda insn, _: _get_reg_tok("a" + str(insn.r)),
 99 |     "as": lambda insn, _: _get_reg_tok("a" + str(insn.s)),
100 |     "at": lambda insn, _: _get_reg_tok("a" + str(insn.t)),
101 | 
102 |     "fr": lambda insn, _: _get_reg_tok("f" + str(insn.r)),
103 |     "fs": lambda insn, _: _get_reg_tok("f" + str(insn.s)),
104 |     "ft": lambda insn, _: _get_reg_tok("f" + str(insn.t)),
105 | 
106 |     "bt": lambda insn, _: _get_reg_tok("b" + str(insn.t)),
107 |     "bs": lambda insn, _: _get_reg_tok("b" + str(insn.s)),
108 |     "br": lambda insn, _: _get_reg_tok("b" + str(insn.r)),
109 | 
110 |     "s": lambda insn, _: _get_imm8_tok(insn.s),
111 |     "t": lambda insn, _: _get_imm8_tok(insn.t),
112 | 
113 |     "imm4": _get_imm4,
114 | 
115 |     "imm8": _get_imm8,
116 |     "simm8": _get_simm8,
117 |     "simm8_s8": _get_simm8_s8, # simm8 shifted left by 8
118 | 
119 |     "rotw_simm4": _get_rotw_simm4,
120 | 
121 |     "target_offset": _get_target_offset,
122 |     "mem_offset": _get_mem_offset,
123 | 
124 |     "b4const": _get_b4const,
125 |     "b4constu": _get_b4constu,
126 | 
127 |     # Oddball
128 |     # Probably should have been an inline0... but I hadn't hacked that in yet
129 |     # when I dealt with ADDI.N
130 |     "addi_n_imm": _get_addi_n_imm,
131 | }
def _dis(fmt_str, *args):
    """Build a disassembly function from a whitespace-separated format string.

    Each word of `fmt_str` names one operand. A word is looked up in
    _disassembly_fmts unless it starts with "inline"; "inlineN" selects
    args[N], falling back to the attribute of that name on the instruction
    when fewer than N+1 extra args were given.

    The returned function takes (insn, addr) and returns the token list:
    mnemonic, alignment space, then the operands separated by commas.
    """
    fmts = fmt_str.split()

    def resolve(insn, fmt):
        # Ordinary operand kinds come straight from the shared table.
        if not fmt.startswith("inline"):
            return _disassembly_fmts[fmt]

        # For one-off encodings, the renderer is passed to _dis directly.
        # These "inline" encodings are similar to the ones in the decoder,
        # but they're distinct at a programmatic level; they just share a
        # name and are used together.
        tok_idx = int(fmt[len("inline"):])
        try:
            return args[tok_idx]
        except IndexError:
            return getattr(insn, fmt)

    def inner(insn, addr):
        tokens = [
            InstructionTextToken(InstructionTextTokenType.InstructionToken,
                                 insn.mnem),
            _get_space(len(insn.mnem)),
        ]
        for idx, fmt in enumerate(fmts):
            if idx > 0:
                tokens.append(_get_comma())
            token_func = resolve(insn, fmt)
            tokens.append(token_func(insn, addr))
        return tokens
    return inner
164 | 
def disassemble_instruction(insn, addr):
    """Return Binary Ninja InstructionTextTokens for a decoded instruction

    `insn` is the object returned by Instruction.decode and `addr` is the
    address the instruction is loaded at.

    Lookup order:
      1. a module-level `_disassemble_<MNEM>` function ("." in the mnemonic
         replaced by "_"),
      2. the per-instruction-type fallback for RRR, RRRN and RRI8,
      3. a placeholder consisting of the mnemonic followed by the text
         "unimplemented_disass".
    """
    handler = globals().get("_disassemble_" + insn.mnem.replace(".", "_"))
    if handler:
        return handler(insn, addr)

    insn_type = insn.instruction_type
    if insn_type == InstructionType.RRR:
        return _disassemble_rrr(insn, addr)
    if insn_type == InstructionType.RRRN:
        return _disassemble_rrrn(insn, addr)
    if insn_type == InstructionType.RRI8:
        return _disassemble_rri8(insn, addr)

    # Fallback for when we don't have a fallback for a particular
    # instruction type.
    # If I had to rewrite this, I'd remove the type-fallbacks and just show
    # a warning in fallback cases, as we do here.
    return [
        InstructionTextToken(InstructionTextTokenType.InstructionToken,
                             insn.mnem),
        _get_space(len(insn.mnem)),
        InstructionTextToken(InstructionTextTokenType.TextToken,
                             "unimplemented_disass"),
    ]
198 | 
def tokens_to_text(token_list):
    """Concatenate the text of a list of binja tokens into one plain string

    Asserts that no token has a `value` of None. Mostly useful for testing.
    """
    assert all(tok.value is not None for tok in token_list)
    return ''.join(tok.text for tok in token_list)
207 | 
def _disassemble_RSR(insn, addr):
    """Disassemble RSR (and, via the aliases below, WSR and XSR).

    The rendered mnemonic is the instruction mnemonic joined by "." to the
    name returned by insn.get_sr_name(). The only operand is the "at"
    register.

    The operand is emitted directly instead of looping over a format list:
    the previous loop carried an "inline" branch that referenced an undefined
    name (`args`) and could never be taken for the fixed ["at"] format.
    """
    mnem = insn.mnem + "." + insn.get_sr_name()
    return [
        InstructionTextToken(InstructionTextTokenType.InstructionToken, mnem),
        # Align on the full dotted mnemonic, not just insn.mnem
        _get_space(len(mnem)),
        _disassembly_fmts["at"](insn, addr),
    ]

_disassemble_WSR = _disassemble_XSR = _disassemble_RSR
229 | 
230 | # As I mentioned in the decoding code, instruction formats aren't too useful in
231 | # Xtensa... but we do fall back to these for a few simple instructions. It's
232 | # almost easier to list an instruction below than it is to verify the default is
233 | # correct.
# RRR and RRRN share the three-register layout
_disassemble_rrr = _disassemble_rrrn = _dis("ar as at")
# RRI8: two registers and a sign-extended 8-bit immediate
_disassemble_rri8 = _dis("at as simm8")
237 | 
238 | # Overrides for exceptions to the instruction type
_disassemble_ABS = _dis("ar at")
_disassemble_ABS_S = _dis("fr fs")
_disassemble_ADD_S = _dis("fr fs ft")
_disassemble_ADDI_N = _dis("ar as addi_n_imm")
_disassemble_ADDMI = _dis("at as simm8_s8")
# Two b-register operands
_disassemble_ALL4 = _disassemble_ALL8 = _dis("bt bs")
_disassemble_ANY4 = _disassemble_ANY8 = _dis("bt bs")
# Three b-register operands
_disassemble_ANDB = _disassemble_ANDBC = _dis("br bs bt")
250 | 
# Branches comparing two a-registers
_disassemble_BALL = _disassemble_BANY = _disassemble_BNALL = \
    _disassemble_BNONE = _dis("as at target_offset")
_disassemble_BBC = _disassemble_BBS = _dis("as at target_offset")
_disassemble_BEQ = _disassemble_BNE = _dis("as at target_offset")
_disassemble_BGE = _disassemble_BGEU = _dis("as at target_offset")
_disassemble_BLT = _disassemble_BLTU = _dis("as at target_offset")

# Branches whose middle operand comes from insn.inline0
_disassemble_BBCI = _disassemble_BBSI = _dis(
    "as inline0 target_offset",
    lambda insn, _: _get_imm8_tok(insn.inline0(_)))

# Branches against a b4const / b4constu constant
_disassemble_BEQI = _disassemble_BNEI = _dis("as b4const target_offset")
_disassemble_BGEI = _disassemble_BLTI = _dis("as b4const target_offset")
_disassemble_BGEUI = _disassemble_BLTUI = _dis("as b4constu target_offset")

# Branches with a single a-register operand
_disassemble_BEQZ = _disassemble_BEQZ_N = _dis("as target_offset")
_disassemble_BNEZ = _disassemble_BNEZ_N = _dis("as target_offset")
_disassemble_BGEZ = _disassemble_BLTZ = _dis("as target_offset")

# Branches on a b-register
_disassemble_BF = _disassemble_BT = _dis("bs target_offset")

_disassemble_BREAK = _dis("s t")
_disassemble_BREAK_N = _dis("s")
285 | 
# Direct calls take a target address
_disassemble_CALL0 = _disassemble_CALL4 = _dis("target_offset")
_disassemble_CALL8 = _disassemble_CALL12 = _dis("target_offset")

# Indirect calls take the register holding the target
_disassemble_CALLX0 = _disassemble_CALLX4 = _dis("as")
_disassemble_CALLX8 = _disassemble_CALLX12 = _dis("as")
295 | 
_disassemble_CEIL_S = _dis("ar fs t")
# Skipping CLAMPS, I don't care about floats
# Skipping DHI, DHU, DHWB, DHWBI, DII, DIU, DIWB, DIWBI, DPFL, DPFR, DPFRO,
# DPFW, DPFWO, they deal with data caching, which is an extension

# No operands: just the mnem
_disassemble_DSYNC = _disassemble_ESYNC = _disassemble_EXCW = _dis("")
_disassemble_EXTW = _disassemble_ISYNC = _dis("")
_disassemble_ILL = _disassemble_ILL_N = _dis("")

_disassemble_ENTRY = _dis(
    "as inline0",
    lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_EXTUI = _dis(
    "ar at inline0 inline1",
    lambda insn, _: _get_imm8_tok(insn.extui_shiftimm()),
    lambda insn, _: _get_imm8_tok(insn.inline1(_)))
# Skipping float stuff
# Skipping IHI, IHU, III
# Skipping IIU
# Skipping IPF, IPFL

# Single a-register operand
_disassemble_IDTLB = _disassemble_IITLB = _dis("as")
_disassemble_JX = _dis("as")
_disassemble_J = _dis("target_offset")
_disassemble_L8UI = _dis("at as imm8")
# These loads take their offset from insn.inline0 rather than the raw imm8
_disassemble_L16SI = _disassemble_L16UI = _disassemble_L32AI = \
    _disassemble_L32I = _disassemble_L32I_N = _dis(
        "at as inline0",
        lambda insn, _: _get_imm32_tok(insn.inline0(_)))
# Skipping windowed L32E
_disassemble_L32R = _dis("at mem_offset")
# Skipping LDCT
# Skipping LDDEC,LDINC; they're MAC16
# Skipping LICT, LICW, instruction cache option
# Skipping LOOP, LOOPGTZ, LOOPNEZ, loop option
# Skipping LSI, LSIU, LSX, LSXU, MADD_S (floats)

# No operands: just the mnem
_disassemble_MEMW = _disassemble_RSYNC = _dis("")
_disassemble_NOP = _disassemble_NOP_N = _dis("")
# RET is equivalent in function to "JX a0"; RET.N has the same function
_disassemble_RET = _disassemble_RET_N = _dis("")
_disassemble_RETW = _disassemble_RETW_N = _dis("")
_disassemble_RFDD = _disassemble_RFDE = _disassemble_RFDO = _dis("")
_disassemble_RFWO = _disassemble_RFWU = _disassemble_RFE = _dis("")

# Immediate moves take their value from insn.inline0
_disassemble_MOVI = _dis(
    "at inline0",
    lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_MOVI_N = _dis(
    "as inline0",
    lambda insn, _: _get_imm32_tok(insn.inline0(_)))

# Two a-register operands, "at" first
_disassemble_MOV_N = _disassemble_MOVSP = _dis("at as")
_disassemble_NSA = _disassemble_NSAU = _dis("at as")
_disassemble_PDTLB = _disassemble_PITLB = _dis("at as")
_disassemble_RDTLB0 = _disassemble_RDTLB1 = _dis("at as")
_disassemble_RITLB0 = _disassemble_RITLB1 = _dis("at as")
_disassemble_RER = _dis("at as")

_disassemble_NEG = _dis("ar at")
_disassemble_RFI = _dis("s")
_disassemble_ROTW = _dis("rotw_simm4")
_disassemble_RSIL = _dis("at s")
371 | 
_disassemble_S8I = _dis("at as imm8")
# These stores take their offset from insn.inline0 rather than the raw imm8
_disassemble_S16I = _dis("at as inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32I = _dis("at as inline0",
                         lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32I_N = _dis("at as inline0",
                           lambda insn, _: _get_imm32_tok(insn.inline0(_)))
_disassemble_S32RI = _dis("at as inline0",
                          lambda insn, _: _get_imm32_tok(insn.inline0(_)))
# The displayed SEXT immediate is the t field plus 7
_disassemble_SEXT = _dis("ar as inline0",
                         lambda insn, _: _get_imm8_tok(insn.t + 7))
_disassemble_SIMCALL = _dis("")
_disassemble_SLL = _dis("ar as")
_disassemble_SLLI = _dis("ar as inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SRA = _dis("ar at")
_disassemble_SRAI = _dis("ar at inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SRL = _dis("ar at")
_disassemble_SRLI = _dis("ar at s")
_disassemble_SSA8B = _dis("as")
_disassemble_SSA8L = _dis("as")
_disassemble_SSAI = _dis("inline0",
                         lambda insn, _: _get_imm8_tok(insn.inline0(_)))
_disassemble_SSL = _dis("as")
_disassemble_SSR = _dis("as")
_disassemble_SYSCALL = _dis("")
_disassemble_WAITI = _dis("s")
_disassemble_WDTLB = _dis("at as")
# WER was previously assigned twice with the same format; one binding suffices
_disassemble_WER = _dis("at as")
_disassemble_WITLB = _dis("at as")
# _disassemble_WUR = _dis("at sr") # sr not yet handled
405 | 


--------------------------------------------------------------------------------
/binja_xtensa/lifter.py:
--------------------------------------------------------------------------------
  1 | """
  2 | Xtensa lifting to BNIL
  3 | 
  4 | Here we provide a `lift` function that takes a decoded instruction and an
  5 | address where that instruction is, and we return BNIL.
  6 | """
  7 | from binaryninja import Architecture, LowLevelILLabel
  8 | 
  9 | from .instruction import sign_extend
 10 | 
 11 | def _reg_name(insn, fmt):
 12 |     """Get the concrete register for a particular part of an instruction
 13 | 
 14 |     For example, if the docs say an instruction writes to "as", we call this
 15 |     function, which will check the `s` decoded control signal (say it's "7") and
 16 |     return "a7" for passing to BNIL.
 17 |     """
 18 |     if fmt.startswith("a"):
 19 |         rest = fmt[1:]
 20 |         val = getattr(insn, rest, None)
 21 |         if val is None:
 22 |             raise Exception("Could not find property " + fmt)
 23 |         return "a" + str(val)
 24 |     else:
 25 |         # When we lift boolean instructions, we'll need to add support for "b"
 26 |         # registers, etc.
 27 |         raise Exception("Unimplemented reg name fmt: " + fmt)
 28 | 
 29 | def lift(insn, addr, il):
 30 |     """Dispatch function for lifting
 31 | 
 32 |     Looks up _lift_MNEM() in the current global namespace (I think that's just
 33 |     the module level?) and calls it if it exists, otherwise we say the
 34 |     instruction is unimplemented.
 35 |     """
 36 |     try:
 37 |         # We replace the "." in mnemonics with a "_", as we do in several other
 38 |         # places in the code.
 39 |         # At some point, this should become a property of the Instruction.
 40 |         func = globals()["_lift_" + insn.mnem.replace(".", "_")]
 41 |     except KeyError:
 42 |         il.append(il.unimplemented())
 43 |         return insn.length
 44 | 
 45 |     return func(insn, addr, il)
 46 | 
 47 | # Helpers for some shared code between instructions
 48 | 
 49 | def _lift_cond(cond, insn, addr, il):
 50 |     """Helper for lifting conditional jumps
 51 |     
 52 |     We pass in an IL condition (LowLevelILExpr) and this function lifts a IL
 53 |     conditional that will jump to `insn.target_offset(addr)` if the condition is
 54 |     true, otherwise we continue to the next instruction.
 55 |     """
 56 |     true_label = il.get_label_for_address(Architecture['xtensa'],
 57 |                                            insn.target_offset(addr))
 58 |     false_label = il.get_label_for_address(Architecture['xtensa'],
 59 |                                           addr + insn.length)
 60 |     must_mark_true = False
 61 |     if true_label is None:
 62 |         true_label = LowLevelILLabel()
 63 |         must_mark_true = True
 64 | 
 65 |     must_mark_false = False
 66 |     if false_label is None:
 67 |         false_label = LowLevelILLabel()
 68 |         must_mark_false = True
 69 | 
 70 |     il.append(
 71 |         il.if_expr(cond,
 72 |                    true_label,
 73 |                    false_label
 74 |                    ))
 75 |     if must_mark_true:
 76 |         il.mark_label(true_label)
 77 |         il.append(il.jump(il.const(4, insn.target_offset(addr))))
 78 |     if must_mark_false:
 79 |         il.mark_label(false_label)
 80 |         il.append(il.jump(il.const(4, addr + insn.length)))
 81 |     return insn.length
 82 | 
 83 | def _lift_cmov(cond, insn, addr, il):
 84 |     """Helper for lifting conditional moves
 85 |     
 86 |     We pass in an IL condition (LowLevelILExpr) and this function lifts a move
 87 |     from as to ar if the condition is true. In either case we then continue with
 88 |     the next instruction after the (potential) move.
 89 |     """
 90 |     true_label = LowLevelILLabel()
 91 |     false_label = LowLevelILLabel()
 92 |     il.append(il.if_expr(cond, true_label, false_label))
 93 |     il.mark_label(true_label)
 94 |     il.append(il.set_reg(4, _reg_name(insn, "ar"),
 95 |                          il.reg(4, _reg_name(insn, "as"))))
 96 |     il.mark_label(false_label)
 97 |     return insn.length
 98 | 
 99 | def _lift_addx(x_bits, insn, addr, il):
100 |     """Helper for ADDX2, ADDX4, ADDX8"""
101 |     il.append(
102 |         il.set_reg(4, _reg_name(insn, "ar"),
103 |                    il.add(4,
104 |                           il.shift_left(4,
105 |                                         il.reg(4, _reg_name(insn, "as")),
106 |                                         il.const(4, x_bits)),
107 |                           il.reg(4, _reg_name(insn, "at")))))
108 |     return insn.length
109 | 
def _lift_subx(x_bits, insn, addr, il):
    """Helper for SUBX2, SUBX4, SUBX8: ar = (as << x_bits) - at"""
    dst = _reg_name(insn, "ar")
    scaled = il.shift_left(4,
                           il.reg(4, _reg_name(insn, "as")),
                           il.const(4, x_bits))
    difference = il.sub(4, scaled, il.reg(4, _reg_name(insn, "at")))
    il.append(il.set_reg(4, dst, difference))
    return insn.length
120 | 
121 | # From here on down, I lifted instructions in priority order of how much
122 | # analysis it would get me. So I started with branches and common math and
123 | # worked my way down the frequency list.
124 | 
def _lift_CALL0(insn, addr, il):
    """Lift CALL0 as a call to the constant insn.target_offset(addr)."""
    il.append(il.call(il.const(4, insn.target_offset(addr))))
    return insn.length
130 | 
def _lift_CALLX0(insn, addr, il):
    """Lift CALLX0 as a call to the address held in `as`.

    Note: a second definition of _lift_CALLX0 appears later in this file and
    replaces this one when the module is loaded.
    """
    il.append(il.call(il.reg(4, _reg_name(insn, "as"))))
    return insn.length
136 | 
def _lift_RET(insn, addr, il):
    """Lift RET as a return to the address held in a0."""
    il.append(il.ret(il.reg(4, 'a0')))
    return insn.length

# RET.N is lifted identically
_lift_RET_N = _lift_RET
143 | 
def _lift_L32I_N(insn, addr, il):
    """Lift L32I.N: at = 4-byte load from (as + insn.inline0(addr))."""
    va = il.add(4,
                il.reg(4, _reg_name(insn, "as")),
                il.const(4, insn.inline0(addr)))
    il.append(il.set_reg(4, _reg_name(insn, "at"), il.load(4, va)))
    return insn.length
153 | 
def _lift_L32R(insn, addr, il):
    """Lift L32R: at = 4-byte load from the constant insn.mem_offset(addr)."""
    loaded = il.load(4, il.const(4, insn.mem_offset(addr)))
    il.append(il.set_reg(4, _reg_name(insn, "at"), loaded))
    return insn.length
161 | 
def _lift_S32I_N(insn, addr, il):
    """Lift S32I.N: 4-byte store of at to (as + insn.inline0(addr))."""
    va = il.add(4,
                il.reg(4, _reg_name(insn, "as")),
                il.const(4, insn.inline0(addr)))
    value = il.reg(4, "a" + str(insn.t))
    il.append(il.store(4, va, value))
    return insn.length
169 | 
def _lift_MOVI_N(insn, addr, il):
    """Lift MOVI.N: as = the constant insn.inline0(addr)."""
    dst = _reg_name(insn, "as")
    value = il.const(4, insn.inline0(addr))
    il.append(il.set_reg(4, dst, value))
    return insn.length
176 | 
def _lift_MOV_N(insn, addr, il):
    """Lift MOV.N: at = as."""
    dst = _reg_name(insn, "at")
    src = il.reg(4, _reg_name(insn, "as"))
    il.append(il.set_reg(4, dst, src))
    return insn.length
183 | 
def _lift_ADDI(insn, addr, il):
    """Lift ADDI: at = as + insn.simm8()."""
    dst = _reg_name(insn, "at")
    total = il.add(4,
                   il.reg(4, _reg_name(insn, "as")),
                   il.const(4, insn.simm8()))
    il.append(il.set_reg(4, dst, total))
    return insn.length
192 | 
def _lift_L8UI(insn, addr, il):
    """Lift L8UI: at = zero-extended 1-byte load from (as + imm8)."""
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.imm8)
    byte = il.load(1, il.add(4, base, offset))
    il.append(il.set_reg(4, _reg_name(insn, "at"), il.zero_extend(4, byte)))
    return insn.length
202 | 
def _lift_S32I(insn, addr, il):
    """Lift S32I: 4-byte store of at to (as + insn.inline0(addr))."""
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.inline0(addr))
    va = il.add(4, base, offset)
    value = il.reg(4, _reg_name(insn, "at"))
    il.append(il.store(4, va, value))
    return insn.length
210 | 
def _lift_L32I(insn, addr, il):
    """Lift L32I: at = 4-byte load from (as + insn.inline0(addr))."""
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.inline0(addr))
    word = il.load(4, il.add(4, base, offset))
    il.append(il.set_reg(4, _reg_name(insn, "at"), word))
    return insn.length
218 | 
def _lift_L16SI(insn, addr, il):
    """Lift L16SI: at = sign-extended 2-byte load from (as + inline0)."""
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.inline0(addr))
    half = il.load(2, il.add(4, base, offset))
    il.append(il.set_reg(4, _reg_name(insn, "at"), il.sign_extend(4, half)))
    return insn.length
226 | 
def _lift_L16UI(insn, addr, il):
    """Lift L16UI: at = zero-extended 2-byte load from (as + inline0).

    Note: _lift_L16UI is defined a second time later in this file; that
    later definition is the one in effect after the module is loaded.
    """
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.inline0(addr))
    half = il.load(2, il.add(4, base, offset))
    il.append(il.set_reg(4, _reg_name(insn, "at"), il.zero_extend(4, half)))
    return insn.length
234 | 
def _lift_J(insn, addr, il):
    """Lift J as a jump to the constant insn.target_offset(addr)."""
    dest = il.const(4, insn.target_offset(addr))
    il.append(il.jump(dest))
    return insn.length
238 | 
def _lift_CALLX0(insn, addr, il):
    """Lift CALLX0 as a call to the address held in `as`.

    Note: this repeats an earlier definition of _lift_CALLX0 in this file;
    being the later one, it is the definition in effect.
    """
    dest = il.reg(4, _reg_name(insn, "as"))
    il.append(il.call(dest))
    return insn.length
243 | 
def _lift_JX(insn, addr, il):
    """Lift JX as a jump to the address held in `as`."""
    dest = il.reg(4, _reg_name(insn, "as"))
    il.append(il.jump(dest))
    return insn.length
247 | 
def _lift_S8I(insn, addr, il):
    """Lift S8I: 1-byte store of the low part of at to (as + imm8)."""
    va = il.add(4,
                il.reg(4, _reg_name(insn, "as")),
                il.const(4, insn.imm8))
    low_byte = il.low_part(1, il.reg(4, _reg_name(insn, "at")))
    il.append(il.store(1, va, low_byte))
    return insn.length
254 | 
def _lift_MOVI(insn, addr, il):
    """Lift MOVI: at = the constant insn.inline0(addr)."""
    dst = _reg_name(insn, "at")
    value = il.const(4, insn.inline0(addr))
    il.append(il.set_reg(4, dst, value))
    return insn.length
259 | 
def _lift_EXTUI(insn, addr, il):
    """Lift EXTUI: ar = (at >> shiftimm) & mask

    The mask has insn.inline1(addr) low bits set and the shift amount comes
    from insn.extui_shiftimm().
    """
    field = il.reg(4, _reg_name(insn, "at"))
    mask_il = il.const(4, (2 ** insn.inline1(addr)) - 1)

    shiftimm = insn.extui_shiftimm()
    if shiftimm:
        # Only emit the shift when there is something to shift by
        field = il.logical_shift_right(4, field, il.const(1, shiftimm))

    extracted = il.and_expr(4, field, mask_il)
    il.append(il.set_reg(4, _reg_name(insn, "ar"), extracted))
    return insn.length
280 | 
def _lift_OR(insn, addr, il):
    """Lift OR: ar = as | at."""
    dst = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dst, il.or_expr(4, lhs, rhs)))
    return insn.length
289 | 
def _lift_MEMW(insn, addr, il):
    """Lift MEMW as the "memw" intrinsic with no outputs and no inputs."""
    barrier = il.intrinsic([], "memw", [])
    il.append(barrier)
    return insn.length
295 | 
def _lift_ADDI_N(insn, addr, il):
    """Lift ADDI.N: ar = as + insn.inline0(addr)."""
    dst = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.const(4, insn.inline0(addr))
    il.append(il.set_reg(4, dst, il.add(4, lhs, rhs)))
    return insn.length
304 | 
def _lift_SLLI(insn, addr, il):
    """Lift SLLI: ar = as << insn.inline0(addr)."""
    dst = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "as"))
    amount = il.const(1, insn.inline0(addr))
    il.append(il.set_reg(4, dst, il.shift_left(4, value, amount)))
    return insn.length
313 | 
def _lift_ADD_N(insn, addr, il):
    """Lift ADD.N: ar = as + at."""
    dst = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dst, il.add(4, lhs, rhs)))
    return insn.length
322 | 
def _lift_BEQZ_N(insn, addr, il):
    """Lift BEQZ.N: branch when as == 0.

    Note: the name _lift_BEQZ_N is rebound further down in this file, as an
    alias of a later `def _lift_BEQZ`.
    """
    value = il.reg(4, _reg_name(insn, "as"))
    zero = il.const(4, 0)
    return _lift_cond(il.compare_equal(4, value, zero), insn, addr, il)
326 | 
def _lift_AND(insn, addr, il):
    """Lift AND: ar = as & at."""
    dst = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dst, il.and_expr(4, lhs, rhs)))
    return insn.length
335 | 
# BEQZ reuses the BEQZ.N lifter. Note that both names are rebound further down
# in this file by `def _lift_BEQZ` and the alias that follows it.
_lift_BEQZ = _lift_BEQZ_N
337 | 
def _lift_L16UI(insn, addr, il):
    """Lift L16UI: at = zero-extended 2-byte load from (as + inline0).

    Note: this repeats an earlier definition of _lift_L16UI in this file;
    being the later one, it is the definition in effect.
    """
    base = il.reg(4, _reg_name(insn, "as"))
    offset = il.const(4, insn.inline0(addr))
    va = il.add(4, base, offset)
    dst = _reg_name(insn, "at")
    il.append(il.set_reg(4, dst, il.zero_extend(4, il.load(2, va))))
    return insn.length
346 | 
def _lift_BNEZ(insn, addr, il):
    """Lift BNEZ: branch when as != 0."""
    value = il.reg(4, _reg_name(insn, "as"))
    zero = il.const(4, 0)
    return _lift_cond(il.compare_not_equal(4, value, zero), insn, addr, il)

# BNEZ.N is lifted identically
_lift_BNEZ_N = _lift_BNEZ
354 | 
def _lift_BEQZ(insn, addr, il):
    """Lift BEQZ: branch when as == 0."""
    value = il.reg(4, _reg_name(insn, "as"))
    zero = il.const(4, 0)
    return _lift_cond(il.compare_equal(4, value, zero), insn, addr, il)

# BEQZ.N is lifted identically
_lift_BEQZ_N = _lift_BEQZ
362 | 
def _lift_BNEI(insn, addr, il):
    """Build `as != insn.b4const()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4const())
    return _lift_cond(il.compare_not_equal(4, operand, immediate),
                      insn, addr, il)
368 | 
def _lift_BEQI(insn, addr, il):
    """Build `as == insn.b4const()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4const())
    return _lift_cond(il.compare_equal(4, operand, immediate),
                      insn, addr, il)
374 | 
def _lift_BALL(insn, addr, il):
    """Build `(at & ~as) == 0` and hand it to `_lift_cond`.

    The condition holds when every bit set in `at` is also set in `as`.
    """
    mask = il.reg(4, _reg_name(insn, "at"))
    inverted = il.not_expr(4, il.reg(4, _reg_name(insn, "as")))
    missing_bits = il.and_expr(4, mask, inverted)
    cond = il.compare_equal(4, missing_bits, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
383 | 
def _lift_BNALL(insn, addr, il):
    """Build `(at & ~as) != 0` and hand it to `_lift_cond`.

    The condition holds when some bit set in `at` is clear in `as`.
    """
    mask = il.reg(4, _reg_name(insn, "at"))
    inverted = il.not_expr(4, il.reg(4, _reg_name(insn, "as")))
    missing_bits = il.and_expr(4, mask, inverted)
    cond = il.compare_not_equal(4, missing_bits, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
392 | 
def _lift_BANY(insn, addr, il):
    """Build `(as & at) != 0` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    common_bits = il.and_expr(4, lhs, rhs)
    cond = il.compare_not_equal(4, common_bits, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
401 | 
def _lift_BNONE(insn, addr, il):
    """Build `(as & at) == 0` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    common_bits = il.and_expr(4, lhs, rhs)
    cond = il.compare_equal(4, common_bits, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
410 | 
def _lift_BBC(insn, addr, il):
    """Build `test_bit(as, at) == 0` and hand it to `_lift_cond`.

    Strictly speaking only the low 5 bits of `at` select the bit; the mask is
    left out on purpose to avoid cluttering the UI.

    TODO: figure out which way Binja numbers the bits.
    """
    value = il.reg(4, _reg_name(insn, "as"))
    bit_selector = il.reg(4, _reg_name(insn, "at"))
    bit_state = il.test_bit(4, value, bit_selector)
    cond = il.compare_equal(4, bit_state, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
425 | 
def _lift_BBS(insn, addr, il):
    """Build `test_bit(as, at)` and hand it to `_lift_cond`.

    Strictly speaking only the low 5 bits of `at` select the bit; the mask is
    left out on purpose to avoid cluttering the UI.
    """
    value = il.reg(4, _reg_name(insn, "as"))
    bit_selector = il.reg(4, _reg_name(insn, "at"))
    return _lift_cond(il.test_bit(4, value, bit_selector), insn, addr, il)
434 | 
def _lift_BBCI(insn, addr, il):
    """Build `test_bit(as, <immediate>) == 0` and hand it to `_lift_cond`.

    The immediate comes from insn.inline0(addr).

    TODO: figure out which way Binja numbers the bits.
    """
    value = il.reg(4, _reg_name(insn, "as"))
    bit_selector = il.const(4, insn.inline0(addr))
    bit_state = il.test_bit(4, value, bit_selector)
    cond = il.compare_equal(4, bit_state, il.const(4, 0))
    return _lift_cond(cond, insn, addr, il)
445 | 
def _lift_BBSI(insn, addr, il):
    """Build `test_bit(as, <immediate>)` and hand it to `_lift_cond`.

    The immediate comes from insn.inline0(addr).
    """
    value = il.reg(4, _reg_name(insn, "as"))
    bit_selector = il.const(4, insn.inline0(addr))
    return _lift_cond(il.test_bit(4, value, bit_selector), insn, addr, il)
451 | 
def _lift_BEQ(insn, addr, il):
    """Build `as == at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    return _lift_cond(il.compare_equal(4, lhs, rhs), insn, addr, il)
457 | 
def _lift_BNE(insn, addr, il):
    """Build `as != at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    return _lift_cond(il.compare_not_equal(4, lhs, rhs), insn, addr, il)
463 | 
def _lift_BGE(insn, addr, il):
    """Build signed `as >= at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    cond = il.compare_signed_greater_equal(4, lhs, rhs)
    return _lift_cond(cond, insn, addr, il)
470 | 
def _lift_BGEU(insn, addr, il):
    """Build unsigned `as >= at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    cond = il.compare_unsigned_greater_equal(4, lhs, rhs)
    return _lift_cond(cond, insn, addr, il)
477 | 
def _lift_BGEI(insn, addr, il):
    """Build signed `as >= insn.b4const()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4const())
    cond = il.compare_signed_greater_equal(4, operand, immediate)
    return _lift_cond(cond, insn, addr, il)
484 | 
def _lift_BGEUI(insn, addr, il):
    """Build unsigned `as >= insn.b4constu()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4constu())
    cond = il.compare_unsigned_greater_equal(4, operand, immediate)
    return _lift_cond(cond, insn, addr, il)
491 | 
def _lift_BGEZ(insn, addr, il):
    """Build signed `as >= 0` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    zero = il.const(4, 0)
    cond = il.compare_signed_greater_equal(4, operand, zero)
    return _lift_cond(cond, insn, addr, il)
497 | 
def _lift_BLT(insn, addr, il):
    """Build signed `as < at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    cond = il.compare_signed_less_than(4, lhs, rhs)
    return _lift_cond(cond, insn, addr, il)
504 | 
def _lift_BLTU(insn, addr, il):
    """Build unsigned `as < at` and hand it to `_lift_cond`."""
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    cond = il.compare_unsigned_less_than(4, lhs, rhs)
    return _lift_cond(cond, insn, addr, il)
511 | 
def _lift_BLTI(insn, addr, il):
    """Build signed `as < insn.b4const()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4const())
    cond = il.compare_signed_less_than(4, operand, immediate)
    return _lift_cond(cond, insn, addr, il)
518 | 
def _lift_BLTUI(insn, addr, il):
    """Build unsigned `as < insn.b4constu()` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    immediate = il.const(4, insn.b4constu())
    cond = il.compare_unsigned_less_than(4, operand, immediate)
    return _lift_cond(cond, insn, addr, il)
525 | 
def _lift_BLTZ(insn, addr, il):
    """Build signed `as < 0` and hand it to `_lift_cond`."""
    operand = il.reg(4, _reg_name(insn, "as"))
    zero = il.const(4, 0)
    cond = il.compare_signed_less_than(4, operand, zero)
    return _lift_cond(cond, insn, addr, il)
531 | 
def _lift_SUB(insn, addr, il):
    """Lift SUB: set `ar` to `as - at`. Returns insn.length."""
    dest = _reg_name(insn, "ar")
    minuend = il.reg(4, _reg_name(insn, "as"))
    subtrahend = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dest, il.sub(4, minuend, subtrahend)))
    return insn.length
540 | 
def _lift_ADD(insn, addr, il):
    """Lift ADD: set `ar` to `as + at`. Returns insn.length."""
    dest = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dest, il.add(4, lhs, rhs)))
    return insn.length
549 | 
def _lift_XOR(insn, addr, il):
    """Lift XOR: set `ar` to the bitwise XOR of `as` and `at`.

    Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dest, il.xor_expr(4, lhs, rhs)))
    return insn.length
558 | 
def _lift_S16I(insn, addr, il):
    """Lift S16I: store the low 2 bytes of `at` at `as + offset`.

    The offset is taken from insn.inline0(addr). Returns insn.length.
    """
    base = il.reg(4, _reg_name(insn, "as"))
    displacement = il.const(4, insn.inline0(addr))
    va = il.add(4, base, displacement)

    halfword = il.low_part(2, il.reg(4, _reg_name(insn, "at")))
    il.append(il.store(2, va, halfword))
    return insn.length
568 | 
def _lift_SRAI(insn, addr, il):
    """Lift SRAI: set `ar` to `at` arithmetically shifted right by an immediate.

    The shift amount is taken from insn.inline0(addr). Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "at"))
    amount = il.const(4, insn.inline0(addr))
    il.append(il.set_reg(4, dest, il.arith_shift_right(4, value, amount)))
    return insn.length
576 | 
def _lift_ADDX2(insn, addr, il):
    """Lift ADDX2 by delegating to `_lift_addx` with a first argument of 1.

    NOTE(review): the 1 is presumably the left shift that scales by 2 --
    confirm against `_lift_addx`.
    """
    shift = 1
    return _lift_addx(shift, insn, addr, il)
579 | 
def _lift_ADDX4(insn, addr, il):
    """Lift ADDX4 by delegating to `_lift_addx` with a first argument of 2.

    NOTE(review): the 2 is presumably the left shift that scales by 4 --
    confirm against `_lift_addx`.
    """
    shift = 2
    return _lift_addx(shift, insn, addr, il)
582 | 
def _lift_ADDX8(insn, addr, il):
    """Lift ADDX8 by delegating to `_lift_addx` with a first argument of 3.

    NOTE(review): the 3 is presumably the left shift that scales by 8 --
    confirm against `_lift_addx`.
    """
    shift = 3
    return _lift_addx(shift, insn, addr, il)
585 | 
def _lift_SUBX2(insn, addr, il):
    """Lift SUBX2 by delegating to `_lift_subx` with a first argument of 1.

    NOTE(review): the 1 is presumably the left shift that scales by 2 --
    confirm against `_lift_subx`.
    """
    shift = 1
    return _lift_subx(shift, insn, addr, il)
588 | 
def _lift_SUBX4(insn, addr, il):
    """Lift SUBX4 by delegating to `_lift_subx` with a first argument of 2.

    NOTE(review): the 2 is presumably the left shift that scales by 4 --
    confirm against `_lift_subx`.
    """
    shift = 2
    return _lift_subx(shift, insn, addr, il)
591 | 
def _lift_SUBX8(insn, addr, il):
    """Lift SUBX8 by delegating to `_lift_subx` with a first argument of 3.

    NOTE(review): the 3 is presumably the left shift that scales by 8 --
    confirm against `_lift_subx`.
    """
    shift = 3
    return _lift_subx(shift, insn, addr, il)
594 | 
def _lift_SRLI(insn, addr, il):
    """Lift SRLI: set `ar` to `at` logically shifted right by an immediate.

    The shift amount is the raw `s` field of the instruction. Returns
    insn.length.
    """
    dest = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "at"))
    amount = il.const(4, insn.s)
    il.append(il.set_reg(4, dest, il.logical_shift_right(4, value, amount)))
    return insn.length
602 | 
def _lift_ADDMI(insn, addr, il):
    """Lift ADDMI: set `at` to `as` plus the sign-extended imm8 times 256.

    Returns insn.length.
    """
    # imm8 is treated as a signed byte and then moved up by 8 bits.
    constant = sign_extend(insn.imm8, 8) << 8

    dest = _reg_name(insn, "at")
    base = il.reg(4, _reg_name(insn, "as"))
    addend = il.const(4, constant)
    il.append(il.set_reg(4, dest, il.add(4, base, addend)))
    return insn.length
611 | 
def _lift_MULL(insn, addr, il):
    """Lift MULL: set `ar` to the 4-byte product `as * at`.

    Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    lhs = il.reg(4, _reg_name(insn, "as"))
    rhs = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dest, il.mult(4, lhs, rhs)))
    return insn.length
619 | 
def _lift_NEG(insn, addr, il):
    """Lift NEG: set `ar` to the negation of `at`. Returns insn.length."""
    dest = _reg_name(insn, "ar")
    operand = il.reg(4, _reg_name(insn, "at"))
    il.append(il.set_reg(4, dest, il.neg_expr(4, operand)))
    return insn.length
625 | 
def _lift_SYSCALL(insn, addr, il):
    """Lift SYSCALL as an IL system-call expression. Returns insn.length."""
    syscall_expr = il.system_call()
    il.append(syscall_expr)
    return insn.length
629 | 
def _lift_MOVEQZ(insn, addr, il):
    """Build the `at == 0` condition and hand it to `_lift_cmov`."""
    tested = il.reg(4, _reg_name(insn, "at"))
    zero = il.const(4, 0)
    return _lift_cmov(il.compare_equal(4, tested, zero), insn, addr, il)
635 | 
def _lift_MOVNEZ(insn, addr, il):
    """Build the `at != 0` condition and hand it to `_lift_cmov`."""
    tested = il.reg(4, _reg_name(insn, "at"))
    zero = il.const(4, 0)
    return _lift_cmov(il.compare_not_equal(4, tested, zero), insn, addr, il)
641 | 
def _lift_MOVGEZ(insn, addr, il):
    """Build the signed `at >= 0` condition and hand it to `_lift_cmov`."""
    tested = il.reg(4, _reg_name(insn, "at"))
    zero = il.const(4, 0)
    cond = il.compare_signed_greater_equal(4, tested, zero)
    return _lift_cmov(cond, insn, addr, il)
647 | 
def _lift_MOVLTZ(insn, addr, il):
    """Build the signed `at < 0` condition and hand it to `_lift_cmov`."""
    tested = il.reg(4, _reg_name(insn, "at"))
    zero = il.const(4, 0)
    cond = il.compare_signed_less_than(4, tested, zero)
    return _lift_cmov(cond, insn, addr, il)
653 | 
def _lift_SSL(insn, addr, il):
    """Lift SSL: set the 1-byte `sar` register to 32 minus the low byte of `as`.

    Returns insn.length.
    """
    thirty_two = il.const(1, 32)
    low_byte = il.low_part(1, il.reg(4, _reg_name(insn, "as")))
    il.append(il.set_reg(1, "sar", il.sub(1, thirty_two, low_byte)))
    return insn.length
661 | 
def _lift_SSR(insn, addr, il):
    """Lift SSR: set the 1-byte `sar` register to the low byte of `as`.

    Returns insn.length.
    """
    low_byte = il.low_part(1, il.reg(4, _reg_name(insn, "as")))
    il.append(il.set_reg(1, "sar", low_byte))
    return insn.length
666 | 
def _lift_SSAI(insn, addr, il):
    """Lift SSAI: set the 1-byte `sar` register to an immediate.

    The immediate is taken from insn.inline0(addr). Returns insn.length.
    """
    amount = il.const(1, insn.inline0(addr))
    il.append(il.set_reg(1, "sar", amount))
    return insn.length
671 | 
def _lift_SLL(insn, addr, il):
    """Lift SLL: set `ar` to `as` shifted left by (32 - SAR).

    The Xtensa ISA defines SLL as taking bits [31+sa .. sa] of `as`
    concatenated with 32 zero bits, where sa is SAR. That is a left shift of
    `as` by 32 - SAR, not by SAR itself. `_lift_SSL` in this module stores
    32 minus its operand into `sar`, so subtracting from 32 here makes an
    `ssl aN; sll ...` pair lift to a shift by the original count.

    Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "as"))
    # Effective left-shift count: 32 - SAR.
    amount = il.sub(1, il.const(1, 32), il.reg(1, "sar"))
    il.append(il.set_reg(4, dest, il.shift_left(4, value, amount)))
    return insn.length
678 | 
def _lift_SRL(insn, addr, il):
    """Lift SRL: set `ar` to `at` logically shifted right by `sar`.

    Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "at"))
    amount = il.reg(1, "sar")
    il.append(il.set_reg(4, dest, il.logical_shift_right(4, value, amount)))
    return insn.length
685 | 
def _lift_SRC(insn, addr, il):
    """Lift SRC: shift the 8-byte `as`:`at` register pair right by `sar`.

    The pair is built with reg_split(8, as, at); the low 4 bytes of the
    logically shifted value are written to `ar`. Returns insn.length.
    """
    pair = il.reg_split(8, _reg_name(insn, "as"), _reg_name(insn, "at"))

    dest = _reg_name(insn, "ar")
    amount = il.reg(1, "sar")
    shifted = il.logical_shift_right(8, pair, amount)
    il.append(il.set_reg(4, dest, il.low_part(4, shifted)))
    return insn.length
697 | 
def _lift_SSA8L(insn, addr, il):
    """Lift SSA8L: set the 1-byte `sar` register to the low byte of `as` << 3.

    Returns insn.length.
    """
    # Low part is not strictly correct... but good enough
    low_byte = il.low_part(1, il.reg(4, _reg_name(insn, "as")))
    times_eight = il.shift_left(1, low_byte, il.const(1, 3))
    il.append(il.set_reg(1, "sar", times_eight))
    return insn.length
705 | 
def _lift_SRA(insn, addr, il):
    """Lift SRA: set `ar` to `at` arithmetically shifted right by `sar`.

    Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    value = il.reg(4, _reg_name(insn, "at"))
    amount = il.reg(1, "sar")
    il.append(il.set_reg(4, dest, il.arith_shift_right(4, value, amount)))
    return insn.length
712 | 
def _lift_ISYNC(insn, addr, il):
    """Lift ISYNC as the "isync" intrinsic with no outputs and no inputs.

    Returns insn.length.
    """
    no_outputs, no_inputs = [], []
    il.append(il.intrinsic(no_outputs, "isync", no_inputs))
    return insn.length
718 | 
def _lift_ILL(insn, addr, il):
    """Lift ILL as a trap with constant 0. Returns insn.length."""
    # TODO: pick a proper trap constant
    trap_number = 0
    il.append(il.trap(trap_number))
    return insn.length
723 | 
def _lift_MUL16S(insn, addr, il):
    """Lift MUL16S: multiply the sign-extended low halves of `as` and `at`.

    Each operand is cut to its low 2 bytes and sign-extended back to 4 bytes;
    the 4-byte product is written to `ar`. Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    factors = [
        il.sign_extend(4, il.low_part(2, il.reg(4, _reg_name(insn, operand))))
        for operand in ("as", "at")
    ]
    il.append(il.set_reg(4, dest, il.mult(4, *factors)))
    return insn.length
736 | 
def _lift_MUL16U(insn, addr, il):
    """Lift MUL16U: multiply the zero-extended low halves of `as` and `at`.

    Each operand is cut to its low 2 bytes and zero-extended back to 4 bytes;
    the 4-byte product is written to `ar`. Returns insn.length.
    """
    dest = _reg_name(insn, "ar")
    factors = [
        il.zero_extend(4, il.low_part(2, il.reg(4, _reg_name(insn, operand))))
        for operand in ("as", "at")
    ]
    il.append(il.set_reg(4, dest, il.mult(4, *factors)))
    return insn.length
749 | 
def _lift_NOP(insn, addr, il):
    """Lift NOP as an IL no-op. Returns insn.length."""
    nop_expr = il.nop()
    il.append(nop_expr)
    return insn.length

# The narrow form NOP.N shares the NOP lifter.
_lift_NOP_N = _lift_NOP


--------------------------------------------------------------------------------
/binja_xtensa/instruction.py:
--------------------------------------------------------------------------------
   1 | """
   2 | Xtensa instruction decoder
   3 | 
   4 | This was created in roughly 10 hours over the course of a weekend with the
   5 | Xtensa manual in one window and Vim in the other. If you plan to make changes, I
   6 | suggest looking at section 7.3.1 "Opcode Maps" in the Xtensa manual, as the code
   7 | follows it directly (which explains the odd order of instructions). Overall, it
   8 | near-exactly matches the manual, with the exception of a few simplifications
   9 | involving instructions I didn't care about, and also fixing (<5) errors in the
  10 | manual.
  11 | 
  12 | The separation of concerns between instruction decoding, disassembly, and
  13 | lifting is roughly as follows: anything that can be done without knowing the
  14 | address is done as part of instruction decoding. There might be a couple places
  15 | where I declare the computation with a lambda in decoding, which is called
  16 | during disassembly with the address. Anyway, all the decoding are static
  17 | methods.
  18 | 
  19 | When I got to actual disassembly, I ran into a few issues where yes I had
  20 | decoded the instruction per the type "RRR", "RRI8", etc, but the immediate was
  21 | further encoded. In some cases (say, making a signed value from the imm8), I've
  22 | added methods to the Instruction class that will do that transformation. In more
  23 | instruction-specific cases, I added the ability to define a lambda inline that
  24 | does the specified transformation to the immediate (say it's stored shifted
  25 | right by a couple bits). In many cases, I've called it "inline0", then that is
  26 | referenced by the "inline0" in the disassembly code, as well as in the lifting
  27 | code.
  28 | 
  29 | Actual instruction decoding starts in Instruction.decode.
  30 | 
  31 | Link to the Xtensa docs/manual I was referencing:
  32 |     https://0x04.net/~mwk/doc/xtensa.pdf
  33 | 
  34 | """
  35 | from enum import Enum
  36 | 
  37 | 
  38 | # https://stackoverflow.com/a/32031543
  39 | def sign_extend(value, bits):
  40 |     sign_bit = 1 << (bits - 1)
  41 |     return (value & (sign_bit - 1)) - (value & sign_bit)
  42 | 
  43 | 
class InstructionType(Enum):
    """Labels for the instruction types this decoder distinguishes.

    The member names match the type strings used elsewhere in this module
    (for example "RI16" and "RRRN" are passed to ``mnem``). The integer
    values are just sequential labels from 1 to 12.
    """
    RRR = 1
    RSR = 2
    CALLX = 3
    RRI4 = 4
    RRI8 = 5
    RI16 = 6
    CALL = 7
    BRI8 = 8
    BRI12 = 9
    RRRN = 10
    RI7 = 11
    RI6 = 12
  58 | 
  59 | def mnem(_mnem, func, validity_predicate=None, **outer_kwargs):
  60 |     """Not public, just need the DSL to be prettier without _"""
  61 |     def inner(insn, *args, **kwargs):
  62 |         insn.mnem = _mnem
  63 |         getattr(Instruction, "_decode_fmt_" + func)(
  64 |             insn, *args, **kwargs
  65 |         )
  66 |         if validity_predicate and not validity_predicate(insn):
  67 |             insn.valid = False
  68 |         else:
  69 |             insn.valid = True
  70 |         if outer_kwargs:
  71 |             for key, value in outer_kwargs.items():
  72 |                 if key.startswith("inline"):
  73 |                     bound = value.__get__(insn, insn.__class__)
  74 |                     setattr(insn, key, bound)
  75 |         return insn
  76 |     return inner
  77 | 
  78 | 
  79 | def _decode_components(insn, insn_bytes, components):
  80 |     for comp in components:
  81 |         setattr(insn, comp, globals()["decode_" + comp](insn_bytes))
  82 | 
  83 | 
  84 | # lambdas to decode the various control signals
  85 | decode_op0 = lambda insn_bytes: insn_bytes[0] & 0xf
  86 | decode_op1 = lambda insn_bytes: (insn_bytes[2]) & 0xf
  87 | decode_op2 = lambda insn_bytes: (insn_bytes[2] >> 4) & 0xf
  88 | decode_t = lambda insn_bytes: (insn_bytes[0] >> 4) & 0xf
  89 | decode_s = lambda insn_bytes: insn_bytes[1] & 0xf
  90 | decode_r = lambda insn_bytes: (insn_bytes[1] >> 4) & 0xf
  91 | decode_n = lambda insn_bytes: (insn_bytes[0] >> 4) & 3
  92 | decode_m = lambda insn_bytes: (insn_bytes[0] >> 6) & 3
  93 | decode_sr = lambda insn_bytes: insn_bytes[1]
  94 | decode_imm4 = lambda insn_bytes: (insn_bytes[2] >> 4) & 0xf
  95 | decode_imm8 = lambda insn_bytes: insn_bytes[2]
  96 | decode_imm12 = lambda insn_bytes: (insn_bytes[2] << 4) + ((insn_bytes[1] >> 4) & 0xf)
  97 | decode_imm16 = lambda insn_bytes: (insn_bytes[2] << 8) + insn_bytes[1]
  98 | decode_imm7 = lambda insn_bytes: (((insn_bytes[0] >> 4) & 0b111) << 4) + ((insn_bytes[1] >> 4) & 0xf)
  99 | decode_imm6 = lambda insn_bytes: (((insn_bytes[0] >> 4) & 0b11) << 4) + ((insn_bytes[1] >> 4) & 0xf)
 100 | decode_offset = lambda insn_bytes: (
 101 |     (insn_bytes[2] << 10) +
 102 |     (insn_bytes[1] << 2) +
 103 |     ((insn_bytes[0] >> 6) & 0b11)
 104 | )
 105 | decode_i = lambda insn_bytes: (insn_bytes[0] >> 7) & 1
 106 | decode_z = lambda insn_bytes: (insn_bytes[0] >> 6) & 1
 107 | 
 108 | 
 109 | class Instruction:
 110 | 
 111 |     # Instruction class starts with a bunch of utility methods. For the actual
 112 |     # decoding, see the "decode" classmethod.
 113 |     def __init__(self):
 114 |         self.op0 = None
 115 |         self.op1 = None
 116 |         self.op2 = None
 117 |         self.r = None
 118 |         self.s = None
 119 |         self.sr = None
 120 |         self.t = None
 121 |         self.n = None
 122 |         self.m = None
 123 |         self.i = None
 124 |         self.z = None
 125 |         self.imm4 = None
 126 |         self.imm6 = None
 127 |         self.imm7 = None
 128 |         self.imm8 = None
 129 |         self.imm12 = None
 130 |         self.imm16 = None
 131 |         self.offset = None
 132 |         self.length = None
 133 |         self.valid = None
 134 |         self.instruction_type = None
 135 | 
 136 |     # These are simple transformations done to immediate values and such.
 137 |     # Usually based on a line in the docs that say "the assembler will do such
 138 |     # and such to the immediate"
 139 |     def extui_shiftimm(self):
 140 |         if self.mnem != "EXTUI":
 141 |             return None
 142 |         return ((self.op1 & 1) << 4) + self.s
 143 | 
 144 |     def simm6(self):
 145 |         if self.imm6 is None:
 146 |             return None
 147 |         return sign_extend(self.imm6, 8)
 148 | 
 149 |     def simm8(self):
 150 |         if self.imm8 is None:
 151 |             return None
 152 |         return sign_extend(self.imm8, 8)
 153 | 
 154 |     def simm12(self):
 155 |         if self.imm12 is None:
 156 |             return None
 157 |         return sign_extend(self.imm12, 12)
 158 | 
 159 |     def rotw_simm4(self):
 160 |         """Parse immediate for the ROTW instruction
 161 | 
 162 |         The ROTW instruction has a signed imm4 in the "t" slot.
 163 |         """
 164 |         if self.t is None:
 165 |             return None
 166 |         return sign_extend(self.t, 4)
 167 | 
 168 |     # For PC-relative instructions, we need the address to compute the
 169 |     # "target_offset". In non-branching cases, I've tried to instead call it a
 170 |     # "mem_offset" (although I suspect I missed a couple).
 171 |     def offset_imm6(self, addr):
 172 |         return addr + 4 + self.imm6
 173 | 
 174 |     def offset_simm6(self, addr):
 175 |         return addr + 4 + self.simm6()
 176 | 
 177 |     def offset_simm8(self, addr):
 178 |         return addr + 4 + self.simm8()
 179 | 
 180 |     def offset_simm12(self, addr):
 181 |         return addr + 4 + self.simm12()
 182 | 
 183 |     def offset_call(self, addr):
 184 |         return (addr & 0xfffffffc) + (sign_extend(self.offset, 18) << 2) + 4
 185 | 
 186 |     def offset_j(self, addr):
 187 |         return addr + 4 + sign_extend(self.offset, 18)
 188 | 
    # Maps a mnemonic (with "." replaced by "_") to the name of the method on
    # this class that computes its target address. target_offset() consults
    # this table; mnemonics that are not listed get None from it.
    _target_offset_map = {
        "BALL": "offset_simm8",
        "BANY": "offset_simm8",
        "BBC": "offset_simm8",
        "BBCI": "offset_simm8",
        "BBS": "offset_simm8",
        "BBSI": "offset_simm8",
        "BEQ": "offset_simm8",
        "BEQI": "offset_simm8",
        "BEQZ": "offset_simm12",
        "BEQZ_N": "offset_imm6",
        "BF": "offset_simm8",
        "BGE": "offset_simm8",
        "BGEI": "offset_simm8",
        "BGEU": "offset_simm8",
        "BGEUI": "offset_simm8",
        "BGEZ": "offset_simm12",
        "BLT": "offset_simm8",
        "BLTI": "offset_simm8",
        "BLTU": "offset_simm8",
        "BLTUI": "offset_simm8",
        "BLTZ": "offset_simm12",
        "BNALL": "offset_simm8",
        "BNE": "offset_simm8",
        "BNEI": "offset_simm8",
        "BNEZ": "offset_simm12",
        "BNEZ_N": "offset_imm6",
        "BNONE": "offset_simm8",
        "BT": "offset_simm8",
        "CALL0": "offset_call",
        "CALL4": "offset_call",
        "CALL8": "offset_call",
        "CALL12": "offset_call",
        "J": "offset_j",
    }
 224 |     def target_offset(self, addr):
 225 |         try:
 226 |             mapped = self._target_offset_map[self.mnem.replace(".", "_")]
 227 |         except KeyError:
 228 |             return None
 229 |         func = getattr(self, mapped, None)
 230 |         if not func:
 231 |             raise Exception(f"Invalid handler for insn {self.mnem} in _target_offset_map")
 232 |         return func(addr)
 233 | 
 234 |     def offset_l32r(self, addr):
 235 |         enc = sign_extend(self.imm16 | 0xFFFF0000, 32) << 2
 236 |         return (enc + addr + 3) & 0xFFFFFFFC
 237 | 
 238 |     # mem_offset is roughly the same as target_offset, but for data accesses and
 239 |     # not jumps
    # Mnemonic (with "." replaced by "_") -> name of the method that computes
    # the data address; consulted by mem_offset().
    _mem_offset_map = {
        "L32R": "offset_l32r",
    }
 243 |     def mem_offset(self, addr):
 244 |         try:
 245 |             mapped = self._mem_offset_map[self.mnem.replace(".", "_")]
 246 |         except KeyError:
 247 |             return None
 248 |         func = getattr(self, mapped, None)
 249 |         if not func:
 250 |             raise Exception(f"Invalid handler for insn {self.mnem} in _mem_offset_map")
 251 |         return func(addr)
 252 | 
 253 |     # In a few places, an immediate is an index into these lookup tables. The
 254 |     # RTN in the docs calls it "B4CONST", so I do too.
    # Lookup table indexed by the 4-bit encoded field; used by b4const().
    _b4const_vals = [
        -1, 1, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256,
    ]
    # Variant table used by b4constu(); it differs from the table above only
    # in its first two entries.
    _b4constu_vals = [
        32768, 65536, 2, 3, 4, 5, 6, 7, 8, 10, 12, 16, 32, 64, 128, 256,
    ]
    # Mnemonic -> name of the instruction field that holds the table index
    # for b4const().
    _b4const_map = {
        "BEQI": "r",
        "BGEI": "r",
        "BLTI": "r",
        "BNEI": "r",
    }
    # Mnemonic -> name of the instruction field that holds the table index
    # for b4constu().
    _b4constu_map = {
        "BGEUI": "r",
        "BLTUI": "r",
    }
 271 |     def b4const(self):
 272 |         try:
 273 |             comp = self._b4const_map[self.mnem]
 274 |         except KeyError:
 275 |             raise
 276 | 
 277 |         enc = getattr(self, comp)
 278 |         return self._b4const_vals[enc]
 279 | 
 280 |     def b4constu(self):
 281 |         try:
 282 |             comp = self._b4constu_map[self.mnem]
 283 |         except KeyError:
 284 |             raise
 285 | 
 286 |         enc = getattr(self, comp)
 287 |         return self._b4constu_vals[enc]
 288 | 
 289 |     # Table 5-128 Numerical List of Special Registers
 290 |     # This allows us to render "RSR.REGNAME" versus RSR at, <thing>
    # Special-register number -> register name, consumed by get_sr_name().
    # Numbers that are not listed are rendered by that method as their
    # decimal string.
    _special_reg_map = {
        0: "LBEG",
        1: "LEND",
        2: "LCOUNT",
        3: "SAR",
        4: "BR",
        5: "LITBASE",
        12: "SCOMPARE1",
        16: "ACCLO",
        17: "ACCHI",
        32: "M0",
        33: "M1",
        34: "M2",
        35: "M3",
        72: "WindowBase",
        73: "WindowStart",
        83: "PTEVADDR",
        89: "MMID",
        90: "RASID",
        91: "ITLBCFG",
        92: "DTLBCFG",
        96: "IBREAKENABLE",
        98: "CACHEATTR",
        99: "ATOMCTL",
        104: "DDR",
        106: "MEPC",
        107: "MEPS",
        108: "MESAVE",
        109: "MESR",
        110: "MECR",
        111: "MEVADDR",
        128: "IBREAKA0",
        129: "IBREAKA1",
        144: "DBREAKA0",
        145: "DBREAKA1",
        160: "DBREAKC0",
        161: "DBREAKC1",
        177: "EPC1",
        178: "EPC2",
        179: "EPC3",
        180: "EPC4",
        181: "EPC5",
        182: "EPC6",
        183: "EPC7",
        192: "DEPC",
        194: "EPS2",
        195: "EPS3",
        196: "EPS4",
        197: "EPS5",
        198: "EPS6",
        199: "EPS7",
        209: "EXCSAVE1",
        210: "EXCSAVE2",
        211: "EXCSAVE3",
        212: "EXCSAVE4",
        213: "EXCSAVE5",
        214: "EXCSAVE6",
        215: "EXCSAVE7",
        224: "CPENABLE",
        226: "INTERRUPT", # Also known as INTSET
        227: "INTCLEAR",
        228: "INTENABLE",
        230: "PS",
        231: "VECBASE",
        232: "EXCCAUSE",
        233: "DEBUGCAUSE",
        234: "CCOUNT",
        235: "PRID",
        236: "ICOUNT",
        237: "ICOUNTLEVEL",
        238: "EXCVADDR",
        240: "CCOMPARE0",
        241: "CCOMPARE1",
        242: "CCOMPARE2",
        244: "MISC0",
        245: "MISC1",
        246: "MISC2",
        247: "MISC3",
    }
 370 | 
 371 |     def get_sr_name(self):
 372 |         if self.mnem not in ["RSR", "WSR", "XSR"]:
 373 |             return None
 374 |         try:
 375 |             return self._special_reg_map[self.sr]
 376 |         except KeyError:
 377 |             return str(self.sr)
 378 | 
 379 |     # For instruction decoding, we follow the tables in xtensa.pdf
 380 |     # (7.3.1 Opcode Maps)
 381 |     # We begin with Table 7-192 Whole Opcode Space. This switches off op0 to
 382 |     # subtables, which we then filter through to sub-sub-tables, etc. 10 hours
 383 |     # later, we made it to the bottom :)
    # Top-level dispatch table, indexed by the decoded op0 field. Each name is
    # resolved to the attribute _decode_<name> by _call_from_map.
    _op0_map = [
        "QRST", "L32R", "LSAI", "LSCI",
        "MAC16", "CALLN", "SI", "B",
        "L32I_N", "S32I_N", "ADD_N", "ADDI_N",
        "ST2", "ST3", None, None, # None is reserved
    ]
    @classmethod
    def decode(cls, insn_bytes):
        """Decode insn_bytes, starting the table walk at the op0 layer.

        A fresh Instruction is created and handed to _do_tbl_layer together
        with _op0_map; the return value is whatever the handler reached
        through that table returns.
        """
        insn = Instruction()
        return cls._do_tbl_layer(insn, insn_bytes, "op0", cls._op0_map)
 394 | 
 395 |     # At each "layer" of the tables, we look up some control signal. In this
 396 |     # case, it was op0. op0 has 4 bits for a 16 entry table. We can do one of
 397 |     # two things: a sub-table or a leaf (instruction). By the magic of Python
 398 |     # metaprogramming, we lookup the classmethod _decode_<item>, which we
 399 |     # implement either as a function for a table layer, or we use the mnem
 400 |     # helper to indicate it's a leaf function.
 401 | 
 402 |     # These are the actual instructions found in the first table. Arguments to
 403 |     # mnem are mnemonic, instruction type, an optional predicate specifying if
 404 |     # the encoding is valid (for when the manual says t must be 0 or something),
 405 |     # and then "inline" kwargs that end up defining methods for disassembly and
 406 |     # lifting to use.
    # Leaf handlers reachable directly from _op0_map.
    _decode_L32R = mnem("L32R", "RI16") # op0, t, imm16
    # inline0 for the narrow load/store is the r field shifted left by 2
    # (presumably the word offset scaled to bytes -- confirm against the ISA).
    _decode_L32I_N = mnem("L32I.N", "RRRN",
                          inline0=lambda insn, _: insn.r << 2)
    _decode_S32I_N = mnem("S32I.N", "RRRN",
                          inline0=lambda insn, _: insn.r << 2)
    _decode_ADD_N = mnem("ADD.N", "RRRN")
    # inline0 is the t field, except that t == 0 is mapped to -1.
    _decode_ADDI_N = mnem("ADDI.N", "RRRN",
                          inline0=lambda insn, _: insn.t if insn.t != 0 else -1)
 415 | 
 416 | 
 417 |     # The next three functions implement the metaprogramming glue between layers
 418 |     @classmethod
 419 |     def _do_tbl_layer(cls, insn, insn_bytes, component, map):
 420 |         """Do the lookups for one table layer.
 421 |         
 422 |         component is the string to decode, like "op1", or "r".
 423 |         map is the map to look up in
 424 |         """
 425 |         return cls._do_lut(insn, insn_bytes,
 426 |                     [(component, globals()["decode_" + component])],
 427 |                     component,
 428 |                     map)
 429 | 
 430 |     @classmethod
 431 |     def _do_lut(cls,
 432 |                insn,
 433 |                insn_bytes,
 434 |                lookup_map,
 435 |                value_to_look_up,
 436 |                table_to_look_in,
 437 |                ):
 438 |         """Do an iteration of table-lookups
 439 | 
 440 |         Tensilica has a bunch of tables that define the instruction encoding.
 441 |         We decode them a layer at a time, dispatching to relevant handlers at
 442 |         each level. By the time we're done, we should have the whole instruction
 443 |         decoded.
 444 | 
 445 |         At each layer, we read one or more values out of the insn bytes and
 446 |         assign it to the decoded properties. We then grab a value from the table
 447 |         using one of those values and call the next layer
 448 | 
 449 |         Params:
 450 |             insn (Instruction): the instruction object to fill in
 451 |             insn_bytes (bytes): the instruction bytes we're decoding
 452 | 
 453 |             lookup_map (List[Tuple]): list of tuples of
 454 |             (decoded_name, function_to_decode). The function will receive
 455 |             insn_bytes as a param and should return a numeric value.
 456 | 
 457 |             value_to_look_up (string): One of the decoded_name values from the
 458 |             previous param
 459 | 
 460 |             table_to_look_in (List): The table to look in (access as cls._name
 461 |             and pass that in)
 462 | 
 463 |         """
 464 |         for (decoded_name, function_to_decode) in lookup_map:
 465 |             try:
 466 |                 getattr(insn, decoded_name)
 467 |             except AttributeError:
 468 |                 raise
 469 |             setattr(insn, decoded_name, function_to_decode(insn_bytes))
 470 | 
 471 |         value = getattr(insn, value_to_look_up)
 472 |         return cls._call_from_map(table_to_look_in, value, insn, insn_bytes)
 473 | 
 474 |     @staticmethod
 475 |     def _call_from_map(map, index, insn, insn_bytes):
 476 |         """Part of the operation of _do_lut, see there for comments"""
 477 |         try:
 478 |             name = "_decode_" + map[index]
 479 |         except IndexError:
 480 |             raise Exception(f"Unsupported index {index} in map {map}")
 481 | 
 482 |         func = getattr(Instruction, name, None)
 483 |         if not func:
 484 |             raise Exception(f"Unimplemented: {name}")
 485 | 
 486 |         return func(insn, insn_bytes)
 487 | 
 488 |     # From here down, it's a pretty mechanical translation of the Xtensa docs
 489 | 
    # Dispatch table for _decode_QRST, indexed by the op1 field. None entries
    # have no handler name.
    _qrst_map = [
        "RST0", "RST1", "RST2", "RST3",
        "EXTUI", "EXTUI", "CUST0", "CUST1",
        "LSCX", "LSC4", "FP0", "FP1",
        None, None, None, None,
    ]
    @classmethod
    def _decode_QRST(cls, insn, insn_bytes):
        """Decode the op1 field and dispatch through _qrst_map."""
        # Formats RRR, CALLX, RSR (t, s, r, op2 vary)
        # That means op1 is the commonality we'll map off of
        return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._qrst_map)

    # EXTUI occupies two op1 slots in _qrst_map; inline1 is op2 + 1.
    _decode_EXTUI = mnem("EXTUI",
                         "RRR", # RRR is dubious for this... it's complex
                         # IIRC inline0 ended up being named something else but
                         # I didn't want to reuse the number
                         inline1=lambda insn, _: insn.op2 + 1
                         )
 508 | 
    # Dispatch table for _decode_RST0, indexed by the op2 field.
    _rst0_map = [
        "ST0", "AND", "OR", "XOR",
        "ST1", "TLB", "RT0", None, # None is reserved
        "ADD", "ADDX2", "ADDX4", "ADDX8",
        "SUB", "SUBX2", "SUBX4", "SUBX8",
    ]
    @classmethod
    def _decode_RST0(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _rst0_map."""
        # Formats RRR and CALLX (t, s, r vary)
        # That means op2 is the commonality we'll map off of
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst0_map)

    # RRR-format leaves of _rst0_map; none has a validity predicate.
    _decode_AND = mnem("AND", "RRR")
    _decode_OR = mnem("OR", "RRR")
    _decode_XOR = mnem("XOR", "RRR")
    _decode_ADD = mnem("ADD", "RRR")
    _decode_ADDX2 = mnem("ADDX2", "RRR")
    _decode_ADDX4 = mnem("ADDX4", "RRR")
    _decode_ADDX8 = mnem("ADDX8", "RRR")
    _decode_SUB = mnem("SUB", "RRR")
    _decode_SUBX2 = mnem("SUBX2", "RRR")
    _decode_SUBX4 = mnem("SUBX4", "RRR")
    _decode_SUBX8 = mnem("SUBX8", "RRR")
 532 | 
    # Dispatch table for _decode_ST0, indexed by the r field.
    _st0_map = [
        "SNM0", "MOVSP", "SYNC", "RFEI",
        "BREAK", "SYSCALL", "RSIL", "WAITI",
        "ANY4", "ALL4", "ANY8", "ALL8",
        None, None, None, None, # these are reserved
    ]
    @classmethod
    def _decode_ST0(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _st0_map."""
        # Formats RRR and CALLX
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st0_map)

    # Leaves of _st0_map. The third positional argument to mnem, where given,
    # is the predicate deciding whether the encoding is valid.
    _decode_MOVSP = mnem("MOVSP", "RRR")
    _decode_BREAK = mnem("BREAK", "RRR")
    # SYSCALL requires both s and t to be zero.
    _decode_SYSCALL = mnem("SYSCALL", "RRR", lambda insn: insn.s == 0 and insn.t == 0)
    _decode_RSIL = mnem("RSIL", "RRR")
    # WAITI requires t to be zero.
    _decode_WAITI = mnem("WAITI", "RRR", lambda insn: insn.t == 0)
    _decode_ANY4 = mnem("ANY4", "RRR")
    _decode_ALL4 = mnem("ALL4", "RRR")
    _decode_ANY8 = mnem("ANY8", "RRR")
    _decode_ALL8 = mnem("ALL8", "RRR")
 553 | 
    # Dispatch table for _decode_SNM0, indexed by the m field (four entries).
    _snm0_map = [
        "ILL", None, "JR", "CALLX", # None is reserved
    ]
    @classmethod
    def _decode_SNM0(cls, insn, insn_bytes):
        """Decode the m field and dispatch through _snm0_map."""
        # Format CALLX (n, s vary)
        return cls._do_tbl_layer(insn, insn_bytes, "m", cls._snm0_map)

    # ILL is only valid when both s and n are zero.
    _decode_ILL = mnem("ILL", "CALLX", lambda insn: insn.s == 0 and insn.n == 0)
 563 | 
    # Dispatch table for _decode_JR, indexed by the n field (four entries).
    _jr_map = [
        "RET", "RETW", "JX", None, # None is reserved
    ]
    @classmethod
    def _decode_JR(cls, insn, insn_bytes):
        """Decode the n field and dispatch through _jr_map."""
        # Format CALLX (s varies)
        return cls._do_tbl_layer(insn, insn_bytes, "n", cls._jr_map)

    # RET and RETW are only valid with s == 0; JX has no predicate.
    _decode_RET = mnem("RET", "CALLX", lambda insn: insn.s == 0)
    _decode_RETW = mnem("RETW", "CALLX", lambda insn: insn.s == 0)
    _decode_JX = mnem("JX", "CALLX")
 575 | 
    # Dispatch table for _decode_CALLX, indexed by the n field (four entries).
    _callx_map = [
        "CALLX0", "CALLX4", "CALLX8", "CALLX12",
    ]
    @classmethod
    def _decode_CALLX(cls, insn, insn_bytes):
        """Decode the n field and dispatch through _callx_map."""
        # Format CALLX (s varies)
        return cls._do_tbl_layer(insn, insn_bytes, "n", cls._callx_map)

    # CALLX-format leaves of _callx_map; none has a validity predicate.
    _decode_CALLX0 = mnem("CALLX0", "CALLX")
    _decode_CALLX4 = mnem("CALLX4", "CALLX")
    _decode_CALLX8 = mnem("CALLX8", "CALLX")
    _decode_CALLX12 = mnem("CALLX12", "CALLX")
 588 | 
 589 |     # SYNC
    # Dispatch table for _decode_SYNC, indexed by the t field.
    _sync_map = [
        "ISYNC", "RSYNC", "ESYNC", "DSYNC",
        None, None, None, None, # None is reserved
        "EXCW", None, None, None,
        "MEMW", "EXTW", None, "NOP",
        # The manual doesn't show NOP here, but the NOP encoding it shows
        # _should_ go here, and objdump disassembles it as "nop"
    ]
    @classmethod
    def _decode_SYNC(cls, insn, insn_bytes):
        """Decode the t field and dispatch through _sync_map."""
        # Format RRR (s varies)
        return cls._do_tbl_layer(insn, insn_bytes, "t", cls._sync_map)

    # Every leaf of _sync_map is only valid when s == 0.
    _decode_ISYNC = mnem("ISYNC", "RRR", lambda insn: insn.s == 0)
    _decode_RSYNC = mnem("RSYNC", "RRR", lambda insn: insn.s == 0)
    _decode_ESYNC = mnem("ESYNC", "RRR", lambda insn: insn.s == 0)
    _decode_DSYNC = mnem("DSYNC", "RRR", lambda insn: insn.s == 0)
    _decode_EXCW = mnem("EXCW", "RRR", lambda insn: insn.s == 0)
    _decode_MEMW = mnem("MEMW", "RRR", lambda insn: insn.s == 0)
    _decode_EXTW = mnem("EXTW", "RRR", lambda insn: insn.s == 0)
    _decode_NOP = mnem("NOP", "RRR", lambda insn: insn.s == 0)
 611 | 
    # Dispatch table for _decode_RFEI, indexed by the t field.
    _rfei_map = [
        "RFET", "RFI", "RFME", None, # None is reserved
        None, None, None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_RFEI(cls, insn, insn_bytes):
        """Decode the t field and dispatch through _rfei_map."""
        # Format RRR (s varies)
        return cls._do_tbl_layer(insn, insn_bytes, "t", cls._rfei_map)

    # RFI has no predicate; RFME is only valid when s == 0.
    _decode_RFI = mnem("RFI", "RRR")
    _decode_RFME = mnem("RFME", "RRR", lambda insn: insn.s == 0)
 625 | 
    # Dispatch table for _decode_RFET, indexed by the s field.
    # NOTE(review): the ISA manual's RFET table appears to name slot 1 "RFUE";
    # confirm whether the "RFUI" spelling used here is intentional.
    _rfet_map = [
        "RFE", "RFUI", "RFDE", None, # None is reserved
        "RFWO", "RFWU", None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_RFET(cls, insn, insn_bytes):
        """Decode the s field and dispatch through _rfet_map."""
        # Format RRR (no bits vary)
        return cls._do_tbl_layer(insn, insn_bytes, "s", cls._rfet_map)

    # RRR-format leaves of _rfet_map; none has a validity predicate.
    _decode_RFE = mnem("RFE", "RRR")
    _decode_RFUI = mnem("RFUI", "RRR")
    _decode_RFDE = mnem("RFDE", "RRR")
    _decode_RFWO = mnem("RFWO", "RRR")
    _decode_RFWU = mnem("RFWU", "RRR")
 642 | 
 643 |     _st1_map = [
 644 |         "SSR", "SSL", "SSA8L", "SSA8B",
 645 |         "SSAI", None, "RER", "WER", # None is reserved
 646 |         "ROTW", None, None, None, # None is reserved
 647 |         None, None, "NSA", "NSAU",
 648 |     ]
 649 |     @classmethod
 650 |     def _decode_ST1(cls, insn, insn_bytes):
 651 |         # Format RRR (t, s vary)
 652 |         return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st1_map)
 653 | 
 654 |     _decode_SSR = mnem("SSR", "RRR", lambda insn: insn.t == 0)
 655 |     _decode_SSL = mnem("SSL", "RRR", lambda insn: insn.t == 0)
 656 |     _decode_SSA8L = mnem("SSA8L", "RRR", lambda insn: insn.t == 0)
 657 |     _decode_SSA8B = mnem("SSA8B", "RRR", lambda insn: insn.t == 0)
 658 |     _decode_SSAI = mnem("SSAI", "RRR", lambda insn: insn.t == 0,
 659 |                         inline0=lambda insn, _: insn.s + ((insn.t & 1) << 4) )
 660 |     _decode_RER = mnem("RER", "RRR")
 661 |     _decode_WER = mnem("WER", "RRR")
 662 |     _decode_ROTW = mnem("ROTW", "RRR", lambda insn: insn.s == 0)
 663 |     _decode_NSA = mnem("NSA", "RRR")
 664 |     _decode_NSAU = mnem("NSAU", "RRR")
 665 | 
    # Dispatch table for _decode_TLB, indexed by the r field.
    _tlb_map = [
        None, None, None, "RITLB0", # None is reserved
        "IITLB", "PITLB", "WITLB", "RITLB1",
        None, None, None, "RDTLB0",
        "IDTLB", "PDTLB", "WDTLB", "RDTLB1",
    ]
    @classmethod
    def _decode_TLB(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _tlb_map."""
        # Format RRR (t, s vary)
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._tlb_map)

    # Leaves of _tlb_map; IITLB and IDTLB are only valid when t == 0.
    _decode_RITLB0 = mnem("RITLB0", "RRR")
    _decode_IITLB = mnem("IITLB", "RRR", lambda insn: insn.t == 0)
    _decode_PITLB = mnem("PITLB", "RRR")
    _decode_WITLB = mnem("WITLB", "RRR")
    _decode_RITLB1 = mnem("RITLB1", "RRR")
    _decode_RDTLB0 = mnem("RDTLB0", "RRR")
    _decode_IDTLB = mnem("IDTLB", "RRR", lambda insn: insn.t == 0)
    _decode_PDTLB = mnem("PDTLB", "RRR")
    _decode_WDTLB = mnem("WDTLB", "RRR")
    _decode_RDTLB1 = mnem("RDTLB1", "RRR")
 687 | 
    # Dispatch table for _decode_RT0, indexed by the s field. Only the first
    # two slots have a handler name.
    _rt0_map = [
        "NEG", "ABS", None, None,
        None, None, None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_RT0(cls, insn, insn_bytes):
        """Decode the s field and dispatch through _rt0_map."""
        # Format RRR (t, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "s", cls._rt0_map)

    # RRR-format leaves of _rt0_map; neither has a validity predicate.
    _decode_NEG = mnem("NEG", "RRR")
    _decode_ABS = mnem("ABS", "RRR")
 701 | 
    # Dispatch table for _decode_RST1, indexed by the op2 field. SLLI and SRAI
    # each occupy two adjacent slots because the low bit of op2 is part of
    # their operand (see the inline0 helpers below).
    _rst1_map = [
        "SLLI", "SLLI", "SRAI", "SRAI",
        "SRLI", None, "XSR", "ACCER",
        "SRC", "SRL", "SLL", "SRA",
        "MUL16U", "MUL16S", None, "IMP"
    ]
    @classmethod
    def _decode_RST1(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _rst1_map."""
        # Format RRR (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst1_map)

    # SLLI: inline0 is 32 minus the 5-bit value formed from t (low four bits)
    # and bit 0 of op2 (bit 4).
    _decode_SLLI = mnem("SLLI", "RRR",
                        inline0=lambda insn, _: 32 - ( insn.t + ((insn.op2 & 1) << 4) ))
    # SRAI: inline0 is the 5-bit value formed from s (low four bits) and bit 0
    # of op2 (bit 4).
    _decode_SRAI = mnem("SRAI", "RRR",
                        inline0=lambda insn, _: insn.s + ((insn.op2 & 1) << 4))
    _decode_SRLI = mnem("SRLI", "RRR")
    _decode_XSR = mnem("XSR", "RSR")
    _decode_SRC = mnem("SRC", "RRR")
    # SRL and SRA are only valid when s == 0; SLL only when t == 0.
    _decode_SRL = mnem("SRL", "RRR", lambda insn: insn.s == 0)
    _decode_SLL = mnem("SLL", "RRR", lambda insn: insn.t == 0)
    _decode_SRA = mnem("SRA", "RRR", lambda insn: insn.s == 0)
    _decode_MUL16U = mnem("MUL16U", "RRR")
    _decode_MUL16S = mnem("MUL16S", "RRR")
 725 | 
    # Dispatch table for _decode_ACCER, indexed by the r field. Only slots 6
    # and 7 have a handler name.
    _accer_map = [
        None, None, None, None,
        None, None, "RER", "WER",
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_ACCER(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _accer_map."""
        # Format RRR (t, s vary)
        # There's a bug in the manual here: it says to filter on op2, however we
        # filtered on op2 to get here. Inspection suggests that we should in
        # fact filter on the following values for r:
        # RER = 0110
        # WER = 0111
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._accer_map)

    # These rebind _decode_RER/_decode_WER with the same arguments as the
    # assignments that follow _st1_map; both tables resolve to these names.
    _decode_RER = mnem("RER", "RRR")
    _decode_WER = mnem("WER", "RRR")
 744 | 
    # Dispatch table for _decode_IMP, indexed by the r field.
    _imp_map = [
        "LICT", "SICT", "LICW", "SICW",
        None, None, None, None, # None is reserved
        "LDCT", "SDCT", None,  None,
        None, None, "RFDX", None,
    ]
    @classmethod
    def _decode_IMP(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _imp_map."""
        # Format RRR (t, s vary)
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._imp_map)

    # RRR-format leaves of _imp_map; none has a validity predicate.
    _decode_LICT = mnem("LICT", "RRR")
    _decode_SICT = mnem("SICT", "RRR")
    _decode_LICW = mnem("LICW", "RRR")
    _decode_SICW = mnem("SICW", "RRR")
    _decode_LDCT = mnem("LDCT", "RRR")
    _decode_SDCT = mnem("SDCT", "RRR")
 762 | 
    # Dispatch table for _decode_RFDX, indexed by the t field.
    _rfdx_map = [
        "RFDO", "RFDD", None, None, # None is reserved
        None, None, None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_RFDX(cls, insn, insn_bytes):
        """Decode the t field and dispatch through _rfdx_map."""
        # Format RRR (s varies)
        return cls._do_tbl_layer(insn, insn_bytes, "t", cls._rfdx_map)

    # RFDO is only valid when s == 0; RFDD accepts s of 0 or 1.
    _decode_RFDO = mnem("RFDO", "RRR", lambda insn: insn.s == 0)
    _decode_RFDD = mnem("RFDD", "RRR", lambda insn: insn.s in [0, 1])
 776 | 
    # Dispatch table for _decode_RST2, indexed by the op2 field.
    _rst2_map = [
        "ANDB", "ANDBC", "ORB", "ORBC",
        "XORB", None, None, None,
        "MULL", None, "MULUH", "MULSH",
        "QUOU", "QUOS", "REMU", "REMS",
    ]
    @classmethod
    def _decode_RST2(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _rst2_map."""
        # Format RRR (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst2_map)

    # RRR-format leaves of _rst2_map; none has a validity predicate.
    _decode_ANDB = mnem("ANDB", "RRR")
    _decode_ANDBC = mnem("ANDBC", "RRR")
    _decode_ORB = mnem("ORB", "RRR")
    _decode_ORBC = mnem("ORBC", "RRR")
    _decode_XORB = mnem("XORB", "RRR")
    _decode_MULL = mnem("MULL", "RRR")
    _decode_MULUH = mnem("MULUH", "RRR")
    _decode_MULSH = mnem("MULSH", "RRR")
    _decode_QUOU = mnem("QUOU", "RRR")
    _decode_QUOS = mnem("QUOS", "RRR")
    _decode_REMU = mnem("REMU", "RRR")
    _decode_REMS = mnem("REMS", "RRR")
 800 | 
    # Dispatch table for _decode_RST3, indexed by the op2 field. Every slot
    # has a handler name.
    _rst3_map = [
        "RSR", "WSR",  "SEXT", "CLAMPS",
        "MIN", "MAX", "MINU", "MAXU",
        "MOVEQZ", "MOVNEZ", "MOVLTZ", "MOVGEZ",
        "MOVF", "MOVT", "RUR", "WUR",
    ]
    @classmethod
    def _decode_RST3(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _rst3_map."""
        # Formats RRR and RSR (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._rst3_map)

    # Leaves of _rst3_map. RSR, WSR and WUR use the RSR format; the rest,
    # including RUR, use RRR.
    _decode_RSR = mnem("RSR", "RSR")
    _decode_WSR = mnem("WSR", "RSR")
    _decode_SEXT = mnem("SEXT", "RRR")
    _decode_CLAMPS = mnem("CLAMPS", "RRR")
    _decode_MIN = mnem("MIN", "RRR")
    _decode_MAX = mnem("MAX", "RRR")
    _decode_MINU = mnem("MINU", "RRR")
    _decode_MAXU = mnem("MAXU", "RRR")
    _decode_MOVEQZ = mnem("MOVEQZ", "RRR")
    _decode_MOVNEZ = mnem("MOVNEZ", "RRR")
    _decode_MOVLTZ = mnem("MOVLTZ", "RRR")
    _decode_MOVGEZ = mnem("MOVGEZ", "RRR")
    _decode_MOVF = mnem("MOVF", "RRR")
    _decode_MOVT = mnem("MOVT", "RRR")
    _decode_RUR = mnem("RUR", "RRR") # lol, could probably treat as RSR
    _decode_WUR = mnem("WUR", "RSR")
 828 | 
    # Dispatch table for _decode_LSCX, indexed by the op2 field.
    _lscx_map = [
        "LSX", "LSXU", None, None, # None is reserved
        "SSX", "SSXU", None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_LSCX(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _lscx_map."""
        # Format RRR (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._lscx_map)

    # RRR-format leaves of _lscx_map; none has a validity predicate.
    _decode_LSX = mnem("LSX", "RRR")
    _decode_LSXU = mnem("LSXU", "RRR")
    _decode_SSX = mnem("SSX", "RRR")
    _decode_SSXU = mnem("SSXU", "RRR")
 844 | 
    # Dispatch table for _decode_LSC4, indexed by the op2 field. Only slots 0
    # and 4 have a handler name.
    _lsc4_map = [
        "L32E", None, None, None,
        "S32E", None, None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_LSC4(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _lsc4_map."""
        # Format RRI4 (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._lsc4_map)

    # RRI4-format leaves of _lsc4_map; neither has a validity predicate.
    _decode_L32E = mnem("L32E", "RRI4")
    _decode_S32E = mnem("S32E", "RRI4")
 858 | 
 859 |     _fp0_map = [
 860 |         "ADD_S", "SUB_S", "MUL_S", None, # None is reserved
 861 |         "MADD_S", "MSUB_S", None, None,
 862 |         "ROUND_S", "TRUNC_S", "FLOOR_S", "CEIL_S",
 863 |         "FLOAT_S", "UFLOAT_S", "UTRUNC_S", "FP1OP",
 864 |     ]
 865 |     @classmethod
 866 |     def _decode_FP0(cls, insn, insn_bytes):
 867 |         # Format RRR (t, s, r vary)
 868 |         return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._fp0_map)
 869 | 
 870 |     _decode_ADD_S = mnem("ADD_S", "RRR")
 871 |     _decode_SUB_S = mnem("SUB_S", "RRR")
 872 |     _decode_MUL_S = mnem("MUL_S", "RRR")
 873 |     _decode_MADD_S = mnem("MADD_S", "RRR")
 874 |     _decode_MSUB_S = mnem("MSUB_S", "RRR")
 875 |     _decode_ROUND_S = mnem("ROUND_S", "RRR")
 876 |     _decode_TRUNC_S = mnem("TRUNC_S", "RRR")
 877 |     _decode_FLOOR_S = mnem("FLOOR_S", "RRR")
 878 |     _decode_CEIL_S = mnem("CEIL_S", "RRR")
 879 |     _decode_FLOAT_S = mnem("FLOAT_S", "RRR")
 880 |     _decode_UFLOAT_S = mnem("UFLOAT_S", "RRR")
 881 |     _decode_UTRUNC_S = mnem("UTRUNC_S", "RRR")
 882 | 
    # Dispatch table for _decode_FP1OP, indexed by the t field.
    _fp1op_map = [
        "MOV_S", "ABS_S", None, None, # None is reserved
        "RFR", "WFR", "NEG_S", None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_FP1OP(cls, insn, insn_bytes):
        """Decode the t field and dispatch through _fp1op_map."""
        # Format RRR (s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "t", cls._fp1op_map)

    # RRR-format leaves of _fp1op_map; none has a validity predicate.
    _decode_MOV_S = mnem("MOV.S", "RRR")
    _decode_ABS_S = mnem("ABS.S", "RRR")
    _decode_RFR = mnem("RFR", "RRR")
    _decode_WFR = mnem("WFR", "RRR")
    _decode_NEG_S = mnem("NEG.S", "RRR")
 899 | 
    # Dispatch table for _decode_FP1, indexed by the op2 field.
    _fp1_map = [
        None, "UN_S", "OEQ_S", "UEQ_S", # None is reserved
        "OLT_S", "ULT_S", "OLE_S", "ULE_S",
        "MOVEQZ_S", "MOVNEZ_S", "MOVLTZ_S", "MOVGEZ_S",
        "MOVF_S", "MOVT_S", None, None,
    ]
    @classmethod
    def _decode_FP1(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _fp1_map."""
        # Format RRR (t, s, r vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._fp1_map)

    # RRR-format leaves of _fp1_map; none has a validity predicate.
    _decode_UN_S = mnem("UN.S", "RRR")
    _decode_OEQ_S = mnem("OEQ.S", "RRR")
    _decode_UEQ_S = mnem("UEQ.S", "RRR")
    _decode_OLT_S = mnem("OLT.S", "RRR")
    _decode_ULT_S = mnem("ULT.S", "RRR")
    _decode_OLE_S = mnem("OLE.S", "RRR")
    _decode_ULE_S = mnem("ULE.S", "RRR")
    _decode_MOVEQZ_S = mnem("MOVEQZ.S", "RRR")
    _decode_MOVNEZ_S = mnem("MOVNEZ.S", "RRR")
    _decode_MOVLTZ_S = mnem("MOVLTZ.S", "RRR")
    _decode_MOVGEZ_S = mnem("MOVGEZ.S", "RRR")
    _decode_MOVF_S = mnem("MOVF.S", "RRR")
    _decode_MOVT_S = mnem("MOVT.S", "RRR")
 924 | 
    # Dispatch table for _decode_LSAI, indexed by the r field.
    _lsai_map = [
        "L8UI", "L16UI", "L32I", None, # None is reserved
        "S8I", "S16I", "S32I", "CACHE",
        None, "L16SI", "MOVI", "L32AI",
        "ADDI", "ADDMI", "S32C1I", "S32RI",
    ]
    @classmethod
    def _decode_LSAI(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _lsai_map."""
        # Formats RRI8 and RRI4 (t, s, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._lsai_map)

    # Leaves of _lsai_map. Where present, inline0 is imm8 shifted left by 1
    # for the 16-bit accesses and by 2 for the 32-bit accesses.
    _decode_L8UI = mnem("L8UI", "RRI8")
    _decode_L16UI = mnem("L16UI", "RRI8",
                         inline0=lambda insn, _: insn.imm8 << 1)
    _decode_L32I = mnem("L32I", "RRI8",
                        inline0=lambda insn, _: insn.imm8 << 2)
    _decode_S8I = mnem("S8I", "RRI8")
    _decode_S16I = mnem("S16I", "RRI8",
                        inline0=lambda insn, _: insn.imm8 << 1)
    _decode_S32I = mnem("S32I", "RRI8",
                        inline0=lambda insn, _: insn.imm8 << 2)
    _decode_L16SI = mnem("L16SI", "RRI8",
                         inline0=lambda insn, _: insn.imm8 << 1)
    # MOVI: inline0 is the 12-bit value (s << 8) + imm8 passed through
    # sign_extend with a width of 12.
    _decode_MOVI = mnem("MOVI", "RRI8",
                        inline0=lambda insn, _:
                            sign_extend((insn.s << 8) + insn.imm8, 12)
                        )
    _decode_L32AI = mnem("L32AI", "RRI8",
                         inline0=lambda insn, _: insn.imm8 << 2)
    _decode_ADDI = mnem("ADDI", "RRI8")
    _decode_ADDMI = mnem("ADDMI", "RRI8")
    # NOTE(review): unlike L32AI and S32RI, S32C1I has no inline0 scaling
    # imm8 by 4 -- confirm whether that is handled elsewhere or is missing.
    _decode_S32C1I = mnem("S32C1I", "RRI8")
    _decode_S32RI = mnem("S32RI", "RRI8",
                         inline0=lambda insn, _: insn.imm8 << 2)
 959 | 
    # Dispatch table for _decode_CACHE, indexed by the t field. DCE and ICE
    # are further sub-tables rather than leaves.
    _cache_map = [
        "DPFR", "DPFW", "DPFRO", "DPFWO",
        "DHWB", "DHWBI", "DHI", "DII",
        "DCE", None, None, None, # None is reserved
        "IPF", "ICE", "IHI", "III",
    ]
    @classmethod
    def _decode_CACHE(cls, insn, insn_bytes):
        """Decode the t field and dispatch through _cache_map."""
        # Formats RRI8 and RRI4 (s, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "t", cls._cache_map)

    # RRI8-format leaves of _cache_map; none has a validity predicate.
    _decode_DPFR = mnem("DPFR", "RRI8")
    _decode_DPFW = mnem("DPFW", "RRI8")
    _decode_DPFRO = mnem("DPFRO", "RRI8")
    _decode_DPFWO = mnem("DPFWO", "RRI8")
    _decode_DHWB = mnem("DHWB", "RRI8")
    _decode_DHWBI = mnem("DHWBI", "RRI8")
    _decode_DHI = mnem("DHI", "RRI8")
    _decode_DII = mnem("DII", "RRI8")
    _decode_IPF = mnem("IPF", "RRI8")
    _decode_IHI = mnem("IHI", "RRI8")
    _decode_III = mnem("III", "RRI8")
 982 | 
    # Dispatch table for _decode_DCE, indexed by the op1 field.
    _dce_map = [
        "DPFL", None, "DHU", "DIU", # None is reserved
        "DIWB", "DIWBI", None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_DCE(cls, insn, insn_bytes):
        """Decode the op1 field and dispatch through _dce_map."""
        # Format RRI4 (s, imm4 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._dce_map)

    # RRI4-format leaves of _dce_map; none has a validity predicate.
    _decode_DPFL = mnem("DPFL", "RRI4")
    _decode_DHU = mnem("DHU", "RRI4")
    _decode_DIU = mnem("DIU", "RRI4")
    _decode_DIWB = mnem("DIWB", "RRI4")
    _decode_DIWBI = mnem("DIWBI", "RRI4")
 999 | 
    # Dispatch table for _decode_ICE, indexed by the op1 field.
    _ice_map = [
        "IPFL", None, "IHU", "IIU", # None is reserved
        None, None, None, None,
        None, None, None, None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_ICE(cls, insn, insn_bytes):
        """Decode the op1 field and dispatch through _ice_map."""
        # Format RRI4 (s, imm4 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op1", cls._ice_map)

    # RRI4-format leaves of _ice_map; none has a validity predicate.
    _decode_IPFL = mnem("IPFL", "RRI4")
    _decode_IHU = mnem("IHU", "RRI4")
    _decode_IIU = mnem("IIU", "RRI4")
1014 | 
    # Dispatch table for _decode_LSCI, indexed by the r field. Only every
    # fourth slot has a handler name.
    _lsci_map = [
        "LSI", None, None, None, # None is reserved
        "SSI", None, None, None,
        "LSIU", None, None, None,
        "SSIU", None, None, None,
    ]
    @classmethod
    def _decode_LSCI(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _lsci_map."""
        # format RRI8 (t, s, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._lsci_map)

    # RRI8-format leaves of _lsci_map; none has a validity predicate.
    _decode_LSI = mnem("LSI", "RRI8")
    _decode_SSI = mnem("SSI", "RRI8")
    _decode_LSIU = mnem("LSIU", "RRI8")
    _decode_SSIU = mnem("SSIU", "RRI8")
1030 | 
    # Dispatch table for _decode_MAC16, indexed by the op2 field.
    _mac16_map = [
        "MACID", "MACCD", "MACDD", "MACAD",
        "MACIA", "MACCA", "MACDA", "MACAA",
        "MACI", "MACC", None, None, # None is reserved
        None, None, None, None,
    ]
    @classmethod
    def _decode_MAC16(cls, insn, insn_bytes):
        """Decode the op2 field and dispatch through _mac16_map."""
        # format RRR (t, s, r, op1 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "op2", cls._mac16_map)

    # TODO: Skipping the handlers for the MAC16 sub-tables for now. It looks
    # like a vector-processor style extension that I doubt the ESP8266 has.
    # No _decode_ handlers for the names in _mac16_map are defined in this
    # part of the file.
1044 | 
    # Dispatch table for _decode_CALLN, indexed by the n field (four entries).
    _calln_map = [
        "CALL0", "CALL4", "CALL8", "CALL12",
    ]
    @classmethod
    def _decode_CALLN(cls, insn, insn_bytes):
        """Decode the n field and dispatch through _calln_map."""
        # Format CALL (offset varies)
        return cls._do_tbl_layer(insn, insn_bytes, "n", cls._calln_map)

    # CALL-format leaves of _calln_map; none has a validity predicate.
    _decode_CALL0 = mnem("CALL0", "CALL")
    _decode_CALL4 = mnem("CALL4", "CALL")
    _decode_CALL8 = mnem("CALL8", "CALL")
    _decode_CALL12 = mnem("CALL12", "CALL")
1057 | 
    # Dispatch table for _decode_SI, indexed by the n field (four entries).
    # J is a leaf; BZ, BI0 and BI1 are further sub-tables.
    _si_map = [
        "J", "BZ", "BI0", "BI1",
    ]
    @classmethod
    def _decode_SI(cls, insn, insn_bytes):
        """Decode the n field and dispatch through _si_map."""
        # Formats CALL, BRI8 and BRI12 (offset varies)
        return cls._do_tbl_layer(insn, insn_bytes, "n", cls._si_map)

    # J uses the CALL format and has no validity predicate.
    _decode_J = mnem("J", "CALL")
1067 | 
    # Dispatch table for _decode_BZ, indexed by the m field (four entries).
    _bz_map = [
        "BEQZ", "BNEZ", "BLTZ", "BGEZ",
    ]
    @classmethod
    def _decode_BZ(cls, insn, insn_bytes):
        """Decode the m field and dispatch through _bz_map."""
        # Format BRI12 (s, imm12 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bz_map)

    # BRI12-format leaves of _bz_map; none has a validity predicate.
    _decode_BEQZ = mnem("BEQZ", "BRI12")
    _decode_BNEZ = mnem("BNEZ", "BRI12")
    _decode_BLTZ = mnem("BLTZ", "BRI12")
    _decode_BGEZ = mnem("BGEZ", "BRI12")
1080 | 
    # Dispatch table for _decode_BI0, indexed by the m field (four entries).
    _bi0_map = [
        "BEQI", "BNEI", "BLTI", "BGEI",
    ]
    @classmethod
    def _decode_BI0(cls, insn, insn_bytes):
        """Decode the m field and dispatch through _bi0_map."""
        # Format BRI8 (s, r, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bi0_map)

    # BRI8-format leaves of _bi0_map; none has a validity predicate.
    _decode_BEQI = mnem("BEQI", "BRI8")
    _decode_BNEI = mnem("BNEI", "BRI8")
    _decode_BLTI = mnem("BLTI", "BRI8")
    _decode_BGEI = mnem("BGEI", "BRI8")
1093 | 
    # Dispatch table for _decode_BI1, indexed by the m field (four entries).
    # B1 is a further sub-table; the other three are leaves.
    _bi1_map = [
        "ENTRY",
        "B1",
        "BLTUI",
        "BGEUI",
    ]
    @classmethod
    def _decode_BI1(cls, insn, insn_bytes):
        """Decode the m field and dispatch through _bi1_map."""
        # Formats BRI8 and BRI12 (s, r, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "m", cls._bi1_map)

    # ENTRY: inline0 is imm12 shifted left by 3.
    _decode_ENTRY = mnem("ENTRY", "BRI12",
                         inline0=lambda insn, _: insn.imm12 << 3)
    _decode_BLTUI = mnem("BLTUI", "BRI8")
    _decode_BGEUI = mnem("BGEUI", "BRI8")
1109 | 
    # Dispatch table for _decode_B1, indexed by the r field.
    _b1_map = [
        "BF", "BT", None, None, # None is reserved
        None, None, None, None,
        "LOOP", "LOOPNEZ", "LOOPGTZ", None,
        None, None, None, None,
    ]
    @classmethod
    def _decode_B1(cls, insn, insn_bytes):
        """Decode the r field and dispatch through _b1_map."""
        # Format BRI8 (s, imm8 vary)
        return cls._do_tbl_layer(insn, insn_bytes, "r", cls._b1_map)

    # BRI8-format leaves of _b1_map; none has a validity predicate.
    _decode_BF = mnem("BF", "BRI8")
    _decode_BT = mnem("BT", "BRI8")
    _decode_LOOP = mnem("LOOP", "BRI8")
    _decode_LOOPNEZ = mnem("LOOPNEZ", "BRI8")
    _decode_LOOPGTZ = mnem("LOOPGTZ", "BRI8")
1126 | 
    # 16-entry table that _decode_B passes to _do_tbl_layer together with
    # the "r" field name. BBCI and BBSI each occupy two adjacent slots; their
    # decoders fold the low bit of r into the immediate they produce.
    _b_map = [
        "BNONE", "BEQ", "BLT", "BLTU",
        "BALL", "BBC", "BBCI", "BBCI",
        "BANY", "BNE", "BGE", "BGEU",
        "BNALL", "BBS", "BBSI", "BBSI"
    ]
1133 |     @classmethod
1134 |     def _decode_B(cls, insn, insn_bytes):
1135 |         # Format RRI8 (t, s, imm8 vary)
1136 |         return cls._do_tbl_layer(insn, insn_bytes, "r", cls._b_map)
1137 | 
    # Leaf decoders for the _b_map entries, each built by mnem() with the
    # "RRI8" format name.
    # BBCI and BBSI additionally pass an inline0 callback that computes
    # t + ((r & 1) << 4): the low bit of r becomes bit 4 above t (a 5-bit
    # value, assuming t is a 4-bit field).
    _decode_BNONE = mnem("BNONE", "RRI8")
    _decode_BEQ = mnem("BEQ", "RRI8")
    _decode_BLT = mnem("BLT", "RRI8")
    _decode_BLTU = mnem("BLTU", "RRI8")
    _decode_BALL = mnem("BALL", "RRI8")
    _decode_BBC = mnem("BBC", "RRI8")
    _decode_BBCI = mnem("BBCI", "RRI8",
                        inline0=lambda insn, _: insn.t + ((insn.r & 1) << 4))
    _decode_BANY = mnem("BANY", "RRI8")
    _decode_BNE = mnem("BNE", "RRI8")
    _decode_BGE = mnem("BGE", "RRI8")
    _decode_BGEU = mnem("BGEU", "RRI8")
    _decode_BNALL = mnem("BNALL", "RRI8")
    _decode_BBS = mnem("BBS", "RRI8")
    _decode_BBSI = mnem("BBSI", "RRI8",
                        inline0=lambda insn, _: insn.t + ((insn.r & 1) << 4))
1154 | 
    # 16-entry table that _decode_ST2 passes to _do_tbl_layer together with
    # the "t" field name. The first eight slots all name MOVI_N, the next
    # four BEQZ_N and the last four BNEZ_N.
    _st2_map = [
        "MOVI_N", "MOVI_N", "MOVI_N", "MOVI_N",
        "MOVI_N", "MOVI_N", "MOVI_N", "MOVI_N",
        "BEQZ_N", "BEQZ_N", "BEQZ_N", "BEQZ_N",
        "BNEZ_N", "BNEZ_N", "BNEZ_N", "BNEZ_N",
    ]
1161 |     @classmethod
1162 |     def _decode_ST2(cls, insn, insn_bytes):
1163 |         # Formats RI7 and RI6 (s, r vary)
1164 |         return cls._do_tbl_layer(insn, insn_bytes, "t", cls._st2_map)
1165 | 
    # Leaf decoders for the _st2_map entries. The table uses "_N" names while
    # the mnemonic strings given to mnem() use the ".N" spelling.
    # MOVI.N's inline0 callback yields sign_extend(imm7, 7) only when the top
    # two bits of imm7 are both set ((imm7 >> 5) == 3); otherwise imm7 is
    # returned unchanged.
    _decode_MOVI_N = mnem("MOVI.N", "RI7",
                          inline0=lambda insn, _:
                               sign_extend(insn.imm7, 7) if
                               # Sign-extending the 7-bit value with the logical
                               # and of its two most significant bits
                               ((insn.imm7 >> 5) == 3) else
                               insn.imm7
                          )
    _decode_BEQZ_N = mnem("BEQZ.N", "RI6")
    _decode_BNEZ_N = mnem("BNEZ.N", "RI6")
1176 | 
    # 16-entry table that _decode_ST3 passes to _do_tbl_layer together with
    # the "r" field name. Only slot 0 (MOV_N) and slot 15 (S3, a further
    # table layer handled by _decode_S3) are populated.
    _st3_map = [
        "MOV_N", None, None, None, # None is reserved
        None, None, None, None,
        None, None, None, None,
        None, None, None, "S3",
    ]
1183 |     @classmethod
1184 |     def _decode_ST3(cls, insn, insn_bytes):
1185 |         # Format RRRN (t, s vary)
1186 |         return cls._do_tbl_layer(insn, insn_bytes, "r", cls._st3_map)
1187 | 
    # Leaf decoder for the "MOV_N" entry of _st3_map, built by mnem() with
    # the "RRRN" format name.
    _decode_MOV_N = mnem("MOV.N", "RRRN")
1189 | 
    # 16-entry table that _decode_S3 passes to _do_tbl_layer together with
    # the "t" field name. None marks a reserved slot.
    _s3_map = [
        "RET_N", "RETW_N", "BREAK_N", "NOP_N",
        None, None, "ILL_N", None, # None is reserved
        None, None, None, None,
        None, None, None, None,
    ]
1196 |     @classmethod
1197 |     def _decode_S3(cls, insn, insn_bytes):
1198 |         # Format RRRN (no fields vary)
1199 |         return cls._do_tbl_layer(insn, insn_bytes, "t", cls._s3_map)
1200 | 
    # Leaf decoders for the non-reserved _s3_map entries, each built by
    # mnem() with the "RRRN" format name.
    _decode_RET_N = mnem("RET.N", "RRRN")
    _decode_RETW_N = mnem("RETW.N", "RRRN")
    _decode_BREAK_N = mnem("BREAK.N", "RRRN")
    _decode_NOP_N = mnem("NOP.N", "RRRN")
    _decode_ILL_N = mnem("ILL.N", "RRRN")
1206 | 
1207 |     # Here's where we do the per-format decoding. This isn't quite as useful as
1208 |     # I thought it would be, since Xtensa's instruction formats are not at all
1209 |     # rigid (they sneak immediates into whatever bits are available, as they
1210 |     # should).
1211 | 
1212 |     # We actually don't keep the instruction bytes around for the disassembly
1213 |     # stage, so everything has to be parsed out somewhere in the decoding stage.
1214 |     @classmethod
1215 |     def _decode_fmt_RRR(cls, insn, insn_bytes):
1216 |         insn.length = 3
1217 |         insn.instruction_type = InstructionType.RRR
1218 |         # EXTUI uses op2 to encode part of its operation, so parse it here
1219 |         insn.op2 = decode_op2(insn_bytes)
1220 |         _decode_components(insn, insn_bytes, ["t", "s", "r"])
1221 | 
1222 |     @classmethod
1223 |     def _decode_fmt_RSR(cls, insn, insn_bytes):
1224 |         insn.instruction_type = InstructionType.RSR
1225 |         insn.length = 3
1226 |         _decode_components(insn, insn_bytes, ["t", "sr"])
1227 | 
1228 |     @classmethod
1229 |     def _decode_fmt_CALLX(cls, insn, insn_bytes):
1230 |         insn.length = 3
1231 |         insn.instruction_type = InstructionType.CALLX
1232 |         _decode_components(insn, insn_bytes, ["n", "m", "s", "r"])
1233 | 
1234 |     @classmethod
1235 |     def _decode_fmt_RRI4(cls, insn, insn_bytes):
1236 |         insn.length = 3
1237 |         insn.instruction_type = InstructionType.RRI4
1238 |         _decode_components(insn, insn_bytes, ["r", "s", "t", "imm4"])
1239 | 
1240 |     @classmethod
1241 |     def _decode_fmt_RRI8(cls, insn, insn_bytes):
1242 |         insn.length = 3
1243 |         insn.instruction_type = InstructionType.RRI8
1244 |         _decode_components(insn, insn_bytes, ["r", "s", "t", "imm8"])
1245 | 
1246 |     @classmethod
1247 |     def _decode_fmt_RI16(cls, insn, insn_bytes):
1248 |         insn.length = 3
1249 |         insn.instruction_type = InstructionType.RI16
1250 |         _decode_components(insn, insn_bytes, ["t", "imm16"])
1251 | 
1252 |     @classmethod
1253 |     def _decode_fmt_CALL(cls, insn, insn_bytes):
1254 |         insn.length = 3
1255 |         insn.instruction_type = InstructionType.CALL
1256 |         _decode_components(insn, insn_bytes, ["n", "offset"])
1257 | 
1258 |     @classmethod
1259 |     def _decode_fmt_BRI8(cls, insn, insn_bytes):
1260 |         insn.length = 3
1261 |         insn.instruction_type = InstructionType.BRI8
1262 |         _decode_components(insn, insn_bytes, ["r", "s", "m", "n", "imm8"])
1263 | 
1264 |     @classmethod
1265 |     def _decode_fmt_BRI12(cls, insn, insn_bytes):
1266 |         insn.length = 3
1267 |         insn.instruction_type = InstructionType.BRI12
1268 |         _decode_components(insn, insn_bytes, ["s", "m", "n", "imm12"])
1269 | 
1270 |     @classmethod
1271 |     def _decode_fmt_RRRN(cls, insn, insn_bytes):
1272 |         insn.length = 2
1273 |         insn.instruction_type = InstructionType.RRRN
1274 |         _decode_components(insn, insn_bytes, ["r", "s", "t"])
1275 | 
1276 |     @classmethod
1277 |     def _decode_fmt_RI7(cls, insn, insn_bytes):
1278 |         insn.length = 2
1279 |         insn.instruction_type = InstructionType.RI7
1280 |         _decode_components(insn, insn_bytes, ["s", "i", "imm7"])
1281 | 
1282 |     @classmethod
1283 |     def _decode_fmt_RI6(cls, insn, insn_bytes):
1284 |         insn.length = 2
1285 |         insn.instruction_type = InstructionType.RI6
1286 |         _decode_components(insn, insn_bytes, ["s", "i", "z", "imm6"])
1287 | 


--------------------------------------------------------------------------------