├── .gitignore ├── .gitmodules ├── README.md ├── flirt └── .gitignore ├── img ├── 1.png ├── 2.png └── 3.png ├── requirements.txt ├── samples ├── hello_world.so ├── openbook.so ├── raydium-pool.so └── test.so ├── solana-init.py └── solana ├── __init__.py ├── config.py ├── constants.py ├── helpers.py ├── processor.py ├── relocations.py └── strings.py /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | *.swp 3 | __pycache__ 4 | *.sqlite 5 | *.i64 6 | *.id* 7 | *.nam 8 | *.til 9 | -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "flirt/solana-ida-signatures-factory"] 2 | path = flirt/solana-ida-signatures-factory 3 | url = https://github.com/PassKeyRa/solana-ida-signatures-factory 4 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # IDA Pro Solana bytecode processor 2 | 3 | This is the processor plugin for IDA Pro that adds the ability to analyze Solana Virtual Machine bytecode. Since SVM is based on the enhanced Berkeley Packet Filter (eBPF) and mostly uses the same instruction set, [this](https://github.com/zandi/eBPF_processor) eBPF processor plugin was used as a basis. 4 | 5 | ## How to use 6 | 7 | Install `requirements.txt`. Copy `solana-init.py` script and the `solana` folder to the directory `/procs` and select the processor on a Solana program file loading to IDA. 8 | 9 | To dump a program from Solana mainnet use the following command: 10 | 11 | ``` 12 | solana program dump
13 | ``` 14 | 15 | During the file import into IDA you may encounter the following error: 16 | 17 | ![](./img/1.png) 18 | 19 | This is because the Solana EBPF processor should be selected explicitly. Double-click on the processor name in the processor list and select it: 20 | 21 | ![](./img/2.png) 22 | 23 | Then select Yes: 24 | 25 | ![](./img/3.png) 26 | 27 | 28 | ## FLIRT signatures 29 | 30 | Proceed to the [solana-ida-signatures-factory](https://github.com/PassKeyRa/solana-ida-signatures-factory) repository to generate function signatures. 31 | 32 | ## What works now 33 | 34 | * Solana eBPF instructions disassembling, including function calls and jumps 35 | * Strings detection 36 | * Relocations detection 37 | * FLAIR preprocessor to generate PAT files with libs functions signatures 38 | 39 | ## TODO 40 | 41 | * Parse and name Anchor functions and structures 42 | 43 | ## Thanks 44 | 45 | Thanks to Clément Berthaux (clement (dot) berthaux (at) synacktiv (dot) com) and Michael Zandi (the (dot) zandi (at) gmail (dot) com) for developing the EBPF processor plugin, which is the base for this plugin. -------------------------------------------------------------------------------- /flirt/.gitignore: -------------------------------------------------------------------------------- 1 | libs 2 | -------------------------------------------------------------------------------- /img/1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/img/1.png -------------------------------------------------------------------------------- /img/2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/img/2.png -------------------------------------------------------------------------------- /img/3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/img/3.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | pyelftools 2 | rust-demangler -------------------------------------------------------------------------------- /samples/hello_world.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/samples/hello_world.so -------------------------------------------------------------------------------- /samples/openbook.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/samples/openbook.so -------------------------------------------------------------------------------- /samples/raydium-pool.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/samples/raydium-pool.so -------------------------------------------------------------------------------- /samples/test.so: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/samples/test.so -------------------------------------------------------------------------------- /solana-init.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import pathlib 3 | 4 | from idaapi import * 5 | 6 | # Add the path to the IDA procs directory to the sys.path 7 | ida_procs_dir = pathlib.Path(__file__).parent.parent / 'procs' 8 | if ida_procs_dir not in sys.path: 9 | sys.path.append(str(ida_procs_dir)) 10 | 11 | from solana.processor import EBPFProc 12 | 13 | def PROCESSOR_ENTRY(): 14 | return EBPFProc() 15 | -------------------------------------------------------------------------------- /solana/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Decurity/solana-ebpf-ida-processor/aacd215907266190ed9c6c1b408ca9309f92ecdd/solana/__init__.py -------------------------------------------------------------------------------- /solana/config.py: -------------------------------------------------------------------------------- 1 | STRINGS_PREVIEW_LIMIT = 40 -------------------------------------------------------------------------------- /solana/constants.py: -------------------------------------------------------------------------------- 1 | # BPF ALU defines from uapi/linux/bpf_common.h 2 | # Mainly using these for disassembling atomic instructions 3 | BPF_ADD = 0x00 4 | BPF_SUB = 0x10 5 | BPF_MUL = 0x20 6 | BPF_DIV = 0x30 7 | BPF_OR = 0x40 8 | BPF_AND = 0x50 9 | BPF_LSH = 0x60 10 | BPF_RSH = 0x70 11 | BPF_NEG = 0x80 12 | BPF_MOD = 0x90 13 | BPF_XOR = 0xa0 14 | 15 | # and these atomic-specific constants from include/uapi/linux/bpf.h 16 | # /* atomic op type fields (stored in immediate) */ 17 | BPF_FETCH = 0x01 # /* not an opcode on its own, used to build others */ 18 | BPF_XCHG = (0xe0 | BPF_FETCH) # /* atomic exchange */ 19 | BPF_CMPXCHG = (0xf0 | BPF_FETCH) # /* atomic compare-and-write */ 20 | 21 | # being lazy, we only use this for atomic ops so far 22 | bpf_alu_string = {BPF_ADD: 'add', BPF_AND: 'and', BPF_OR: 'or', BPF_XOR: 'xor'} 23 | 24 | # Three least significant bits are operation class: 25 | ## BPF operation class: load from immediate. [DEPRECATED] 26 | BPF_LD = 0x00 27 | ## BPF operation class: load from register. 28 | BPF_LDX = 0x01 29 | ## BPF operation class: store immediate. 30 | BPF_ST = 0x02 31 | ## BPF operation class: store value from register. 32 | BPF_STX = 0x03 33 | ## BPF operation class: 32 bits arithmetic operation. 34 | BPF_ALU = 0x04 35 | ## BPF operation class: jump. 36 | BPF_JMP = 0x05 37 | ## BPF operation class: product / quotient / remainder. 38 | BPF_PQR = 0x06 39 | ## BPF operation class: 64 bits arithmetic operation. 40 | BPF_ALU64 = 0x07 41 | 42 | # Size modifiers: 43 | ## BPF size modifier: word (4 bytes). 44 | BPF_W = 0x00 45 | ## BPF size modifier: half-word (2 bytes). 46 | BPF_H = 0x08 47 | ## BPF size modifier: byte (1 byte). 48 | BPF_B = 0x10 49 | ## BPF size modifier: double word (8 bytes). 50 | BPF_DW = 0x18 51 | 52 | # Mode modifiers: 53 | ## BPF mode modifier: immediate value. 54 | BPF_IMM = 0x00 55 | ## BPF mode modifier: absolute load. 56 | BPF_ABS = 0x20 57 | ## BPF mode modifier: indirect load. 58 | BPF_IND = 0x40 59 | ## BPF mode modifier: load from / store to memory. 60 | BPF_MEM = 0x60 61 | # [ 0x80 reserved ] 62 | # [ 0xa0 reserved ] 63 | # [ 0xc0 reserved ] 64 | 65 | # For arithmetic (BPF_ALU/BPF_ALU64) and jump (BPF_JMP) instructions: 66 | # +----------------+--------+--------+ 67 | # | 4 bits |1 b.| 3 bits | 68 | # | operation code | src| insn class | 69 | # +----------------+----+------------+ 70 | # (MSB) (LSB) 71 | 72 | # Source modifiers: 73 | ## BPF source operand modifier: 32-bit immediate value. 74 | BPF_K = 0x00 75 | ## BPF source operand modifier: `src` register. 76 | BPF_X = 0x08 77 | 78 | # Operation codes -- BPF_ALU or BPF_ALU64 classes: 79 | ## BPF ALU/ALU64 operation code: addition. 80 | BPF_ADD = 0x00 81 | ## BPF ALU/ALU64 operation code: subtraction. 82 | BPF_SUB = 0x10 83 | ## BPF ALU/ALU64 operation code: multiplication. [DEPRECATED] 84 | BPF_MUL = 0x20 85 | ## BPF ALU/ALU64 operation code: division. [DEPRECATED] 86 | BPF_DIV = 0x30 87 | ## BPF ALU/ALU64 operation code: or. 88 | BPF_OR = 0x40 89 | ## BPF ALU/ALU64 operation code: and. 90 | BPF_AND = 0x50 91 | ## BPF ALU/ALU64 operation code: left shift. 92 | BPF_LSH = 0x60 93 | ## BPF ALU/ALU64 operation code: right shift. 94 | BPF_RSH = 0x70 95 | ## BPF ALU/ALU64 operation code: negation. [DEPRECATED] 96 | BPF_NEG = 0x80 97 | ## BPF ALU/ALU64 operation code: modulus. [DEPRECATED] 98 | BPF_MOD = 0x90 99 | ## BPF ALU/ALU64 operation code: exclusive or. 100 | BPF_XOR = 0xa0 101 | ## BPF ALU/ALU64 operation code: move. 102 | BPF_MOV = 0xb0 103 | ## BPF ALU/ALU64 operation code: sign extending right shift. 104 | BPF_ARSH = 0xc0 105 | ## BPF ALU/ALU64 operation code: endianness conversion. 106 | BPF_END = 0xd0 107 | ## BPF ALU/ALU64 operation code: high or. 108 | BPF_HOR = 0xf0 109 | 110 | # Operation codes -- BPF_PQR class: 111 | # 7 6 5 4 3 2-0 112 | # 0 Unsigned Multiplication Product Lower Half / Quotient 32 Bit Immediate PQR 113 | # 1 Signed Division Product Upper Half / Remainder 64 Bit Register PQR 114 | ## BPF PQR operation code: unsigned high multiplication. 115 | BPF_UHMUL = 0x20 116 | ## BPF PQR operation code: unsigned division quotient. 117 | BPF_UDIV = 0x40 118 | ## BPF PQR operation code: unsigned division remainder. 119 | BPF_UREM = 0x60 120 | ## BPF PQR operation code: low multiplication. 121 | BPF_LMUL = 0x80 122 | ## BPF PQR operation code: signed high multiplication. 123 | BPF_SHMUL = 0xA0 124 | ## BPF PQR operation code: signed division quotient. 125 | BPF_SDIV = 0xC0 126 | ## BPF PQR operation code: signed division remainder. 127 | BPF_SREM = 0xE0 128 | 129 | # Operation codes -- BPF_JMP class: 130 | ## BPF JMP operation code: jump. 131 | BPF_JA = 0x00 132 | ## BPF JMP operation code: jump if equal. 133 | BPF_JEQ = 0x10 134 | ## BPF JMP operation code: jump if greater than. 135 | BPF_JGT = 0x20 136 | ## BPF JMP operation code: jump if greater or equal. 137 | BPF_JGE = 0x30 138 | ## BPF JMP operation code: jump if `src` & `reg`. 139 | BPF_JSET = 0x40 140 | ## BPF JMP operation code: jump if not equal. 141 | BPF_JNE = 0x50 142 | ## BPF JMP operation code: jump if greater than (signed). 143 | BPF_JSGT = 0x60 144 | ## BPF JMP operation code: jump if greater or equal (signed). 145 | BPF_JSGE = 0x70 146 | ## BPF JMP operation code: syscall function call. 147 | BPF_CALL = 0x80 148 | ## BPF JMP operation code: return from program. 149 | BPF_EXIT = 0x90 150 | ## BPF JMP operation code: jump if lower than. 151 | BPF_JLT = 0xa0 152 | ## BPF JMP operation code: jump if lower or equal. 153 | BPF_JLE = 0xb0 154 | ## BPF JMP operation code: jump if lower than (signed). 155 | BPF_JSLT = 0xc0 156 | ## BPF JMP operation code: jump if lower or equal (signed). 157 | BPF_JSLE = 0xd0 158 | 159 | 160 | # RELOCATIONS 161 | 162 | REL_TYPE = { 163 | 0: 'R_BPF_NONE', 164 | 1: 'R_BPF_64_64', 165 | 2: 'R_BPF_64_ABS64', 166 | 3: 'R_BPF_64_ABS32', 167 | 4: 'R_BPF_64_NODYLD32', 168 | 8: 'R_BPF_64_RELATIVE', # SOLANA SPEC (https://github.com/solana-labs/llvm-project/blob/038d472bcd0b82ff768b515cc77dfb1e3a396ca8/llvm/include/llvm/BinaryFormat/ELFRelocs/BPF.def#L11) 169 | 10: 'R_BPF_64_32' 170 | } 171 | 172 | REL_PATCH_SIZE = { 173 | 0: None, 174 | 1: 32, 175 | 2: 64, 176 | 3: 32, 177 | 4: 32, 178 | 8: 32, 179 | 10: 32 180 | } -------------------------------------------------------------------------------- /solana/helpers.py: -------------------------------------------------------------------------------- 1 | def decode_name(name): 2 | name = name.replace('.rel.text.','') 3 | name = name.replace('.rel.data.rel.ro.','') 4 | return name -------------------------------------------------------------------------------- /solana/processor.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # "THE BEER-WARE LICENSE" (Revision 42): 3 | # wrote this file. As long as you 4 | # retain this notice you can do whatever you want with this stuff. If we meet 5 | # some day, and you think this stuff is worth it, you can buy me a beer in 6 | # return. Clement Berthaux 7 | # ---------------------------------------------------------------------------- 8 | 9 | from idaapi import (PR_ASSEMBLE, PR_SEGS, PR_DEFSEG32, PR_USE32, PRN_HEX, PR_RNAMESOK, PR_NO_SEGMOVE, 10 | ASH_HEXF3, AS_UNEQU, AS_COLON, ASB_BINF4, AS_N2CHR, 11 | SN_NOCHECK, SN_FORCE, STKVAR_VALID_SIZE, 12 | CF_USE1, CF_USE2, CF_USE3, CF_CHG1, 13 | CF_JUMP, CF_CALL, CF_STOP, 14 | o_reg, o_imm, dt_dword, dt_qword, 15 | o_near, o_displ, dt_word, o_phrase, o_mem, 16 | REF_OFF32, 17 | dr_O, dr_R, 18 | fl_F, fl_CN, fl_CF, fl_JN, 19 | STRTYPE_TERMCHR, 20 | OOF_SIGNED, OOFS_NEEDSIGN, OOFW_IMM, OOFW_64, OOFW_32, OOFW_16) 21 | 22 | 23 | import rust_demangler 24 | import idaapi 25 | import idautils 26 | import ctypes 27 | import os 28 | 29 | from solana.relocations import parse_relocation, process_relocations, apply_rel_mods, apply_relocation 30 | from solana.helpers import decode_name 31 | from solana.strings import add_string, recover_known_strings 32 | from solana.constants import * 33 | from solana.config import * 34 | 35 | class DecodingError(Exception): 36 | pass 37 | 38 | class EBPFProc(idaapi.processor_t): 39 | id = 0x8000 + 247 # 0x8000+ are reserved for third party plugins 40 | flag = PR_ASSEMBLE | PR_SEGS | PR_DEFSEG32 | PR_USE32 | PRN_HEX | PR_RNAMESOK | PR_NO_SEGMOVE 41 | cnbits = 8 42 | dnbits = 8 43 | psnames = ['EBPF'] 44 | plnames = ['Solana VM'] 45 | segreg_size = 0 46 | instruc_start = 0 47 | assembler = { 48 | 'flag': ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR, 49 | "uflag": 0, 50 | "name": "eBPF", 51 | "origin": ".org", 52 | "end": ".end", 53 | "cmnt": ";", 54 | "ascsep": '"', 55 | "accsep": "'", 56 | "esccodes": "\"'", 57 | "a_ascii": "db", 58 | "a_byte": "db", 59 | "a_word": "dw", 60 | 'a_dword': "dd", 61 | 'a_qword': "dq", 62 | "a_bss": "dfs %s", 63 | "a_seg": "seg", 64 | "a_curip": "PC", 65 | "a_public": "", 66 | "a_weak": "", 67 | "a_extrn": ".extern", 68 | "a_comdef": "", 69 | "a_align": ".align", 70 | "lbrace": "(", 71 | "rbrace": ")", 72 | "a_mod": "%", 73 | "a_band": "&", 74 | "a_bor": "|", 75 | "a_xor": "^", 76 | "a_bnot": "~", 77 | "a_shl": "<<", 78 | "a_shr": ">>", 79 | "a_sizeof_fmt": "size %s", 80 | 81 | } 82 | 83 | def __init__(self): 84 | idaapi.processor_t.__init__(self) 85 | 86 | self.init_instructions() 87 | self.init_registers() 88 | 89 | self.relocations = {} 90 | self.functions = {} 91 | self.sorted_strings = [] 92 | 93 | def ev_loader_elf_machine(self, li, machine_type, p_procname, p_pd, loader, reader): # doesn't work from ida python for some reason 94 | if machine_type == 247: 95 | p_procname = 'Solana VM' 96 | return machine_type 97 | 98 | def ev_newfile(self, fname): 99 | for ea, name in idautils.Names(): 100 | name = decode_name(name) 101 | self.functions[name] = ea 102 | idaapi.set_name(ea, name, SN_NOCHECK | SN_FORCE) # demangle function names 103 | seg = idaapi.getseg(ea) 104 | if seg.type == idaapi.SEG_XTRN: # create external functions 105 | idaapi.add_func(ea, ea+8) 106 | 107 | self.relocations, self.funcs, self.rodata, self.symtab = process_relocations(fname) 108 | self.sorted_strings = recover_known_strings(self.sorted_strings, self.symtab) 109 | 110 | return True 111 | 112 | # callback from demangle_name 113 | # since the default demangler in IDA takes C++ names, 114 | # here we replace it with rust_demangler 115 | # returns: [res_from_ev_demangle_name, outbuffer, res_from_demangle_name] 116 | def ev_demangle_name(self, name, disable_mask, demreq): 117 | try: 118 | return [1, rust_demangler.demangle(name), 1] # use rust demangler 119 | except Exception as e: 120 | return [1, name, 1] 121 | 122 | 123 | ''' ----- Instructions processing -----''' 124 | 125 | def init_instructions(self): 126 | # https://github.com/solana-labs/rbpf/blob/179a0f94b68ae0bef892b214750a54448d61b1be/src/ebpf.rs#L205 127 | 128 | self.OPCODES = { 129 | # MEM 130 | BPF_LD | BPF_IMM | BPF_DW: ('lddw', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 131 | BPF_LDX | BPF_MEM | BPF_W: ('ldxw', self._ana_reg_regdisp, CF_USE1|CF_USE2|CF_CHG1), 132 | BPF_LDX | BPF_MEM | BPF_H: ('ldxh', self._ana_reg_regdisp, CF_USE1|CF_USE2|CF_CHG1), 133 | BPF_LDX | BPF_MEM | BPF_B: ('ldxb', self._ana_reg_regdisp, CF_USE1|CF_USE2|CF_CHG1), 134 | BPF_LDX | BPF_MEM | BPF_DW: ('ldxdw', self._ana_reg_regdisp, CF_USE1|CF_USE2|CF_CHG1), 135 | BPF_ST | BPF_MEM | BPF_W: ('stw', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 136 | BPF_ST | BPF_MEM | BPF_H: ('sth', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 137 | BPF_ST | BPF_MEM | BPF_B: ('stb', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 138 | BPF_ST | BPF_MEM | BPF_DW: ('stdw', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 139 | BPF_STX | BPF_MEM | BPF_W: ('stxw', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 140 | BPF_STX | BPF_MEM | BPF_H: ('stxh', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 141 | BPF_STX | BPF_MEM | BPF_B: ('stxb', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 142 | BPF_STX | BPF_MEM | BPF_DW: ('stxdw', self._ana_regdisp_reg, CF_USE1|CF_USE2|CF_CHG1), 143 | 144 | # ALU 32 145 | BPF_ALU | BPF_K | BPF_ADD: ('add32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 146 | BPF_ALU | BPF_X | BPF_ADD: ('add32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 147 | BPF_ALU | BPF_K | BPF_SUB: ('sub32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 148 | BPF_ALU | BPF_X | BPF_SUB: ('sub32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 149 | BPF_ALU | BPF_K | BPF_MUL: ('mul32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 150 | BPF_ALU | BPF_X | BPF_MUL: ('mul32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 151 | BPF_ALU | BPF_K | BPF_DIV: ('div32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 152 | BPF_ALU | BPF_X | BPF_DIV: ('div32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 153 | BPF_ALU | BPF_K | BPF_OR: ('or32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 154 | BPF_ALU | BPF_X | BPF_OR: ('or32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 155 | BPF_ALU | BPF_K | BPF_AND: ('and32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 156 | BPF_ALU | BPF_X | BPF_AND: ('and32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 157 | BPF_ALU | BPF_K | BPF_LSH: ('lsh32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 158 | BPF_ALU | BPF_X | BPF_LSH: ('lsh32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 159 | BPF_ALU | BPF_K | BPF_RSH: ('rsh32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 160 | BPF_ALU | BPF_X | BPF_RSH: ('rsh32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 161 | BPF_ALU | BPF_NEG: ('neg32', self._ana_1reg, CF_USE1|CF_USE2|CF_CHG1), 162 | BPF_ALU | BPF_K | BPF_MOD: ('mod32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 163 | BPF_ALU | BPF_X | BPF_MOD: ('mod32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 164 | BPF_ALU | BPF_K | BPF_XOR: ('xor32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 165 | BPF_ALU | BPF_X | BPF_XOR: ('xor32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 166 | BPF_ALU | BPF_K | BPF_MOV: ('mov32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 167 | BPF_ALU | BPF_X | BPF_MOV: ('mov32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 168 | BPF_ALU | BPF_K | BPF_ARSH: ('arsh32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 169 | BPF_ALU | BPF_X | BPF_ARSH: ('arsh32', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 170 | 171 | BPF_PQR | BPF_K | BPF_LMUL: ('lmul32', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 172 | BPF_PQR | BPF_X | BPF_LMUL: ('lmul32', self._ana_2regs, CF_USE1 |CF_USE2|CF_CHG1), 173 | # BPF_PQR | BPF_K | BPF_UHMUL: ('uhmul32', self._ana_reg_imm, CF_USE1 | CF_USE2), 174 | # BPF_PQR | BPF_X | BPF_UHMUL: ('uhmul32', self._ana_2regs, CF_USE1 | CF_USE2), 175 | BPF_PQR | BPF_K | BPF_UDIV: ('udiv32', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 176 | BPF_PQR | BPF_X | BPF_UDIV: ('udiv32', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 177 | BPF_PQR | BPF_K | BPF_UREM: ('urem32', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 178 | BPF_PQR | BPF_X | BPF_UREM: ('urem32', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 179 | # BPF_PQR | BPF_K | BPF_SHMUL: ('shmul32', self._ana_reg_imm, CF_USE1 | CF_USE2), 180 | # BPF_PQR | BPF_X | BPF_SHMUL: ('shmul32', self._ana_2regs, CF_USE1 | CF_USE2), 181 | BPF_PQR | BPF_K | BPF_SDIV: ('sdiv32', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 182 | BPF_PQR | BPF_X | BPF_SDIV: ('sdiv32', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 183 | BPF_PQR | BPF_K | BPF_SREM: ('srem32', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 184 | BPF_PQR | BPF_X | BPF_SREM: ('srem32', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 185 | 186 | 187 | BPF_ALU | BPF_K | BPF_END: ('le', self._ana_reg_imm, CF_USE1|CF_CHG1), 188 | BPF_ALU | BPF_X | BPF_END: ('be', self._ana_reg_imm, CF_USE1|CF_CHG1), 189 | 190 | # ALU 64 191 | BPF_ALU64 | BPF_K | BPF_ADD: ('add64', self._ana_reg_imm, CF_USE1 | CF_USE2 | CF_CHG1), 192 | BPF_ALU64 | BPF_X | BPF_ADD: ('add64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 193 | BPF_ALU64 | BPF_K | BPF_SUB: ('sub64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 194 | BPF_ALU64 | BPF_X | BPF_SUB: ('sub64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 195 | BPF_ALU64 | BPF_K | BPF_MUL: ('mul64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 196 | BPF_ALU64 | BPF_X | BPF_MUL: ('mul64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 197 | BPF_ALU64 | BPF_K | BPF_DIV: ('div64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 198 | BPF_ALU64 | BPF_X | BPF_DIV: ('div64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 199 | BPF_ALU64 | BPF_K | BPF_OR: ('or64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 200 | BPF_ALU64 | BPF_X | BPF_OR: ('or64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 201 | BPF_ALU64 | BPF_K | BPF_AND: ('and64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 202 | BPF_ALU64 | BPF_X | BPF_AND: ('and64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 203 | BPF_ALU64 | BPF_K | BPF_LSH: ('lsh64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 204 | BPF_ALU64 | BPF_X | BPF_LSH: ('lsh64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 205 | BPF_ALU64 | BPF_K | BPF_RSH: ('rsh64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 206 | BPF_ALU64 | BPF_X | BPF_RSH: ('rsh64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 207 | BPF_ALU64 | BPF_NEG: ('neg64', self._ana_1reg, CF_USE1|CF_USE2|CF_CHG1), 208 | BPF_ALU64 | BPF_K | BPF_MOD: ('mod64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 209 | BPF_ALU64 | BPF_X | BPF_MOD: ('mod64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 210 | BPF_ALU64 | BPF_K | BPF_XOR: ('xor64', self._ana_reg_imm, CF_USE1|CF_USE2|CF_CHG1), 211 | BPF_ALU64 | BPF_X | BPF_XOR: ('xor64', self._ana_2regs, CF_USE1|CF_USE2|CF_CHG1), 212 | BPF_ALU64 | BPF_K | BPF_MOV: ('mov64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 213 | BPF_ALU64 | BPF_X | BPF_MOV: ('mov64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 214 | BPF_ALU64 | BPF_K | BPF_ARSH: ('arsh64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 215 | BPF_ALU64 | BPF_X | BPF_ARSH: ('arsh64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 216 | BPF_ALU64 | BPF_K | BPF_HOR: ('hor64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), # new, SOLANA SPEC? 217 | 218 | BPF_PQR | BPF_B | BPF_K | BPF_LMUL: ('lmul64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 219 | BPF_PQR | BPF_B | BPF_X | BPF_LMUL: ('lmul64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 220 | BPF_PQR | BPF_B | BPF_K | BPF_UHMUL: ('uhmul64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 221 | BPF_PQR | BPF_B | BPF_X | BPF_UHMUL: ('uhmul64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 222 | BPF_PQR | BPF_B | BPF_K | BPF_UDIV: ('udiv64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 223 | BPF_PQR | BPF_B | BPF_X | BPF_UDIV: ('udiv64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 224 | BPF_PQR | BPF_B | BPF_K | BPF_UREM: ('urem64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 225 | BPF_PQR | BPF_B | BPF_X | BPF_UREM: ('urem64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 226 | BPF_PQR | BPF_B | BPF_K | BPF_SHMUL: ('shmul64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 227 | BPF_PQR | BPF_B | BPF_X | BPF_SHMUL: ('shmul64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 228 | BPF_PQR | BPF_B | BPF_K | BPF_SDIV: ('sdiv64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 229 | BPF_PQR | BPF_B | BPF_X | BPF_SDIV: ('sdiv64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 230 | BPF_PQR | BPF_B | BPF_K | BPF_SREM: ('srem64', self._ana_reg_imm, CF_USE1 | CF_USE2|CF_CHG1), 231 | BPF_PQR | BPF_B | BPF_X | BPF_SREM: ('srem64', self._ana_2regs, CF_USE1 | CF_USE2|CF_CHG1), 232 | 233 | # BRANCHES 234 | BPF_JMP | BPF_JA: ('ja', self._ana_jmp, CF_USE1|CF_JUMP), 235 | BPF_JMP | BPF_K | BPF_JEQ: ('jeq', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 236 | BPF_JMP | BPF_X | BPF_JEQ: ('jeq', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 237 | BPF_JMP | BPF_K | BPF_JGT: ('jgt', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 238 | BPF_JMP | BPF_X | BPF_JGT: ('jgt', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 239 | BPF_JMP | BPF_K | BPF_JGE: ('jge', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 240 | BPF_JMP | BPF_X | BPF_JGE: ('jge', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 241 | BPF_JMP | BPF_K | BPF_JLT: ('jlt', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 242 | BPF_JMP | BPF_X | BPF_JLT: ('jlt', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 243 | BPF_JMP | BPF_K | BPF_JLE: ('jle', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), # new 244 | BPF_JMP | BPF_X | BPF_JLE: ('jle', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), # new 245 | BPF_JMP | BPF_K | BPF_JSET: ('jset', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 246 | BPF_JMP | BPF_X | BPF_JSET: ('jset', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 247 | BPF_JMP | BPF_K | BPF_JNE: ('jne', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 248 | BPF_JMP | BPF_X | BPF_JNE: ('jne', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 249 | 250 | BPF_JMP | BPF_K | BPF_JSGT: ('jsgt', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 251 | BPF_JMP | BPF_X | BPF_JSGT: ('jsgt', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 252 | BPF_JMP | BPF_K | BPF_JSGE: ('jsge', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 253 | BPF_JMP | BPF_X | BPF_JSGE: ('jsge', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 254 | BPF_JMP | BPF_K | BPF_JSLT: ('jslt', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), 255 | BPF_JMP | BPF_X | BPF_JSLT: ('jslt', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), # new 256 | BPF_JMP | BPF_K | BPF_JSLE: ('jsle', self._ana_cond_jmp_reg_imm, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), # new 257 | BPF_JMP | BPF_X | BPF_JSLE: ('jsle', self._ana_cond_jmp_reg_reg, CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP), # new 258 | 259 | BPF_JMP | BPF_CALL: ('call', self._ana_call, CF_USE1|CF_CALL), # call imm 260 | BPF_JMP | BPF_X | BPF_CALL: ('callx', self._ana_callx, CF_USE1|CF_CALL), # tail call 261 | BPF_JMP | BPF_EXIT: ('exit', self._ana_nop, CF_STOP) # return r0 262 | } 263 | 264 | self.instruc_end = 0xff 265 | self.instruc = [({'name':self.OPCODES[i][0], 'feature':self.OPCODES[i][2]} if i in self.OPCODES else {'name':'unknown_opcode', 'feature':0}) for i in range(0xff)] 266 | 267 | def init_registers(self): 268 | self.reg_names = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'CS', 'DS'] 269 | 270 | self.reg_cs = 0 271 | self.reg_ds = 1 272 | 273 | self.reg_first_sreg = self.reg_cs 274 | self.reg_last_sreg = self.reg_ds 275 | 276 | self.reg_code_sreg = self.reg_cs 277 | self.reg_data_sreg = self.reg_ds 278 | 279 | def ev_ana_insn(self, insn): 280 | try: 281 | return self._ana(insn) 282 | except DecodingError: 283 | return 0 284 | 285 | def _ana(self, insn): 286 | self.opcode = insn.get_next_byte() 287 | registers = insn.get_next_byte() 288 | 289 | self.src = (registers >> 4) & 15 290 | self.dst = registers & 15 291 | 292 | self.off = insn.get_next_word() 293 | 294 | self.imm = insn.get_next_dword() 295 | 296 | if self.opcode == BPF_LD | BPF_IMM | BPF_DW: 297 | insn.get_next_dword() 298 | imm2 = insn.get_next_dword() 299 | self.imm += imm2 << 32 300 | 301 | insn.itype = self.opcode 302 | 303 | if self.opcode not in self.OPCODES: 304 | raise DecodingError("wuut") 305 | 306 | self.OPCODES[self.opcode][1](insn) 307 | 308 | return insn.size 309 | 310 | def _ana_nop(self, insn): 311 | pass 312 | 313 | def _ana_reg_imm(self, insn): 314 | insn[0].type = o_reg 315 | insn[0].dtype = dt_dword 316 | insn[0].reg = self.dst 317 | 318 | insn[1].type = o_imm 319 | # special quad-word load 320 | if self.opcode == BPF_LD | BPF_IMM | BPF_DW: 321 | insn[1].dtype = dt_qword 322 | else: 323 | insn[1].dtype = dt_dword 324 | 325 | insn[1].value = self.imm 326 | 327 | insn[0].addr = insn.ea 328 | insn[1].addr = insn.ea 329 | 330 | def _ana_1reg(self, insn): 331 | insn[0].type = o_reg 332 | insn[0].dtype = dt_dword 333 | insn[0].reg = self.dst 334 | 335 | def _ana_2regs(self, insn): 336 | insn[0].type = o_reg 337 | insn[0].dtype = dt_dword 338 | insn[0].reg = self.dst 339 | 340 | insn[1].type = o_reg 341 | insn[1].dtype = dt_dword 342 | insn[1].reg = self.src 343 | 344 | def _ana_call(self, insn): 345 | insn[0].type = o_near 346 | insn[0].value = self.imm 347 | insn[0].dtype = dt_dword 348 | 349 | if insn.ea in self.relocations: 350 | extern_ea = idaapi.get_segm_by_name("extern").start_ea 351 | target_addr = idaapi.get_name_ea(extern_ea, self.relocations[insn.ea]['name']) 352 | if target_addr == idaapi.BADADDR: 353 | target_addr = idaapi.get_name_ea(extern_ea, "__imp_" + self.relocations[insn.ea]['name']) 354 | if target_addr == idaapi.BADADDR: 355 | target_addr = idaapi.get_name_ea(0, self.relocations[insn.ea]['name']) 356 | if target_addr == idaapi.BADADDR: 357 | insn[0].addr = idaapi.BADADDR 358 | return 359 | 360 | insn[0].addr = target_addr 361 | 362 | if self.src == 0: 363 | to_patch = target_addr // 8 364 | elif self.src == 1: 365 | to_patch = (target_addr - 8 - insn.ea) // 8 366 | else: 367 | to_patch = None 368 | print("UNKNOWN CALL TYPE") 369 | 370 | if to_patch: 371 | try: 372 | idaapi.patch_bytes(insn.ea + 4, to_patch.to_bytes(4, byteorder='little', signed=True)) 373 | except Exception as e: 374 | print(f"[{hex(insn.ea)}] Patching call at {hex(insn.ea)} to {hex(to_patch)} (original: {to_patch}) failed: {e}") 375 | else: 376 | offset = ctypes.c_int32(self.imm).value 377 | if self.src == 0: 378 | # call imm 379 | insn[0].addr = 8 * offset 380 | elif self.src == 1: 381 | # tail call 382 | insn[0].addr = 8 * offset + insn.ea + 8 383 | else: 384 | print("UNKNOWN CALL TYPE") 385 | 386 | def _ana_callx(self, insn): 387 | insn[0].type = o_reg 388 | insn[0].dtype = dt_dword 389 | insn[0].reg = self.imm 390 | 391 | def _ana_jmp(self, insn): 392 | insn[0].type = o_near 393 | offset = ctypes.c_int16(self.off).value 394 | if offset < 0: 395 | pass 396 | insn[0].addr = 8*offset + insn.ea + 8 397 | insn[0].dtype = dt_word 398 | 399 | def _ana_cond_jmp_reg_imm(self, insn): 400 | insn[0].type = o_reg 401 | insn[0].dtype = dt_dword 402 | insn[0].reg = self.dst 403 | 404 | insn[1].type = o_imm 405 | insn[1].value = self.imm 406 | insn[1].dtype = dt_dword 407 | 408 | offset = ctypes.c_int16(self.off).value 409 | if offset < 0: 410 | pass 411 | insn[2].type = o_near 412 | insn[2].addr = 8 * offset + insn.ea + 8 413 | insn[2].dtype = dt_dword 414 | 415 | def _ana_cond_jmp_reg_reg(self, insn): 416 | insn[0].type = o_reg 417 | insn[0].dtype = dt_dword 418 | insn[0].reg = self.dst 419 | 420 | insn[1].type = o_reg 421 | insn[1].dtype = dt_dword 422 | insn[1].reg = self.src 423 | 424 | offset = ctypes.c_int16(self.off).value 425 | if offset < 0: 426 | pass 427 | insn[2].type = o_near 428 | insn[2].addr = 8 * offset + insn.ea + 8 429 | insn[2].dtype = dt_dword 430 | 431 | def _ana_regdisp_reg(self, insn): 432 | insn[0].type = o_displ 433 | insn[0].dtype = dt_word 434 | insn[0].value = self.off 435 | insn[0].phrase = self.dst 436 | 437 | insn[1].type = o_reg 438 | insn[1].dtype = dt_dword 439 | insn[1].reg = self.src 440 | 441 | def _ana_regdisp_reg_atomic(self, insn): 442 | insn[0].type = o_displ 443 | insn[0].dtype = dt_word 444 | insn[0].value = self.off 445 | insn[0].phrase = self.dst 446 | 447 | insn[1].type = o_reg 448 | insn[1].dtype = dt_dword 449 | insn[1].reg = self.src 450 | 451 | insn[2].type = o_imm 452 | insn[2].dtype = dt_dword 453 | insn[2].value = self.imm 454 | 455 | def _ana_reg_regdisp(self, insn): 456 | insn[0].type = o_reg 457 | insn[0].dtype = dt_dword 458 | insn[0].reg = self.dst 459 | 460 | insn[1].type = o_displ 461 | insn[1].dtype = dt_word 462 | insn[1].value = self.off 463 | insn[1].phrase = self.src 464 | 465 | if self.opcode in [0x40, 0x48, 0x50, 0x58]: 466 | insn[0].reg = 0 # hardcoded r0 destination 467 | insn[1].value = self.imm # use imm not offset for displacement 468 | insn[1].dtype = dt_dword # imm are 32-bit, off are 16-bit. 469 | 470 | 471 | def _ana_phrase_imm(self, insn): 472 | insn[0].type = o_reg 473 | insn[0].dtype = dt_dword 474 | insn[0].reg = 0 # hardcode destination to r0 475 | 476 | insn[1].type = o_phrase 477 | insn[1].dtype = dt_dword 478 | insn[1].value = self.imm 479 | 480 | def ev_emu_insn(self, insn): 481 | Feature = insn.get_canon_feature() 482 | 483 | if Feature & CF_JUMP: 484 | dst_op_index = 0 if insn.itype == 0x5 else 2 485 | insn.add_cref(insn[dst_op_index].addr, insn[dst_op_index].offb, fl_JN) 486 | idaapi.remember_problem(idaapi.PR_JUMP, insn.ea) 487 | 488 | if insn[0].type == o_displ or insn[1].type == o_displ: 489 | op_ind = 0 if insn[0].type == o_displ else 1 490 | # TODO: trace sp when it changes and call add_auto_stkpnt 491 | if idaapi.may_create_stkvars(): 492 | val = ctypes.c_int16(insn[op_ind].value).value # create_stkvar takes signed value 493 | if insn.create_stkvar(insn[op_ind], val, STKVAR_VALID_SIZE): 494 | idaapi.op_stkvar(insn.ea, op_ind) 495 | 496 | if insn[1].type == o_imm and insn[1].dtype == dt_qword: 497 | if insn.ea in self.relocations: 498 | self.sorted_strings = apply_relocation(self.functions, self.rodata, self.sorted_strings, insn, self.relocations[insn.ea]) 499 | 500 | abort = False 501 | if Feature & CF_CALL: 502 | if insn.ea in self.relocations: 503 | self.sorted_strings = apply_relocation(self.functions, self.rodata, self.sorted_strings, insn, self.relocations[insn.ea]) 504 | if self.relocations[insn.ea]['name'] == 'abort': 505 | abort = True 506 | else: 507 | insn.add_cref(insn[0].addr, insn[0].offb, fl_CF) 508 | 509 | # continue execution flow if not stop instruction (exit), not abort, and not unconditional jump 510 | flow = (Feature & CF_STOP == 0) and not abort and not insn.itype == 0x5 511 | 512 | if flow: 513 | insn.add_cref(insn.ea + insn.size, 0, fl_F) 514 | 515 | for op in insn: 516 | if op.type == o_imm and op.dtype == dt_qword: 517 | addr = op.value 518 | seg = idaapi.getseg(addr) 519 | if seg: 520 | if seg.sclass == 6: # CONST .rodata 521 | try: 522 | s = idaapi.get_strlit_contents(addr, -1, STRTYPE_TERMCHR).decode() 523 | idaapi.add_dref(op.addr, addr, dr_R) 524 | except Exception as e: 525 | pass 526 | 527 | return True 528 | 529 | def ev_out_insn(self, ctx): 530 | cmd = ctx.insn 531 | ft = cmd.get_canon_feature() 532 | buf = ctx.outbuf 533 | 534 | # handle byteswap instruction suffix encoded in immediate, don't print immediate 535 | if cmd.itype == 0xd4 or cmd.itype == 0xdc: 536 | # directly use immediate as suffix in decimal 537 | # analysis function sets second operand as immediate 538 | if cmd.ops[1].type == o_imm: 539 | ctx.out_mnem(15, f"{cmd.ops[1].value}") 540 | else: 541 | print("[ev_out_insn] analysis error: invalid 2nd operand type for byteswap instruction") 542 | # special handling for atomic instruction, mnemonic is determined by immediate, not opcode 543 | elif cmd.itype == 0xdb or cmd.itype == 0xc3: 544 | atomic_alu_ops = [BPF_ADD, BPF_AND, BPF_OR, BPF_XOR] 545 | atomic_alu_fetch_ops = [op | BPF_FETCH for op in atomic_alu_ops] 546 | if cmd.ops[2].type == o_imm: 547 | # TODO: add size/width to disassembly? 548 | if cmd.ops[2].value in atomic_alu_ops: 549 | # first case; 'lock' instruction we first came across 550 | ctx.out_mnem(15, f" {bpf_alu_string[cmd.ops[2].value]}") 551 | elif cmd.ops[2].value in atomic_alu_fetch_ops: 552 | print("[ev_out_insn] untested case for atomic instruction: ALU fetch op") 553 | ctx.out_mnem(15, f" fetch {bpf_alu_string[cmd.ops[2].value]}") 554 | elif cmd.ops[2].value == BPF_CMPXCHG: 555 | print("[ev_out_insn] untested case for atomic instruction: CMPXCHG") 556 | ctx.out_mnem(15, " cmpxchg") 557 | elif cmd.ops[2].value == BPF_XCHG: 558 | print("[ev_out_insn] untested case for atomic instruction: XCHG") 559 | ctx.out_mnem(15, " xchg") 560 | else: 561 | print("[ev_out_insn] invalid operation type in immediate for atomic instruction") 562 | else: 563 | print("[ev_out_insn] analysis error: 3rd parameter for atomic instruction must be o_imm. debug me!") 564 | elif ft & CF_CALL and idaapi.get_name(cmd.ea) and idaapi.get_name(cmd.ea).startswith('sol_'): 565 | ctx.out_custom_mnem("syscall", 15) 566 | else: 567 | ctx.out_mnem(15) 568 | 569 | if ft & CF_USE1: 570 | if ft & CF_CALL: 571 | pass 572 | ctx.out_one_operand(0) 573 | if ft & CF_USE2: 574 | ctx.out_char(',') 575 | ctx.out_char(' ') 576 | ctx.out_one_operand(1) 577 | if ft & CF_USE3: 578 | ctx.out_char(',') 579 | ctx.out_char(' ') 580 | ctx.out_one_operand(2) 581 | idaapi.idaapi_Cvar().gl_comm = 1 582 | ctx.flush_outbuf() 583 | 584 | def ev_out_operand(self, ctx, op): 585 | if op.type == o_reg: 586 | ctx.out_register(self.reg_names[op.reg]) 587 | 588 | elif op.type == o_imm: 589 | if op.dtype == dt_qword: 590 | addr = op.value 591 | name = idaapi.get_name(addr) 592 | if name: 593 | ctx.out_name_expr(op, addr, idaapi.BADADDR) #1D1E8 594 | else: 595 | ctx.out_value(op, OOF_SIGNED|OOFW_IMM|OOFW_64) 596 | elif op.dtype == dt_dword: 597 | ctx.out_value(op, OOF_SIGNED|OOFW_IMM|OOFW_32) 598 | else: 599 | print(f"[ev_out_operand] immediate operand, unhandled dtype: {op.dtype:#8x}") 600 | ctx.out_value(op, OOF_SIGNED|OOFW_IMM|OOFW_32) 601 | 602 | elif op.type in [o_near, o_mem]: 603 | #print(f"[{hex(ctx.insn_ea)}] op.type: {op.type}, op.addr: {hex(op.addr)}") 604 | target_addr = idaapi.get_next_cref_from(ctx.insn_ea, 0) 605 | if target_addr != idaapi.BADADDR: 606 | ok = ctx.out_name_expr(op, target_addr, idaapi.BADADDR) 607 | else: 608 | ok = ctx.out_name_expr(op, op.addr, idaapi.BADADDR) 609 | if not ok: 610 | #print(f'[{hex(ctx.insn_ea)}] out_name_expr[0] failed: {op.addr}') 611 | ctx.out_tagon(idaapi.COLOR_ERROR) 612 | ctx.out_value(op, OOF_SIGNED|OOFW_IMM|OOFW_32) 613 | ctx.out_tagoff(idaapi.COLOR_ERROR) 614 | 615 | 616 | elif op.type == o_phrase: 617 | ctx.out_printf('skb') # text color is a bit off. fix later. 618 | ctx.out_symbol('[') 619 | ctx.out_value(op, OOF_SIGNED|OOFW_IMM|OOFW_32) # "OpDecimal" fails on this, figure out why & fix it. 620 | ctx.out_symbol(']') 621 | 622 | elif op.type == o_displ: 623 | if op.dtype == dt_dword: 624 | ctx.out_printf('skb') 625 | ctx.out_symbol('[') 626 | ctx.out_register(self.reg_names[op.phrase]) 627 | if op.value: 628 | if op.dtype == dt_word: 629 | ctx.out_value(op, OOFS_NEEDSIGN|OOF_SIGNED|OOFW_IMM|OOFW_16) 630 | elif op.dtype == dt_dword: 631 | ctx.out_value(op, OOFS_NEEDSIGN|OOF_SIGNED|OOFW_IMM|OOFW_32) 632 | else: 633 | print("[ev_out_operand] unexpected displacement dtype: {op.dtype:#8x}") 634 | ctx.out_value(op, OOFS_NEEDSIGN|OOF_SIGNED|OOFW_IMM) 635 | ctx.out_symbol(']') 636 | else: 637 | return False 638 | return True 639 | 640 | def ev_endbinary(self, *args): 641 | print(f'[INFO] ev_endbinary: {args}') 642 | 643 | def PROCESSOR_ENTRY(): 644 | return EBPFProc() 645 | -------------------------------------------------------------------------------- /solana/relocations.py: -------------------------------------------------------------------------------- 1 | import idaapi 2 | 3 | from elftools.elf.elffile import ELFFile 4 | from elftools.elf.relocation import RelocationSection 5 | from elftools.elf.sections import SymbolTableSection 6 | 7 | from solana.constants import REL_TYPE, REL_PATCH_SIZE 8 | from solana.config import STRINGS_PREVIEW_LIMIT 9 | from solana.strings import add_string 10 | from solana.helpers import decode_name 11 | 12 | def parse_relocation(rel_type, loc, val): 13 | type_ = REL_TYPE[rel_type] 14 | changes = [] 15 | if type_ == 'R_BPF_64_64': 16 | changes.append({'loc': loc + 4, 'val': val & 0xFFFFFFFF}) 17 | changes.append({'loc': loc + 8 + 4, 'val': val >> 32}) 18 | elif type_ == 'R_BPF_64_ABS64': 19 | changes.append({'loc': loc, 'val': val}) 20 | elif type_ == 'R_BPF_64_ABS32': 21 | pass 22 | elif type_ == 'R_BPF_64_NODYLD32': 23 | changes.append({'loc': loc, 'val': val & 0xFFFFFFFF}) 24 | elif type_ == 'R_BPF_64_32': 25 | changes.append({'loc': loc + 4, 'val': val & 0xFFFFFFFF}) 26 | elif type_ == 'R_BPF_64_RELATIVE': 27 | pass 28 | else: 29 | print(f'[WARN] unknown relocation type: {type_}') 30 | 31 | return changes 32 | 33 | def process_relocations(filename): 34 | with open(filename, 'rb') as f: 35 | elffile = ELFFile(f) 36 | 37 | sections = [] 38 | for section in elffile.iter_sections(): 39 | sections.append(section) 40 | 41 | relocations = {} 42 | functions = {} 43 | rodata = {} 44 | 45 | symtab_s = elffile.get_section_by_name('.symtab') 46 | symtab = [] 47 | 48 | if symtab_s: 49 | for sym in symtab_s.iter_symbols(): 50 | symtab.append({'name': sym.name, 'val': sym.entry['st_value'], 'size': sym.entry['st_size']}) 51 | 52 | for s in sections: 53 | # dynamic 54 | if s.header['sh_type'] == 'SHT_REL' and s.name == '.rel.dyn': 55 | dynsym = elffile.get_section_by_name(".dynsym") 56 | if not dynsym or not isinstance(dynsym, SymbolTableSection): 57 | print("dynsym not found. what?") 58 | continue 59 | 60 | symbols = [] 61 | for symbol in dynsym.iter_symbols(): 62 | symbols.append({'name': symbol.name, 'val': symbol.entry['st_value']}) 63 | 64 | for reloc in s.iter_relocations(): 65 | relsym = symbols[reloc['r_info_sym']] 66 | 67 | name = decode_name(relsym['name']) 68 | 69 | reloc_parsed = parse_relocation(reloc['r_info_type'], reloc['r_offset'], relsym['val']) 70 | mods = [] 71 | 72 | for r in reloc_parsed: 73 | mods.append({'loc': idaapi.get_fileregion_ea(r['loc']), 'val': r['val']}) 74 | 75 | relocation = { 76 | 'type': reloc['r_info_type'], 77 | 'name': name, 78 | 'mods': mods 79 | } 80 | 81 | relocations[idaapi.get_fileregion_ea(reloc['r_offset'])] = relocation 82 | 83 | continue 84 | 85 | if s.header['sh_type'] == 'SHT_REL': 86 | if not symtab_s: 87 | print("symtab section not found. what?") 88 | continue 89 | 90 | code_s = sections[s.header['sh_info']] 91 | base_offset = code_s.header['sh_offset'] 92 | 93 | section_name = decode_name(s.name) 94 | ea_addr = idaapi.get_fileregion_ea(base_offset) 95 | if s.name.startswith('.rel.text.'): 96 | functions[section_name] = ea_addr 97 | elif s.name.startswith('.rel.data.rel.ro.'): 98 | rodata[section_name] = ea_addr 99 | 100 | for reloc in s.iter_relocations(): 101 | relsym = symtab[reloc['r_info_sym']] 102 | 103 | name = decode_name(relsym['name']) 104 | 105 | reloc_parsed = parse_relocation(reloc['r_info_type'], reloc['r_offset'], relsym['val']) 106 | mods = [] 107 | 108 | for r in reloc_parsed: 109 | mods.append({'loc': idaapi.get_fileregion_ea(base_offset + r['loc']), 'val': r['val']}) 110 | 111 | relocation = { 112 | 'type': reloc['r_info_type'], 113 | 'name': name, 114 | 'mods': mods 115 | } 116 | 117 | relocations[idaapi.get_fileregion_ea(base_offset + reloc['r_offset'])] = relocation 118 | 119 | return relocations, functions, rodata, symtab 120 | 121 | def apply_rel_mods(mods, patch_size): 122 | for mod in mods: 123 | if mod['val'] != 0: 124 | if patch_size == 32: 125 | idaapi.patch_dword(mod['loc'], mod['val']) 126 | elif patch_size == 64: 127 | idaapi.patch_qword(mod['loc'], mod['val']) 128 | else: 129 | print('[ERROR] apply relocation: none type') 130 | 131 | def apply_relocation(functions, rodata, sorted_strings, insn, relocation): 132 | if relocation['type'] == 8: # lddw usually 133 | source_addr = insn[0].addr 134 | target_addr = insn[1].value 135 | seg = idaapi.getseg(target_addr) 136 | if seg.sclass == 3: # CONST .data.rel.ro 137 | # Resolve reference 138 | try: 139 | ref_addr = idaapi.get_dword(target_addr + 4) 140 | ref_len = idaapi.get_dword(target_addr + 8) 141 | seg_ = idaapi.getseg(ref_addr) 142 | if seg_.sclass == 6: # CONST .rodata 143 | sorted_strings, len_ = add_string(sorted_strings, ref_addr, ref_len) 144 | name = idaapi.get_name(ref_addr) 145 | if name: 146 | idaapi.create_dword(target_addr, 4) 147 | idaapi.create_dword(target_addr + 4, 4) 148 | idaapi.create_dword(target_addr + 8, 4) 149 | 150 | idaapi.op_offset(target_addr + 4, 0, idaapi.REF_OFF32) 151 | 152 | idaapi.set_name(target_addr, f"{name}_ref", idaapi.SN_FORCE) 153 | idaapi.set_name(target_addr + 4, f"{name}_ref_addr", idaapi.SN_FORCE) 154 | idaapi.set_name(target_addr + 8, f"{name}_ref_len", idaapi.SN_FORCE) 155 | 156 | idaapi.add_dref(insn.ea, target_addr, idaapi.dr_O) 157 | 158 | s = idaapi.get_strlit_contents(ref_addr, len_, idaapi.STRTYPE_TERMCHR).decode("utf-8", errors="ignore") 159 | s_preview = 'Ref to "' + s[:STRINGS_PREVIEW_LIMIT] + '"' 160 | if len(s) > STRINGS_PREVIEW_LIMIT: 161 | s_preview += "..." 162 | 163 | idaapi.set_cmt(source_addr, "", 0) 164 | idaapi.set_cmt(source_addr, s_preview, 0) 165 | 166 | 167 | except Exception as e: 168 | print(f'error during reference resolution: {e}') 169 | elif seg.sclass == 4: # CONST .text 170 | if not idaapi.get_func(target_addr): 171 | idaapi.add_func(target_addr) 172 | insn.add_cref(target_addr, insn[1].offb, idaapi.fl_CF) 173 | elif seg.sclass == 6: # CONST .rodata 174 | sorted_strings, len_ = add_string(sorted_strings, target_addr) 175 | insn.add_dref(target_addr, insn[1].offb, idaapi.dr_R) 176 | else: 177 | print(f'unhandled sclass: {seg.sclass}') 178 | 179 | patch_size = REL_PATCH_SIZE[relocation['type']] 180 | apply_rel_mods(relocation['mods'], patch_size) 181 | 182 | if REL_TYPE[relocation['type']] == 'R_BPF_64_32': # call 183 | if relocation['mods'][0]['val'] != 0: # internal function call 184 | insn.add_cref(relocation['mods'][0]['val'], insn[0].offb, idaapi.fl_CF) 185 | mods = parse_relocation(relocation['type'], insn.ea, relocation['mods'][0]['val']) 186 | apply_rel_mods(mods, patch_size) 187 | else: 188 | insn.add_cref(functions[relocation['name']], insn[0].offb, idaapi.fl_CF) 189 | 190 | if REL_TYPE[relocation['type']] == 'R_BPF_64_64': # lddw 191 | if relocation['name'] in rodata: 192 | data_addr = rodata[relocation['name']] 193 | mods = parse_relocation(relocation['type'], insn.ea, data_addr) 194 | apply_rel_mods(mods, patch_size) 195 | insn.add_dref(data_addr, insn[1].offb, idaapi.dr_R) 196 | 197 | return sorted_strings -------------------------------------------------------------------------------- /solana/strings.py: -------------------------------------------------------------------------------- 1 | import idaapi 2 | import idc 3 | 4 | def binary_search(addr, sorted_strings): 5 | left = 0 6 | right = len(sorted_strings) - 1 7 | 8 | while left <= right: 9 | mid = (left + right) // 2 10 | curr_addr = sorted_strings[mid][0] 11 | curr_len = sorted_strings[mid][1] 12 | 13 | if curr_addr <= addr < curr_addr + curr_len: 14 | return mid 15 | 16 | if curr_addr < addr: 17 | left = mid + 1 18 | else: 19 | right = mid - 1 20 | 21 | return left - 1 # Return insertion point - 1 22 | 23 | def find_previous_string_idx(addr, sorted_strings): 24 | if not sorted_strings: 25 | return None 26 | 27 | if addr < sorted_strings[0][0]: 28 | return None 29 | 30 | idx = binary_search(addr, sorted_strings) 31 | if idx >= 0 and idx < len(sorted_strings): 32 | return idx 33 | 34 | return None 35 | 36 | def find_next_string_idx(addr, sorted_strings): 37 | if not sorted_strings: 38 | return None 39 | 40 | if addr >= sorted_strings[-1][0]: 41 | return None 42 | 43 | idx = binary_search(addr, sorted_strings) 44 | next_idx = idx + 1 45 | 46 | if next_idx < len(sorted_strings): 47 | return next_idx 48 | 49 | return None 50 | 51 | def getstr(addr, max_len=512): 52 | data = idaapi.get_bytes(addr, max_len) 53 | for i in range(len(data)): 54 | if data[i] == 0: 55 | return data[:i] 56 | return data 57 | 58 | def add_string(sorted_strings, addr, size=None) -> tuple[list, int]: 59 | previous_idx = find_previous_string_idx(addr, sorted_strings) 60 | if previous_idx is None: 61 | if size is None or size == 0: 62 | s = getstr(addr, 512) 63 | size = len(s) 64 | 65 | if sorted_strings and sorted_strings[0][0] < addr + size: 66 | size = sorted_strings[0][0] - addr 67 | 68 | sorted_strings.insert(0, [addr, size]) 69 | success = idc.create_strlit(addr, addr + size) 70 | idaapi.set_name(addr, "str_%08X" % addr, idaapi.SN_FORCE) 71 | return sorted_strings, size 72 | 73 | previous_string = sorted_strings[previous_idx] 74 | if previous_string[0] == addr: 75 | if size is None or size == 0: 76 | size = 512 77 | 78 | if previous_string[1] > size: 79 | success = idc.create_strlit(addr, addr + size) 80 | if success: 81 | sorted_strings[previous_idx] = [addr, size] 82 | else: 83 | size = previous_string[1] 84 | return sorted_strings, size 85 | 86 | if previous_string[0] + previous_string[1] >= addr: 87 | # Patch previous string 88 | new_len = addr - previous_string[0] 89 | success = idc.create_strlit(previous_string[0], previous_string[0] + new_len) 90 | if success: 91 | sorted_strings[previous_idx] = [previous_string[0], new_len] 92 | 93 | next_string = None 94 | if previous_idx + 1 < len(sorted_strings): 95 | next_string = sorted_strings[previous_idx + 1] 96 | if next_string[0] == addr: 97 | if size not in [None, 0]: 98 | if next_string[1] != size: 99 | # Patch already existing string 100 | success = idc.create_strlit(addr, addr + size) 101 | if success: 102 | sorted_strings[previous_idx + 1] = [addr, size] 103 | else: 104 | return sorted_strings, sorted_strings[previous_idx + 1][1] 105 | return sorted_strings, size 106 | 107 | if size is None or size == 0: # 285c0 108 | if next_string is not None: 109 | size = next_string[0] - addr 110 | else: 111 | s = getstr(addr, 512) 112 | size = len(s) 113 | 114 | if size == 0: 115 | return sorted_strings, size 116 | 117 | success = idc.create_strlit(addr, addr + size) 118 | if success: 119 | sorted_strings.insert(previous_idx + 1, [addr, size]) 120 | idaapi.set_name(addr, "str_%08X" % addr, idaapi.SN_FORCE) 121 | return sorted_strings, size 122 | 123 | def recover_known_strings(sorted_strings, symtab): 124 | _rodata = idaapi.get_segm_by_name(".rodata") 125 | 126 | strings_to_create = {} 127 | 128 | for s in symtab: 129 | if s['val'] >= _rodata.start_ea and s['val'] <= _rodata.end_ea: 130 | l = s['size'] 131 | if l > 0: 132 | strings_to_create[s['val']] = l 133 | 134 | _data_rel_ro = idaapi.get_segm_by_name(".data.rel.ro") 135 | start_ea = _data_rel_ro.start_ea 136 | loopcount = _data_rel_ro.end_ea - start_ea 137 | 138 | for addr in range(0, loopcount - 4, 4): 139 | Addr = idaapi.get_dword(start_ea+addr) 140 | l = idaapi.get_dword(start_ea+addr+4) 141 | if l < 1024 and Addr + l < 2**32: 142 | if Addr >= _rodata.start_ea and Addr <= _rodata.end_ea: 143 | if Addr not in strings_to_create: 144 | strings_to_create[Addr] = l 145 | 146 | for k in strings_to_create.keys(): 147 | if strings_to_create[k] > 0: 148 | sorted_strings, size = add_string(sorted_strings, k, strings_to_create[k]) 149 | 150 | return sorted_strings 151 | --------------------------------------------------------------------------------