├── README.md ├── ebpf.py └── img └── bpf_ida.png /README.md: -------------------------------------------------------------------------------- 1 | # eBPF IDA Proc 2 | 3 | This is an IDA Processor that can be used to disassemble eBPF bytecode. It was developed for a challenge. As such, it hasn't been tested with any other target than the crackme-esque filter I developed it for. 4 | 5 | ## Author 6 | 7 | Clément Berthaux - clement (dot) berthaux (at) synacktiv (dot) com 8 | 9 | ## Installation 10 | 11 | You just need to place `ebpf.py` in your `IDA_ROOT\procs` folder. 12 | 13 | ![Example of filter opened in IDA](img/bpf_ida.png) 14 | -------------------------------------------------------------------------------- /ebpf.py: -------------------------------------------------------------------------------- 1 | # ---------------------------------------------------------------------------- 2 | # "THE BEER-WARE LICENSE" (Revision 42): 3 | # wrote this file. As long as you 4 | # retain this notice you can do whatever you want with this stuff. If we meet 5 | # some day, and you think this stuff is worth it, you can buy me a beer in 6 | # return. 
# Clement Berthaux
# ----------------------------------------------------------------------------

from idaapi import *
from idc import *


class DecodingError(Exception):
    """Raised while analysing bytes that do not form a known instruction."""
    pass


class INST_TYPES(object):
    pass


class EBPFProc(processor_t):
    """IDA processor module that disassembles eBPF bytecode.

    Every instruction is decoded from a fixed 8-byte record: opcode byte,
    register byte (source in the high nibble, destination in the low
    nibble), a 16-bit offset and a 32-bit immediate.  ``lddw`` (0x18) uses a
    second 8-byte record whose immediate supplies the upper 32 bits.

    The opcode byte is used directly as the IDA instruction code (``itype``).
    """

    id = 0xeb7f
    flag = PR_ASSEMBLE | PR_SEGS | PR_DEFSEG32 | PR_USE32 | PRN_HEX | PR_RNAMESOK | PR_NO_SEGMOVE
    cnbits = 8
    dnbits = 8
    psnames = ['EBPF']
    plnames = ['EBPF']
    segreg_size = 0
    instruc_start = 0
    assembler = {
        'flag': ASH_HEXF3 | AS_UNEQU | AS_COLON | ASB_BINF4 | AS_N2CHR,
        "uflag": 0,
        "name": "wut",
        "origin": ".org",
        "end": ".end",
        "cmnt": ";",
        "ascsep": '"',
        "accsep": "'",
        "esccodes": "\"'",
        "a_ascii": "db",
        "a_byte": "db",
        "a_word": "dw",
        'a_dword': "dd",
        'a_qword': "dq",
        "a_bss": "dfs %s",
        "a_seg": "seg",
        "a_curip": "PC",
        "a_public": "",
        "a_weak": "",
        "a_extrn": ".extern",
        "a_comdef": "",
        "a_align": ".align",
        "lbrace": "(",
        "rbrace": ")",
        "a_mod": "%",
        "a_band": "&",
        "a_bor": "|",
        "a_xor": "^",
        "a_bnot": "~",
        "a_shl": "<<",
        "a_shr": ">>",
        "a_sizeof_fmt": "size %s",
    }

    # Size in bytes of one instruction record; jump offsets are counted in
    # these units.
    INSN_SIZE = 8
    # Opcodes that need special handling outside the decoding table.
    OP_JA = 0x05
    OP_LDDW = 0x18

    def __init__(self):
        processor_t.__init__(self)

        self.init_instructions()
        self.init_registers()

    def init_instructions(self):
        """Build the opcode table and the instruction list handed to IDA.

        ``self.OPCODES`` maps an opcode byte to
        ``(mnemonic, operand decoder, canonical feature flags)``.
        """
        two_ops = CF_USE1 | CF_USE2
        cond_jump = CF_USE1 | CF_USE2 | CF_USE3 | CF_JUMP

        # there is a logic behind the opcode values but I chose to ignore it
        self.OPCODES = {
            # ALU
            0x07: ('add', self._ana_reg_imm, two_ops),
            0x0f: ('add', self._ana_2regs, two_ops),
            0x17: ('sub', self._ana_reg_imm, two_ops),
            0x1f: ('sub', self._ana_2regs, two_ops),
            0x27: ('mul', self._ana_reg_imm, two_ops),
            0x2f: ('mul', self._ana_2regs, two_ops),
            0x37: ('div', self._ana_reg_imm, two_ops),
            0x3f: ('div', self._ana_2regs, two_ops),
            0x47: ('or', self._ana_reg_imm, two_ops),
            0x4f: ('or', self._ana_2regs, two_ops),
            0x57: ('and', self._ana_reg_imm, two_ops),
            0x5f: ('and', self._ana_2regs, two_ops),
            0x67: ('lsh', self._ana_reg_imm, two_ops),
            0x6f: ('lsh', self._ana_2regs, two_ops),
            0x77: ('rsh', self._ana_reg_imm, two_ops),
            0x7f: ('rsh', self._ana_2regs, two_ops),
            # neg decodes a single register operand, so only operand 1 is used.
            0x87: ('neg', self._ana_1reg, CF_USE1),
            # mod is 0x97/0x9f; it used to be listed under 0x77/0x7f, which
            # silently replaced the rsh entries above.
            0x97: ('mod', self._ana_reg_imm, two_ops),
            0x9f: ('mod', self._ana_2regs, two_ops),
            0xa7: ('xor', self._ana_reg_imm, two_ops),
            0xaf: ('xor', self._ana_2regs, two_ops),
            0xb7: ('mov', self._ana_reg_imm, two_ops),
            0xbf: ('mov', self._ana_2regs, two_ops),
            0xc7: ('arsh', self._ana_reg_imm, two_ops),
            0xcf: ('arsh', self._ana_2regs, two_ops),

            # TODO: ALU 32 bit opcodes

            # MEM
            0x18: ('lddw', self._ana_reg_imm, two_ops),
            0x20: ('ldaw', self._ana_phrase_imm, two_ops),
            0x28: ('ldah', self._ana_phrase_imm, two_ops),
            0x30: ('ldab', self._ana_phrase_imm, two_ops),
            0x38: ('ldadw', self._ana_phrase_imm, two_ops),
            0x40: ('ldinw', self._ana_reg_regdisp, two_ops),
            0x48: ('ldinh', self._ana_reg_regdisp, two_ops),
            0x50: ('ldinb', self._ana_reg_regdisp, two_ops),
            0x58: ('ldindw', self._ana_reg_regdisp, two_ops),
            0x61: ('ldxw', self._ana_reg_regdisp, two_ops),
            0x69: ('ldxh', self._ana_reg_regdisp, two_ops),
            0x71: ('ldxb', self._ana_reg_regdisp, two_ops),
            0x79: ('ldxdw', self._ana_reg_regdisp, two_ops),
            # st* store the immediate field; stx* store the source register.
            0x62: ('stw', self._ana_regdisp_imm, two_ops),
            0x6a: ('sth', self._ana_regdisp_imm, two_ops),
            0x72: ('stb', self._ana_regdisp_imm, two_ops),
            0x7a: ('stdw', self._ana_regdisp_imm, two_ops),
            0x63: ('stxw', self._ana_regdisp_reg, two_ops),
            0x6b: ('stxh', self._ana_regdisp_reg, two_ops),
            0x73: ('stxb', self._ana_regdisp_reg, two_ops),
            0x7b: ('stxdw', self._ana_regdisp_reg, two_ops),

            # BRANCHES
            0x05: ('ja', self._ana_jmp, CF_USE1 | CF_JUMP),
            0x15: ('jeq', self._ana_cond_jmp_reg_imm, cond_jump),
            0x1d: ('jeq', self._ana_cond_jmp_reg_reg, cond_jump),
            0x25: ('jgt', self._ana_cond_jmp_reg_imm, cond_jump),
            0x2d: ('jgt', self._ana_cond_jmp_reg_reg, cond_jump),
            0x35: ('jge', self._ana_cond_jmp_reg_imm, cond_jump),
            0x3d: ('jge', self._ana_cond_jmp_reg_reg, cond_jump),
            0x45: ('jset', self._ana_cond_jmp_reg_imm, cond_jump),
            0x4d: ('jset', self._ana_cond_jmp_reg_reg, cond_jump),
            0x55: ('jne', self._ana_cond_jmp_reg_imm, cond_jump),
            0x5d: ('jne', self._ana_cond_jmp_reg_reg, cond_jump),
            0x65: ('jsgt', self._ana_cond_jmp_reg_imm, cond_jump),
            0x6d: ('jsgt', self._ana_cond_jmp_reg_reg, cond_jump),
            0x75: ('jsge', self._ana_cond_jmp_reg_imm, cond_jump),
            0x7d: ('jsge', self._ana_cond_jmp_reg_reg, cond_jump),

            0x85: ('call', self._ana_call, CF_USE1 | CF_CALL),

            0x95: ('ret', self._ana_nop, CF_STOP),
        }

        # Mnemonic -> opcode; when a mnemonic has several encodings the one
        # visited last wins.
        self.inames = {v[0]: k for k, v in self.OPCODES.items()}

        # One entry per possible opcode byte so that itype == opcode always
        # indexes a valid slot.
        self.instruc = [
            ({'name': self.OPCODES[i][0], 'feature': self.OPCODES[i][2]}
             if i in self.OPCODES
             else {'name': 'unknown_opcode', 'feature': 0})
            for i in range(0x100)
        ]
        self.instruc_end = len(self.instruc)

        # NOTE: icode_return is left unset (the original kept
        # `self.icode_return = 0x95` commented out).

    def init_registers(self):
        """Declare the register names and the segment-register indices."""
        self.regNames = ['r0', 'r1', 'r2', 'r3', 'r4', 'r5', 'r6', 'r7', 'r8', 'r9', 'r10', 'CS', 'DS']

        # The segment-register indices are positions in regNames; point them
        # at the CS/DS placeholder entries rather than at r0/r1.
        self.regFirstSreg = self.regCodeSreg = self.regNames.index('CS')
        self.regLastSreg = self.regDataSreg = self.regNames.index('DS')

    def ana(self):
        """Analyse one instruction; return its size, or 0 if it cannot be decoded."""
        try:
            return self._ana()
        except DecodingError:
            return 0

    def _ana(self):
        """Read the raw instruction fields and dispatch to the operand decoder.

        Raises DecodingError when the opcode is not in ``self.OPCODES``.
        """
        self.opcode = ua_next_byte()
        registers = ua_next_byte()

        self.src = (registers >> 4) & 15
        self.dst = registers & 15

        # Kept as the raw unsigned 16-bit value: it is stored as-is in
        # displacement operands.  Jump targets sign-extend it separately.
        self.off = ua_next_word()

        self.imm = ua_next_long()

        if self.opcode == self.OP_LDDW:
            # Second record: skip its opcode/registers/offset (4 bytes), then
            # take its immediate as the upper half of the 64-bit constant.
            ua_next_long()
            imm2 = ua_next_long()
            self.imm += imm2 << 32

        if self.opcode not in self.OPCODES:
            raise DecodingError("wuut")

        self.cmd.itype = self.opcode
        self.OPCODES[self.opcode][1]()

        return self.cmd.size

    # -- operand helpers ----------------------------------------------------

    def _set_reg(self, index, reg):
        """Make operand *index* the register numbered *reg*."""
        op = self.cmd[index]
        op.type = o_reg
        op.dtyp = dt_dword
        op.reg = reg

    def _set_imm(self, index, value, dtyp):
        """Make operand *index* an immediate of the given data type."""
        op = self.cmd[index]
        op.type = o_imm
        op.dtyp = dtyp
        op.value = value

    def _set_displ(self, index, base_reg):
        """Make operand *index* a ``[base_reg + off]`` memory reference."""
        op = self.cmd[index]
        op.type = o_displ
        op.dtyp = dt_dword
        op.value = self.off
        op.phrase = base_reg

    def _set_jump_target(self, index):
        """Make operand *index* the near code address this jump goes to.

        The offset field is a signed 16-bit instruction count relative to the
        following instruction, so it is sign-extended here; without that,
        backward jumps resolve to a wrong address.

        Raises DecodingError if the target would fall before address 0.
        """
        off = self.off
        if off & 0x8000:
            off -= 0x10000

        target = self.cmd.ea + self.INSN_SIZE * (off + 1)
        if target < 0:
            raise DecodingError("jump target out of range")

        op = self.cmd[index]
        op.type = o_near
        op.addr = target
        op.dtyp = dt_dword

    # -- operand decoders (one per instruction layout) ----------------------

    def _ana_nop(self):
        """No operands."""
        pass

    def _ana_reg_imm(self):
        """``dst, imm`` -- the immediate is 64-bit for lddw, 32-bit otherwise."""
        self._set_reg(0, self.dst)
        imm_type = dt_qword if self.opcode == self.OP_LDDW else dt_dword
        self._set_imm(1, self.imm, imm_type)

    def _ana_1reg(self):
        """``dst``"""
        self._set_reg(0, self.dst)

    def _ana_2regs(self):
        """``dst, src``"""
        self._set_reg(0, self.dst)
        self._set_reg(1, self.src)

    def _ana_call(self):
        """``imm``"""
        self._set_imm(0, self.imm, dt_dword)

    def _ana_jmp(self):
        """``target``"""
        self._set_jump_target(0)

    def _ana_cond_jmp_reg_imm(self):
        """``dst, imm, target``"""
        self._set_reg(0, self.dst)
        self._set_imm(1, self.imm, dt_dword)
        self._set_jump_target(2)

    def _ana_cond_jmp_reg_reg(self):
        """``dst, src, target``"""
        self._set_reg(0, self.dst)
        self._set_reg(1, self.src)
        self._set_jump_target(2)

    def _ana_regdisp_reg(self):
        """``[dst+off], src``"""
        self._set_displ(0, self.dst)
        self._set_reg(1, self.src)

    def _ana_regdisp_imm(self):
        """``[dst+off], imm``"""
        self._set_displ(0, self.dst)
        self._set_imm(1, self.imm, dt_dword)

    def _ana_reg_regdisp(self):
        """``dst, [src+off]``"""
        self._set_reg(0, self.dst)
        self._set_displ(1, self.src)

    def _ana_phrase_imm(self):
        """``dst, [imm]``"""
        self._set_reg(0, self.dst)

        op = self.cmd[1]
        op.type = o_phrase
        op.dtyp = dt_dword
        op.value = self.imm

    # -- emulation and output ----------------------------------------------

    def emu(self):
        """Add cross-references for the current instruction; always returns True."""
        cmd = self.cmd
        feature = cmd.get_canon_feature()

        if feature & CF_JUMP:
            # 'ja' has the target as its only operand; conditional jumps carry
            # it as the third one.
            target_op = cmd[0] if cmd.itype == self.OP_JA else cmd[2]
            ua_add_cref(target_op.offb, target_op.addr, fl_JN)
            QueueSet(Q_jumps, cmd.ea)

        # Treat the first displacement operand (if any) as a stack variable.
        for op_ind in (0, 1):
            if cmd[op_ind].type == o_displ:
                ua_stkvar2(cmd[op_ind], cmd[op_ind].value, 1)
                op_stkvar(cmd.ea, op_ind)
                break

        # No code xref is added for CF_CALL: the call operand is an immediate
        # (see _ana_call), so it carries no address to reference.

        # Execution falls through unless the instruction stops the flow or is
        # the unconditional jump.
        if not (feature & CF_STOP) and cmd.itype != self.OP_JA:
            ua_add_cref(0, cmd.ea + cmd.size, fl_F)

        return True

    def out(self):
        """Render the mnemonic and the comma-separated operands in use."""
        feature = self.cmd.get_canon_feature()
        buf = init_output_buffer(1024)
        OutMnem(15)

        # Operands are emitted in order and output stops at the first one the
        # instruction does not use.
        for index, use_flag in enumerate((CF_USE1, CF_USE2, CF_USE3)):
            if not feature & use_flag:
                break
            if index:
                OutChar(',')
                OutChar(' ')
            out_one_operand(index)

        term_output_buffer()
        cvar.gl_comm = 1
        MakeLine(buf)

    def outop(self, op):
        """Render one operand; return False for operand types not handled here."""
        if op.type == o_reg:
            out_register(self.regNames[op.reg])
        elif op.type == o_imm:
            OutValue(op, OOFW_IMM)
        elif op.type in [o_near, o_mem]:
            ok = out_name_expr(op, op.addr, BADADDR)
            if not ok:
                # No name could be produced: print the raw address as an
                # error and queue the instruction in the "no name" list.
                out_tagon(COLOR_ERROR)
                OutLong(op.addr, 16)
                out_tagoff(COLOR_ERROR)
                QueueMark(Q_noName, self.cmd.ea)
        elif op.type == o_phrase:
            out_symbol('[')
            OutValue(op, OOFW_IMM)
            out_symbol(']')
        elif op.type == o_displ:
            out_symbol('[')
            out_register(self.regNames[op.phrase])
            if op.value:
                OutValue(op, OOFS_NEEDSIGN | OOFW_IMM)
            out_symbol(']')
        else:
            return False
        return True


def PROCESSOR_ENTRY():
    """Entry point looked up by IDA: return the processor module instance."""
    return EBPFProc()

# ----------------------------------------------------------------------------
# /img/bpf_ida.png:
# https://raw.githubusercontent.com/saaph/eBPF_processor/4be35f656d0c8ae5201edb8cea0ec0f335ef045c/img/bpf_ida.png
--------------------------------------------------------------------------------