├── README.md └── luac.py /README.md: -------------------------------------------------------------------------------- 1 | # LuaPytecode 2 | Parses Lua 5.1 bytecode. For an updated fork of this, please check lundump.py in [this repository](https://github.com/CPunch/LuaDecompy). (It also features an experimental decompiler :D) 3 | 4 | # Example 5 | 6 | Loads a raw Lua bytecode dump: 7 | ```python 8 | import luac 9 | 10 | lc = luac.LuaUndump() 11 | chunk = lc.loadFile("test.luac") 12 | 13 | print("\n===== [[Disassembly]] =====\n") 14 | 15 | lc.print_dissassembly() 16 | ``` 17 | 18 | Or just parse Lua bytecode from an array of byte values: 19 | ```python 20 | import luac 21 | 22 | bytecode = "27\\76\\117\\97\\81\\0\\1\\4\\8\\4\\8\\0\\21\\0\\0\\0\\0\\0\\0\\0\\112\\114\\105\\110\\116\\40\\39\\104\\101\\108\\108\\111\\32\\119\\111\\114\\108\\100\\39\\41\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\2\\2\\4\\0\\0\\0\\5\\0\\0\\0\\65\\64\\0\\0\\28\\64\\0\\1\\30\\0\\128\\0\\2\\0\\0\\0\\4\\6\\0\\0\\0\\0\\0\\0\\0\\112\\114\\105\\110\\116\\0\\4\\12\\0\\0\\0\\0\\0\\0\\0\\104\\101\\108\\108\\111\\32\\119\\111\\114\\108\\100\\0\\0\\0\\0\\0\\4\\0\\0\\0\\1\\0\\0\\0\\1\\0\\0\\0\\1\\0\\0\\0\\1\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0\\0".split('\\') 23 | bytecode = list(map(int, bytecode)) 24 | lc = luac.LuaUndump() 25 | chunk = lc.decode_bytecode(bytecode) 26 | 27 | lc.print_dissassembly() 28 | ``` 29 | -------------------------------------------------------------------------------- /luac.py: -------------------------------------------------------------------------------- 1 | ''' 2 | l(un)dump.py 3 | 4 | A Lua5.1 cross-platform bytecode deserializer && serializer. This module pulls int and size_t sizes from the 5 | chunk header, meaning it should be able to deserialize lua bytecode dumps from most platforms, 6 | regardless of the host machine.
'''
l(un)dump.py

A Lua 5.1 cross-platform bytecode deserializer and serializer. This module
pulls the int and size_t sizes from the chunk header, meaning it should be
able to deserialize Lua bytecode dumps from most platforms, regardless of the
host machine.

For details on the Lua 5.1 bytecode format, see "A No-Frills Introduction to
Lua 5.1 VM Instructions"
(https://archive.org/download/a-no-frills-intro-to-lua-5.1-vm-instructions/a-no-frills-intro-to-lua-5.1-vm-instructions_archive.torrent)
as well as the lundump.c source file from the Lua 5.1 source.
'''

import struct
import array
from enum import IntEnum, Enum, auto
from typing import Optional


class InstructionType(Enum):
    '''Operand layout of a 32-bit instruction.'''
    ABC = auto()
    ABx = auto()
    AsBx = auto()


class Opcodes(IntEnum):
    '''Lua 5.1 opcode numbers (the low 6 bits of every instruction).'''
    MOVE = 0
    LOADK = 1
    LOADBOOL = 2
    LOADNIL = 3
    GETUPVAL = 4
    GETGLOBAL = 5
    GETTABLE = 6
    SETGLOBAL = 7
    SETUPVAL = 8
    SETTABLE = 9
    NEWTABLE = 10
    SELF = 11
    ADD = 12
    SUB = 13
    MUL = 14
    DIV = 15
    MOD = 16
    POW = 17
    UNM = 18
    NOT = 19
    LEN = 20
    CONCAT = 21
    JMP = 22
    EQ = 23
    LT = 24
    LE = 25
    TEST = 26
    TESTSET = 27
    CALL = 28
    TAILCALL = 29
    RETURN = 30
    FORLOOP = 31
    FORPREP = 32
    TFORLOOP = 33
    SETLIST = 34
    CLOSE = 35
    CLOSURE = 36
    VARARG = 37


class ConstType(IntEnum):
    '''Type tags used in a chunk's constant table.'''
    NIL = 0
    BOOL = 1
    NUMBER = 3
    STRING = 4


# opcodes whose B and C operands are both RK values
# (LE takes the same operands as EQ and LT, so it belongs here too)
_RKBCInstr = [Opcodes.SETTABLE, Opcodes.ADD, Opcodes.SUB, Opcodes.MUL, Opcodes.DIV,
              Opcodes.MOD, Opcodes.POW, Opcodes.EQ, Opcodes.LT, Opcodes.LE]
# opcodes whose C operand only is an RK value
_RKCInstr = [Opcodes.GETTABLE, Opcodes.SELF]
# opcodes whose Bx operand indexes the constant table
_KBx = [Opcodes.LOADK, Opcodes.GETGLOBAL, Opcodes.SETGLOBAL]

_LUAMAGIC = b'\x1bLua'

# characters that must be escaped inside a double-quoted Lua string literal.
# NUL uses the three-digit form so that a following digit is not absorbed.
_LUA_STRING_ESCAPES = {
    '\\': '\\\\',
    '"': '\\"',
    '\n': '\\n',
    '\r': '\\r',
    '\0': '\\000',
}


# is an 'RK' value a K? (result is true for K, false for R)
def whichRK(rk: int):
    return (rk & (1 << 8)) > 0


# read an RK as a K
def readRKasK(rk: int):
    return (rk & ~(1 << 8))


class Instruction:
    '''A single decoded instruction (or an opcode template in instr_lookup_tbl).'''

    def __init__(self, type: InstructionType, name: str) -> None:
        self.type = type
        self.name = name
        self.opcode: int = None
        self.A: int = None
        self.B: int = None
        self.C: int = None

    # 'RK's are special because they can be a register or a konstant.
    # a bitflag is read to determine which
    def __formatRK(self, rk: int) -> str:
        if whichRK(rk):
            return "K[" + str(readRKasK(rk)) + "]"
        else:
            return "R[" + str(rk) + "]"

    def toString(self):
        '''Return the mnemonic followed by its formatted operands.'''
        instr = "%10s" % self.name
        regs = ""

        if self.type == InstructionType.ABC:
            # by default, print the operands as plain numbers
            A = "%d" % self.A
            B = "%d" % self.B
            C = "%d" % self.C

            # these opcodes have RKs for B & C
            if self.opcode in _RKBCInstr:
                A = "R[%d]" % self.A
                B = self.__formatRK(self.B)
                C = self.__formatRK(self.C)
            elif self.opcode in _RKCInstr:  # just for C
                A = "R[%d]" % self.A
                C = self.__formatRK(self.C)

            regs = "%6s %6s %6s" % (A, B, C)
        elif self.type == InstructionType.ABx or self.type == InstructionType.AsBx:
            A = "R[%d]" % self.A
            B = "%d" % self.B

            if self.opcode in _KBx:
                B = "K[%d]" % self.B

            regs = "%6s %6s" % (A, B)

        return "%s : %s" % (instr, regs)

    def getAnnotation(self, chunk):
        '''Return a short description of the instruction, or "" if none exists.

        chunk is the Chunk owning this instruction; it is used to look up
        the constants referenced by LOADK and GETGLOBAL.
        '''
        if self.opcode == Opcodes.MOVE:
            return "move R[%d] into R[%d]" % (self.B, self.A)
        elif self.opcode == Opcodes.LOADK:
            return "load %s into R[%d]" % (chunk.getConstant(self.B).toCode(), self.A)
        elif self.opcode == Opcodes.GETGLOBAL:
            return 'move _G[%s] into R[%d]' % (chunk.getConstant(self.B).toCode(), self.A)
        elif self.opcode == Opcodes.ADD:
            return 'add %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
        elif self.opcode == Opcodes.SUB:
            return 'sub %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
        elif self.opcode == Opcodes.MUL:
            return 'mul %s to %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
        elif self.opcode == Opcodes.DIV:
            return 'div %s from %s, place into R[%d]' % (self.__formatRK(self.C), self.__formatRK(self.B), self.A)
        elif self.opcode == Opcodes.CONCAT:
            count = self.C - self.B + 1
            return "concat %d values from R[%d] to R[%d], store into R[%d]" % (count, self.B, self.C, self.A)
        else:
            return ""


class Constant:
    '''An entry of a chunk's constant table.'''

    def __init__(self, type: ConstType, data) -> None:
        self.type = type
        self.data = data

    def toString(self):
        return "[%s] %s" % (self.type.name, str(self.data))

    # format the constant so that it is parsable by lua
    def toCode(self):
        if self.type == ConstType.STRING:
            # escape characters that would otherwise end or corrupt the literal
            escaped = "".join(_LUA_STRING_ESCAPES.get(ch, ch) for ch in self.data)
            return "\"" + escaped + "\""
        elif self.type == ConstType.BOOL:
            if self.data:
                return "true"
            else:
                return "false"
        elif self.type == ConstType.NUMBER:
            # "%.14g" is Lua's own LUA_NUMBER_FMT; plain "%g" keeps only
            # 6 significant digits and would mangle e.g. 1234567
            return "%.14g" % self.data
        else:
            return "nil"


class Local:
    '''Debug record of a local variable: its name and the PC range it covers.'''

    def __init__(self, name: str, start: int, end: int):
        self.name = name
        self.start = start
        self.end = end


class Chunk:
    '''A function prototype: instructions, constants, nested protos and debug info.'''

    def __init__(self) -> None:
        self.constants: list[Constant] = []
        self.instructions: list[Instruction] = []
        self.protos: list[Chunk] = []

        self.name: str = "Unnamed proto"
        self.frst_line: int = 0
        self.last_line: int = 0
        self.numUpvals: int = 0
        self.numParams: int = 0
        self.isVarg: bool = False
        self.maxStack: int = 0

        self.upvalues: list[str] = []
        self.lineNums: list[int] = []
        self.locals: list[Local] = []

    def appendInstruction(self, instr: Instruction):
        self.instructions.append(instr)

    def appendConstant(self, const: Constant):
        self.constants.append(const)

    def appendProto(self, proto):
        self.protos.append(proto)

    def appendLine(self, line: int):
        self.lineNums.append(line)

    def appendLocal(self, local: Local):
        self.locals.append(local)

    def appendUpval(self, upval: str):
        self.upvalues.append(upval)

    def findLocal(self, pc: int) -> Optional[Local]:
        '''Return the first local whose [start, end] range contains pc, else None.'''
        for l in self.locals:
            if l.start <= pc and l.end >= pc:
                return l

        # there's no local information (may have been stripped)
        return None

    def getConstant(self, indx: int) -> Constant:
        return self.constants[indx]

    def print(self):
        '''Print constants, locals and disassembly, then recurse into the protos.'''
        print("\n==== [[" + str(self.name) + "'s constants]] ====\n")
        for i, const in enumerate(self.constants):
            print("%d: %s" % (i, const.toString()))

        print("\n==== [[" + str(self.name) + "'s locals]] ====\n")
        for i, local in enumerate(self.locals):
            print("R[%d]: %s" % (i, local.name))

        print("\n==== [[" + str(self.name) + "'s dissassembly]] ====\n")
        for i, instr in enumerate(self.instructions):
            print("[%3d] %-40s ; %s" % (i, instr.toString(), instr.getAnnotation(self)))

        if len(self.protos) > 0:
            print("\n==== [[" + str(self.name) + "'s protos]] ====\n")
            for z in self.protos:
                z.print()


# opcode templates, indexed by opcode number (same order as Opcodes)
instr_lookup_tbl = [
    Instruction(InstructionType.ABC, "MOVE"), Instruction(InstructionType.ABx, "LOADK"), Instruction(InstructionType.ABC, "LOADBOOL"),
    Instruction(InstructionType.ABC, "LOADNIL"), Instruction(InstructionType.ABC, "GETUPVAL"), Instruction(InstructionType.ABx, "GETGLOBAL"),
    Instruction(InstructionType.ABC, "GETTABLE"), Instruction(InstructionType.ABx, "SETGLOBAL"), Instruction(InstructionType.ABC, "SETUPVAL"),
    Instruction(InstructionType.ABC, "SETTABLE"), Instruction(InstructionType.ABC, "NEWTABLE"), Instruction(InstructionType.ABC, "SELF"),
    Instruction(InstructionType.ABC, "ADD"), Instruction(InstructionType.ABC, "SUB"), Instruction(InstructionType.ABC, "MUL"),
    Instruction(InstructionType.ABC, "DIV"), Instruction(InstructionType.ABC, "MOD"), Instruction(InstructionType.ABC, "POW"),
    Instruction(InstructionType.ABC, "UNM"), Instruction(InstructionType.ABC, "NOT"), Instruction(InstructionType.ABC, "LEN"),
    Instruction(InstructionType.ABC, "CONCAT"), Instruction(InstructionType.AsBx, "JMP"), Instruction(InstructionType.ABC, "EQ"),
    Instruction(InstructionType.ABC, "LT"), Instruction(InstructionType.ABC, "LE"), Instruction(InstructionType.ABC, "TEST"),
    Instruction(InstructionType.ABC, "TESTSET"), Instruction(InstructionType.ABC, "CALL"), Instruction(InstructionType.ABC, "TAILCALL"),
    Instruction(InstructionType.ABC, "RETURN"), Instruction(InstructionType.AsBx, "FORLOOP"), Instruction(InstructionType.AsBx, "FORPREP"),
    Instruction(InstructionType.ABC, "TFORLOOP"), Instruction(InstructionType.ABC, "SETLIST"), Instruction(InstructionType.ABC, "CLOSE"),
    Instruction(InstructionType.ABx, "CLOSURE"), Instruction(InstructionType.ABC, "VARARG")
]


# at [p]osition, with [s]ize of bits
def get_bits(num: int, p: int, s: int) -> int:
    return (num >> p) & ((1 << s) - 1)


# set bits from data to num at [p]osition, with [s]ize of bits
def set_bits(num: int, data: int, p: int, s: int) -> int:
    mask = ((1 << s) - 1) << p
    return (num & ~mask) | ((data << p) & mask)


# decodes a u32 instruction
def _decode_instr(data: int) -> Instruction:
    opcode = get_bits(data, 0, 6)
    if opcode >= len(instr_lookup_tbl):
        # 6 bits can hold 0-63 but only 38 opcodes exist
        raise Exception("Unknown opcode! [%d]" % opcode)

    template = instr_lookup_tbl[opcode]
    instr = Instruction(template.type, template.name)

    # i read the lopcodes.h file to get these bit position and sizes.
    instr.opcode = opcode
    instr.A = get_bits(data, 6, 8)  # starts after POS_OP + SIZE_OP (6), with a size of 8

    if instr.type == InstructionType.ABC:
        instr.B = get_bits(data, 23, 9)  # starts after POS_C + SIZE_C (23), with a size of 9
        instr.C = get_bits(data, 14, 9)  # starts after POS_A + SIZE_A (14), with a size of 9
    elif instr.type == InstructionType.ABx:
        instr.B = get_bits(data, 14, 18)  # starts after POS_A + SIZE_A (14), with a size of 18
    elif instr.type == InstructionType.AsBx:
        instr.B = get_bits(data, 14, 18) - 131071  # Bx is now signed, so just sub half of the MAX_UINT for 18 bits

    return instr


# returns a u32 instruction
def _encode_instr(instr: Instruction) -> int:
    data = 0

    # encode instruction (basically, do the inverse of _decode_instr)
    data = set_bits(data, instr.opcode, 0, 6)
    data = set_bits(data, instr.A, 6, 8)

    if instr.type == InstructionType.ABC:
        data = set_bits(data, instr.B, 23, 9)
        data = set_bits(data, instr.C, 14, 9)
    elif instr.type == InstructionType.ABx:
        data = set_bits(data, instr.B, 14, 18)
    elif instr.type == InstructionType.AsBx:
        data = set_bits(data, instr.B + 131071, 14, 18)

    return data


class LuaUndump:
    '''Deserializes a Lua 5.1 bytecode dump into a tree of Chunk objects.'''

    def __init__(self):
        self.rootChunk: Chunk = None
        self.index = 0

    def _loadBlock(self, sz) -> bytearray:
        '''Consume and return the next sz bytes; raise if the input is too short.'''
        if self.index + sz > len(self.bytecode):
            raise Exception("Malformed bytecode!")

        temp = bytearray(self.bytecode[self.index:self.index+sz])
        self.index = self.index + sz
        return temp

    def _get_byte(self) -> int:
        return self._loadBlock(1)[0]

    def _get_uint32(self) -> int:
        order = 'big' if self.big_endian else 'little'
        return int.from_bytes(self._loadBlock(4), byteorder=order, signed=False)

    def _get_uint(self) -> int:
        order = 'big' if self.big_endian else 'little'
        return int.from_bytes(self._loadBlock(self.int_size), byteorder=order, signed=False)

    def _get_size_t(self) -> int:
        order = 'big' if self.big_endian else 'little'
        return int.from_bytes(self._loadBlock(self.size_t), byteorder=order, signed=False)

    def _get_double(self) -> float:
        order = '>d' if self.big_endian else '<d'
        return struct.unpack(order, self._loadBlock(8))[0]

    def _get_string(self) -> str:
        size = self._get_size_t()
        if (size == 0):
            return ""

        # [:-1] to remove the NULL terminator. latin-1 maps every byte to the
        # code point of the same value, i.e. chr(byte) for each byte.
        return self._loadBlock(size)[:-1].decode('latin-1')

    def decode_chunk(self) -> Chunk:
        '''Decode one function prototype (and, recursively, its nested protos).'''
        chunk = Chunk()

        # chunk meta info
        chunk.name = self._get_string()
        chunk.frst_line = self._get_uint()
        chunk.last_line = self._get_uint()
        chunk.numUpvals = self._get_byte()
        chunk.numParams = self._get_byte()
        chunk.isVarg = (self._get_byte() != 0)
        chunk.maxStack = self._get_byte()

        # parse instructions
        num = self._get_uint()
        for _ in range(num):
            chunk.appendInstruction(_decode_instr(self._get_uint32()))

        # get constants
        num = self._get_uint()
        for _ in range(num):
            tag = self._get_byte()

            if tag == 0:  # nil
                constant = Constant(ConstType.NIL, None)
            elif tag == 1:  # bool
                constant = Constant(ConstType.BOOL, (self._get_byte() != 0))
            elif tag == 3:  # number
                constant = Constant(ConstType.NUMBER, self._get_double())
            elif tag == 4:  # string
                constant = Constant(ConstType.STRING, self._get_string())
            else:
                raise Exception("Unknown Datatype! [%d]" % tag)

            chunk.appendConstant(constant)

        # parse protos
        num = self._get_uint()
        for _ in range(num):
            chunk.appendProto(self.decode_chunk())

        # debug info

        # line numbers (kept, so that a decoded chunk can be dumped again)
        num = self._get_uint()
        for _ in range(num):
            chunk.appendLine(self._get_uint())

        # locals
        num = self._get_uint()
        for _ in range(num):
            name = self._get_string()  # local name
            start = self._get_uint()   # local start PC
            end = self._get_uint()     # local end PC
            chunk.appendLocal(Local(name, start, end))

        # upvalues
        num = self._get_uint()
        for _ in range(num):
            chunk.appendUpval(self._get_string())  # upvalue name

        return chunk

    def decode_rawbytecode(self, rawbytecode):
        '''Decode a bytes-like dump; raises if it lacks the Lua signature.'''
        # bytecode sanity checks
        if not rawbytecode[0:4] == _LUAMAGIC:
            raise Exception("Lua Bytecode expected!")

        # 'B' = unsigned bytes, so every element is in 0..255
        bytecode = array.array('B', rawbytecode)
        return self.decode_bytecode(bytecode)

    def decode_bytecode(self, bytecode):
        '''Decode a sequence of byte values (header included); returns the root Chunk.'''
        self.bytecode = bytecode

        # aligns index, skips header
        self.index = 4

        self.vm_version = self._get_byte()
        self.bytecode_format = self._get_byte()
        self.big_endian = (self._get_byte() == 0)
        self.int_size = self._get_byte()
        self.size_t = self._get_byte()
        self.instr_size = self._get_byte()  # gets size of instructions
        self.l_number_size = self._get_byte()  # size of lua_Number
        self.integral_flag = self._get_byte()  # is lua_Number defined as an int? false = float/double, true = int/long/short/etc.

        self.rootChunk = self.decode_chunk()
        return self.rootChunk

    def loadFile(self, luaCFile):
        '''Read the file at path luaCFile and decode it.'''
        with open(luaCFile, 'rb') as luac_file:
            bytecode = luac_file.read()
            return self.decode_rawbytecode(bytecode)

    def print_dissassembly(self):
        self.rootChunk.print()


class LuaDump:
    '''Serializes a Chunk tree back into a Lua 5.1 bytecode dump.

    The layout written here is the exact inverse of what LuaUndump reads.
    '''

    def __init__(self, rootChunk: Chunk):
        self.rootChunk = rootChunk
        self.bytecode = bytearray()

        # header info
        self.vm_version = 0x51
        self.bytecode_format = 0x00
        self.big_endian = False

        # data sizes
        self.int_size = 4
        self.size_t = 8
        self.instr_size = 4
        self.l_number_size = 8
        self.integral_flag = False  # lua_Number is a double

    def _writeBlock(self, data: bytes):
        self.bytecode += bytearray(data)

    def _set_byte(self, b: int):
        self.bytecode.append(b)

    def _set_uint32(self, i: int):
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(4, order, signed=False))

    def _set_uint(self, i: int):
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(self.int_size, order, signed=False))

    def _set_size_t(self, i: int):
        order = 'big' if self.big_endian else 'little'
        self._writeBlock(i.to_bytes(self.size_t, order, signed=False))

    def _set_double(self, f: float):
        order = '>d' if self.big_endian else '<d'
        self._writeBlock(struct.pack(order, f))

    def _set_string(self, string: str):
        # inverse of LuaUndump._get_string: an empty string is a bare zero
        # size, anything else is size (incl. NULL) + bytes + NULL terminator
        if not string:
            self._set_size_t(0)
            return

        raw = string.encode('latin-1')
        self._set_size_t(len(raw) + 1)
        self._writeBlock(raw)
        self._set_byte(0x00)

    def _dumpChunk(self, chunk: Chunk):
        # chunk meta info
        self._set_string(chunk.name)
        self._set_uint(chunk.frst_line)
        self._set_uint(chunk.last_line)
        self._set_byte(chunk.numUpvals)
        self._set_byte(chunk.numParams)
        self._set_byte(1 if chunk.isVarg else 0)
        self._set_byte(chunk.maxStack)

        # instructions
        self._set_uint(len(chunk.instructions))
        for instr in chunk.instructions:
            self._set_uint32(_encode_instr(instr))

        # constants (nil carries no payload after its type tag)
        self._set_uint(len(chunk.constants))
        for constant in chunk.constants:
            self._set_byte(int(constant.type))
            if constant.type == ConstType.BOOL:
                self._set_byte(1 if constant.data else 0)
            elif constant.type == ConstType.NUMBER:
                self._set_double(constant.data)
            elif constant.type == ConstType.STRING:
                self._set_string(constant.data)

        # protos
        self._set_uint(len(chunk.protos))
        for proto in chunk.protos:
            self._dumpChunk(proto)

        # line numbers
        self._set_uint(len(chunk.lineNums))
        for line in chunk.lineNums:
            self._set_uint(line)

        # locals
        self._set_uint(len(chunk.locals))
        for local in chunk.locals:
            self._set_string(local.name)
            self._set_uint(local.start)
            self._set_uint(local.end)

        # upvalues
        self._set_uint(len(chunk.upvalues))
        for upval in chunk.upvalues:
            self._set_string(upval)

    def _dumpHeader(self):
        self._writeBlock(_LUAMAGIC)

        # same field order as LuaUndump.decode_bytecode
        self._set_byte(self.vm_version)
        self._set_byte(self.bytecode_format)
        self._set_byte(0 if self.big_endian else 1)  # the reader treats 0 as big endian
        self._set_byte(self.int_size)
        self._set_byte(self.size_t)
        self._set_byte(self.instr_size)
        self._set_byte(self.l_number_size)
        self._set_byte(int(self.integral_flag))

    def dump(self) -> bytearray:
        '''Serialize the header and the root chunk; returns the accumulated bytes.'''
        self._dumpHeader()
        self._dumpChunk(self.rootChunk)

        return self.bytecode